linkedin · mkuchenbecker · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/apps/optimizer/schedulerapp/build.gradle b/apps/optimizer/schedulerapp/build.gradle
@@ -0,0 +1,14 @@
+plugins {
+  id 'openhouse.springboot-ext-conventions'
+  id 'org.springframework.boot' version '2.7.8'
+}
+
+// Deployable Spring Boot wrapper: holds SchedulerApplication (the @SpringBootApplication entry
+// point) and application.properties; the scheduling logic lives in :services:optimizer:scheduler.
+// NOTE(review): application.properties defaults to an H2 URL, but only MySQL is declared here.
+dependencies {
+  implementation project(':services:optimizer:scheduler')
+  implementation 'org.springframework.boot:spring-boot-starter:2.7.8'
+  implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
+  runtimeOnly 'mysql:mysql-connector-java:8.0.33'
+}
diff --git a/...lerapp/src/main/java/com/linkedin/openhouse/optimizer/scheduler/SchedulerApplication.java b/...lerapp/src/main/java/com/linkedin/openhouse/optimizer/scheduler/SchedulerApplication.java
@@ -0,0 +1,63 @@
+package com.linkedin.openhouse.optimizer.scheduler;
+
+import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
+import java.util.Map;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.boot.ExitCodeGenerator;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.autoconfigure.domain.EntityScan;
+import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
+
+/**
+ * Entry point for the Optimizer Scheduler application.
+ *
+ * <p>Spring Batch–style: implements {@link CommandLineRunner} so the work runs after context
+ * startup, and {@link ExitCodeGenerator} so the JVM exit code reflects batch outcome. {@code
+ * SpringApplication.exit(...)} closes the context (triggers {@code @PreDestroy} hooks, drains the
+ * JPA pool, etc.) so the k8s CronJob pod terminates cleanly with a status reflecting reality.
+ */
+@Slf4j
+@SpringBootApplication
+@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db")
+@EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository")
+public class SchedulerApplication implements CommandLineRunner, ExitCodeGenerator {
+
+  private final SchedulerRunner runner;
+  private final Map<OperationTypeDto, BinPacker> binPackers;
+  private int exitCode = 0;
+
+  @Autowired
+  public SchedulerApplication(SchedulerRunner runner, Map<OperationTypeDto, BinPacker> binPackers) {
+    this.runner = runner;
+    this.binPackers = binPackers;
+  }
+
+  public static void main(String[] args) {
+    System.exit(SpringApplication.exit(SpringApplication.run(SchedulerApplication.class, args)));
+  }
+
+  /**
+   * Runs the scheduler once per registered {@link BinPacker} per process invocation. Each call is
+   * scoped to one operation type. Any thrown exception is logged and surfaces as a non-zero exit
+   * code via {@link #getExitCode()} after the context is shut down cleanly.
+   */
+  @Override
+  public void run(String... args) {
+    try {
+      log.info("Scheduler starting; operation types: {}", binPackers.keySet());
+      binPackers.keySet().forEach(runner::schedule);
+      log.info("Scheduler completed successfully");
+    } catch (Exception e) {
+      log.error("Scheduler failed", e);
+      exitCode = 1;
+    }
+  }
+
+  @Override
+  public int getExitCode() {
+    return exitCode;
+  }
+}
diff --git a/apps/optimizer/schedulerapp/src/main/resources/application.properties b/apps/optimizer/schedulerapp/src/main/resources/application.properties
@@ -0,0 +1,11 @@
+spring.application.name=openhouse-optimizer-scheduler
+spring.main.web-application-type=none
+spring.main.banner-mode=off
+spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:h2:mem:schedulerdb;DB_CLOSE_DELAY=-1;MODE=MySQL}
+spring.datasource.username=${OPTIMIZER_DB_USER:sa}
+spring.datasource.password=${OPTIMIZER_DB_PASSWORD:}
+spring.jpa.hibernate.ddl-auto=none
+optimizer.scheduler.jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002}
+optimizer.scheduler.ofd.max-files-per-bin=${SCHEDULER_OFD_MAX_FILES_PER_BIN:1000000}
+optimizer.scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/optimizer/operations}
+optimizer.scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster}
diff --git a/services/optimizer/scheduler/build.gradle b/services/optimizer/scheduler/build.gradle
@@ -0,0 +1,33 @@
+plugins {
+  id 'openhouse.springboot-ext-conventions'
+  id 'org.springframework.boot' version '2.7.8'
+}
+
+// Library jar — the @SpringBootApplication entry point lives in :apps:optimizer:schedulerapp.
+// Disable bootJar so we don't try to assemble a runnable jar from a library that has no main
+// class; keep jar enabled so consumers (the apps wrapper) get a normal library artifact.
+bootJar {
+  enabled = false
+}
+
+jar {
+  enabled = true
+  archiveClassifier = '' // the Boot plugin defaults the plain jar's classifier to 'plain'; clear it
+}
+
+dependencies {
+  // api: the scheduler's public types (e.g. BinPacker, OperationTypeDto) come from
+  // :services:optimizer, so consumers of this library see them on their compile classpath.
+  api project(':services:optimizer')
+  implementation 'org.springframework.boot:spring-boot-starter:2.7.8'
+  implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8'
+  implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
+  implementation 'org.springframework.boot:spring-boot-starter-aop:2.7.8'
+  runtimeOnly 'mysql:mysql-connector-java:8.0.33'
+  testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
+  testRuntimeOnly 'com.h2database:h2' // unversioned: presumably managed by the conventions plugin — confirm
+}
+
+test {
+  useJUnitPlatform() // run the tests on the JUnit Platform
+}
diff --git a/...ces/optimizer/scheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/Bin.java b/...ces/optimizer/scheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/Bin.java
@@ -0,0 +1,61 @@
+package com.linkedin.openhouse.optimizer.scheduler;
+
+import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
+import com.linkedin.openhouse.optimizer.model.TableOperationDto;
+import com.linkedin.openhouse.optimizer.scheduler.client.JobsServiceClient;
+import java.time.Instant;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+
+/**
+ * A set of operations the scheduler submits together as a single Spark job. A bin owns its own
+ * launch: callers ask it to schedule itself and react to the returned job id. Claim,
+ * mark-scheduled and revert-to-pending status updates stay in the scheduler, shared by all bins.
+ */
+@RequiredArgsConstructor
+public class Bin {
+
+  @Getter private final OperationTypeDto operationType;
+  @Getter private final List<TableOperationDto> operations;
+
+  /** Operation UUIDs in this bin, parallel to {@link #getTableNames()}. */
+  public List<String> getOperationIds() {
+    return operations.stream().map(TableOperationDto::getId).collect(Collectors.toList());
+  }
+
+  /** Fully-qualified {@code database.table} identifiers for the operations in this bin. */
+  public List<String> getTableNames() {
+    return operations.stream()
+        .map(op -> op.getDatabaseName() + "." + op.getTableName())
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Return a new {@link Bin} containing only the operations whose IDs are in {@code keepIds}. Used
+   * by the scheduler to narrow the bin to the rows it actually claimed before launching the job.
+   */
+  public Bin subset(Collection<String> keepIds) {
+    Set<String> keep = new HashSet<>(keepIds);
+    List<TableOperationDto> filtered =
+        operations.stream().filter(op -> keep.contains(op.getId())).collect(Collectors.toList());
+    return new Bin(operationType, filtered);
+  }
+
+  /**
+   * Submit this bin as a single Spark job named {@code batched-<type>-<epochMillis>}; the type is
+   * lower-cased under {@code Locale.ROOT} so the default locale cannot alter the name. Returns the
+   * job id on success, or empty on submission failure; the caller owns the status updates.
+   */
+  public Optional<String> schedule(JobsServiceClient client, String resultsEndpoint) {
+    String typeSlug = operationType.name().toLowerCase(java.util.Locale.ROOT);
+    String jobName = "batched-" + typeSlug + "-" + Instant.now().toEpochMilli();
+    return client.launch(
+        jobName, operationType.name(), getTableNames(), getOperationIds(), resultsEndpoint);
+  }
+}
diff --git a/...timizer/scheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/BinPacker.java b/...timizer/scheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/BinPacker.java
@@ -0,0 +1,24 @@
+package com.linkedin.openhouse.optimizer.scheduler;
+
+import com.linkedin.openhouse.optimizer.model.TableStatsDto;
+import java.util.List;
+
+/**
+ * Strategy for packing a set of operations into bins for batched job submission. Implementations
+ * encode the constraints of one packing dimension (file count, partition count, etc.); the
+ * scheduler configuration, not the strategy class, binds a packer to an operation type.
+ *
+ * <p>{@link TableStatsDto}, carried with each operation in a {@link SchedulingCandidate}, is the
+ * cost source. Implementations project it down to the minimal data their packing decision needs
+ * (e.g. file count for OFD) and do not retain the full stats payload in the returned bins.
+ */
+public interface BinPacker {
+
+  /**
+   * Pack {@code pending} into {@link Bin}s; the scheduler dispatches one Spark job per bin.
+   *
+   * @param pending candidates to pack, each an operation paired with its stats
+   * @return the bins, each non-empty (empty input may therefore yield an empty list)
+   */
+  List<Bin> pack(List<SchedulingCandidate> pending);
+}
diff --git a/...cheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/FileCountBinPacker.java b/...cheduler/src/main/java/com/linkedin/openhouse/optimizer/scheduler/FileCountBinPacker.java
@@ -0,0 +1,84 @@
+package com.linkedin.openhouse.optimizer.scheduler;
+
+import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
+import com.linkedin.openhouse.optimizer.model.TableOperationDto;
+import com.linkedin.openhouse.optimizer.model.TableStatsDto;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.OptionalInt;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Bin-packer that runs greedy first-fit-descending over per-table file counts, each taken from a
+ * candidate's {@link TableStatsDto}.
+ *
+ * <p>Operations are placed from the largest file count down, each into the first bin whose running
+ * total would stay at or below {@code maxFilesPerBin}; otherwise a new bin is opened. An operation
+ * over the limit gets a bin of its own: oversized bins are allowed so nothing is ever dropped.
+ */
+@RequiredArgsConstructor
+public class FileCountBinPacker implements BinPacker {
+
+  private final OperationTypeDto operationType;
+  private final long maxFilesPerBin;
+
+  @Override
+  public List<Bin> pack(List<SchedulingCandidate> pending) {
+    if (pending.isEmpty()) {
+      return List.of();
+    }
+
+    // Each candidate's packing cost, resolved a single time and keyed by operation id.
+    Map<String, Long> filesById =
+        pending.stream()
+            .collect(Collectors.toMap(c -> c.getOperation().getId(), c -> cost(c.getStats())));
+
+    // Highest cost first; equal costs keep their incoming order because the sort is stable.
+    Comparator<TableOperationDto> largestFirst =
+        (a, b) -> Long.compare(filesById.get(b.getId()), filesById.get(a.getId()));
+    List<TableOperationDto> ordered =
+        pending.stream()
+            .map(SchedulingCandidate::getOperation)
+            .sorted(largestFirst)
+            .collect(Collectors.toList());
+
+    // bins.get(i) and totals.get(i) describe the same bin: its members and their summed cost.
+    List<List<TableOperationDto>> bins = new ArrayList<>();
+    List<Long> totals = new ArrayList<>();
+    for (TableOperationDto op : ordered) {
+      long files = filesById.get(op.getId());
+      OptionalInt slot = firstFit(totals, files);
+      if (slot.isPresent()) {
+        int at = slot.getAsInt();
+        bins.get(at).add(op);
+        totals.set(at, totals.get(at) + files);
+        continue;
+      }
+      // Nothing fits: open a new bin seeded with this operation.
+      List<TableOperationDto> opened = new ArrayList<>();
+      opened.add(op);
+      bins.add(opened);
+      totals.add(files);
+    }
+
+    return bins.stream().map(ops -> new Bin(operationType, ops)).collect(Collectors.toList());
+  }
+
+  /** First bin index whose total is zero or, with {@code files} added, stays within the limit. */
+  private OptionalInt firstFit(List<Long> totals, long files) {
+    return IntStream.range(0, totals.size())
+        .filter(i -> totals.get(i) == 0 || totals.get(i) + files <= maxFilesPerBin)
+        .findFirst();
+  }
+
+  /** File count from the stats snapshot, or 0 when the stats, snapshot or count is absent. */
+  private static long cost(TableStatsDto stats) {
+    boolean missing = stats == null || stats.getSnapshot() == null;
+    Long files = missing ? null : stats.getSnapshot().getNumCurrentFiles();
+    return files == null ? 0L : files;
+  }
+}