diff --git a/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala b/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala index 211d7d3b757..b23edb7ae92 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/service/util/LargeBinaryManager.scala @@ -30,7 +30,7 @@ import java.util.UUID * normal tuple size limits. */ object LargeBinaryManager extends LazyLogging { - private val DEFAULT_BUCKET = "texera-large-binaries" + val DEFAULT_BUCKET: String = "texera-large-binaries" /** * Creates a new LargeBinary reference. @@ -39,8 +39,6 @@ object LargeBinaryManager extends LazyLogging { * @return S3 URI string for the new LargeBinary (format: s3://bucket/key) */ def create(): String = { - S3StorageClient.createBucketIfNotExist(DEFAULT_BUCKET) - val objectKey = s"objects/${System.currentTimeMillis()}/${UUID.randomUUID()}" val uri = s"s3://$DEFAULT_BUCKET/$objectKey" diff --git a/file-service/src/main/scala/org/apache/texera/service/FileService.scala b/file-service/src/main/scala/org/apache/texera/service/FileService.scala index 20bb242bc12..cc4174682fe 100644 --- a/file-service/src/main/scala/org/apache/texera/service/FileService.scala +++ b/file-service/src/main/scala/org/apache/texera/service/FileService.scala @@ -38,6 +38,7 @@ import org.apache.texera.service.resource.{ HealthCheckResource } import org.apache.texera.service.util.S3StorageClient +import org.apache.texera.service.util.LargeBinaryManager import org.eclipse.jetty.server.session.SessionHandler import java.nio.file.Path @@ -70,6 +71,8 @@ class FileService extends Application[FileServiceConfiguration] with LazyLogging // check if the texera dataset bucket exists, if not create it S3StorageClient.createBucketIfNotExist(StorageConfig.lakefsBucketName) + // ensure the large-binary S3 bucket exists before any workflow execution attempts to use it + S3StorageClient.createBucketIfNotExist(LargeBinaryManager.DEFAULT_BUCKET) // check if we can connect to the lakeFS service LakeFSStorageClient.healthCheck()