93 commits
ea1db86
Add our user agent to 120.uploader (#255)
mcopik Oct 16, 2025
e201761
Minor Bug Fixes on GCP (#252)
rabbull Oct 16, 2025
b54b46c
[dev] Linting
mcopik Oct 17, 2025
550cc8c
added bert as a test inference benchmark
McLavish Oct 30, 2025
f9c3817
hotfix to enable gpu capabilities
McLavish Nov 3, 2025
0f93b66
added pre-commit hooks for linting and formatting
McLavish Nov 4, 2025
b965d7b
linting and formatting setting for whoever uses vscode + black + flak…
McLavish Nov 4, 2025
e9916db
reformatted local file so it passes linting/format
McLavish Nov 4, 2025
2b75311
Merge branch 'development' into feature/bert-inference
McLavish Nov 4, 2025
813af03
bert now uses gpu
McLavish Nov 4, 2025
3a96f04
changed data repo to be OUR forked data repo
McLavish Nov 4, 2025
d4d5d30
change data loading path to own forked repo
Russellpang Nov 5, 2025
1b7deb7
change data loading path to own forked repo
Russellpang Nov 5, 2025
668652c
update benchmark function
Russellpang Nov 5, 2025
aae1023
fix: replaced onnxruntime requirement from CPU to GPU. now it actuall…
McLavish Nov 5, 2025
25fd1d9
circleci mypy fix?
McLavish Nov 5, 2025
c478c91
Merge pull request #2 from McLavish/feature/bert-inference
McLavish Nov 5, 2025
d6c4227
benchmarks is now flake8/black compliant. pre-commit hooks also check…
McLavish Nov 5, 2025
27b14d6
add linalg benchmarks
Russellpang Nov 5, 2025
ace2335
add linalg benchmarks
Russellpang Nov 5, 2025
ad3023d
changed CI/CD to run linting on the benchmarks folder ONLY. disabled …
McLavish Nov 5, 2025
52f30c0
fix typo
Russellpang Nov 6, 2025
4efff4d
update code
Russellpang Nov 6, 2025
adf54a5
migrated from CircleCI to Github Actions
McLavish Nov 6, 2025
67772e2
fixed workflow directory
McLavish Nov 6, 2025
8f02b66
pip dependencies take too long
McLavish Nov 6, 2025
ae61e4b
Merge pull request #8 from McLavish/hotfix/code-quality-on-benchmarks
McLavish Nov 6, 2025
e06985c
new benchmark data
McLavish Nov 10, 2025
037f6c3
Bring folder from other-branch
Nov 12, 2025
377d949
update code
Nov 12, 2025
8dd8a6e
modify code and requirements
Nov 12, 2025
de15075
unfinished new fuc
Russellpang Nov 12, 2025
3006879
add new functions
Russellpang Nov 12, 2025
d224ddc
add new functions
Russellpang Nov 13, 2025
921f321
added recommender benchmark
McLavish Nov 13, 2025
dd840d1
Merge branch 'development' into feature/russell
YuxuanLiu-kayla Nov 13, 2025
4fca4aa
changed data submodule to use ssh and not https
McLavish Nov 13, 2025
26dfcf4
add channel_flow, compute, fft, and resnet of jax_npbench
down-street Nov 15, 2025
fad77da
reset the config
down-street Nov 15, 2025
e995e6a
Merge pull request #13 from McLavish/jiahao/npbenchs
down-street Nov 15, 2025
a563b16
experimental local workflows implementation
McLavish Nov 15, 2025
379dbf6
workflow changes for local execution
McLavish Nov 15, 2025
54712a9
fix: selfish-detour not including the .so file when packaging
McLavish Nov 15, 2025
1d50000
changed workflows script to automatically compile detour C code
McLavish Nov 15, 2025
7e0d13f
microbenchmark example
Nov 16, 2025
942f5a1
Remove SSH public key from eval command
Russellpang Nov 16, 2025
6bc1dd7
Remove local_deployment.json configuration
Russellpang Nov 16, 2025
460ea1f
Delete out_storage.json configuration file
Russellpang Nov 16, 2025
de41ab6
Remove SSH private key from eval command
Russellpang Nov 16, 2025
ded520f
remove garbage
Nov 16, 2025
5c85980
test
Russellpang Nov 17, 2025
c5782dd
test
Russellpang Nov 17, 2025
2b52ced
test
Russellpang Nov 17, 2025
e5cb20c
Merge branch 'development' into feature/russell
Russellpang Nov 17, 2025
6488d6d
remove unnecessay files
Russellpang Nov 17, 2025
55c4ac4
fuck you
Russellpang Nov 17, 2025
b97b7a5
Refactor argument parsing for cleaner syntax
Russellpang Nov 17, 2025
1998b6b
Change 'reps' to 'iters' in jacobi2d function
Russellpang Nov 17, 2025
2cbd768
Delete benchmarks/000.microbenchmarks/050.matmul directory
Russellpang Nov 17, 2025
074d4b7
Merge pull request #6 from McLavish/feature/russell
McLavish Nov 17, 2025
efced9c
Revert "changed data submodule to use ssh and not https"
McLavish Nov 17, 2025
bc48b5e
fix: missing config.json
McLavish Nov 17, 2025
e154ba0
Merge branch 'development' into feature/inference-recommender
McLavish Nov 17, 2025
d9ed506
Merge pull request #11 from McLavish/feature/inference-recommender
McLavish Nov 17, 2025
fb0fdaf
changed genome requirements
McLavish Nov 18, 2025
4c36a96
changed example.json to include workflow resources
McLavish Nov 18, 2025
6506743
changed convenience script to do all the prereqs for workflows
McLavish Nov 18, 2025
c2d2ed4
removed duplicate pop
McLavish Nov 18, 2025
f396272
deleted untested nodejs
McLavish Nov 18, 2025
3382129
Merge pull request #18 from McLavish/feature-workflows-local-debloat
McLavish Nov 18, 2025
9b129b4
Merge remote-tracking branch 'origin/development' into feature/workflows
McLavish Nov 18, 2025
7f6f901
reformat the benchmark workflows folder
Russellpang Nov 18, 2025
591cd39
reformat the benchmark workflows folder
Russellpang Nov 18, 2025
218ef8c
reformat the benchmark workflows folder
Russellpang Nov 18, 2025
1b07299
test tuff auto-fixing flake errors
Russellpang Nov 18, 2025
7de931c
formatting
Russellpang Nov 19, 2025
a08cb6d
fix flake8 reported errors
Nov 21, 2025
db87fac
fix flake8 reported errors
Nov 21, 2025
29faa42
test flake8 fix on wrappers
Nov 22, 2025
ff8e090
test flake8 fix on wrappers
Nov 22, 2025
4480290
fix interval index linting issue
Nov 22, 2025
5c7572a
re-added missing imports to 690.ml workflow. re-added double schedule…
McLavish Dec 6, 2025
e779c9d
add OCR-pipeline workflow without detection (#30)
down-street Dec 9, 2025
eaf1ca9
Revert "add OCR-pipeline workflow without detection (#30)" (#31)
down-street Dec 9, 2025
692de3a
test sonataflow
Dec 14, 2025
87460dc
test sonataflow
Dec 14, 2025
6874b43
log current state
Russellpang Dec 14, 2025
f35071a
garbage
Russellpang Dec 15, 2025
9955675
log temp res
Russellpang Dec 16, 2025
300ecb0
log curr re
Russellpang Dec 29, 2025
927f6f0
log
Russellpang Jan 6, 2026
e2c728c
log results
Russellpang Jan 7, 2026
447863a
fix sonataflow
Russellpang Jan 8, 2026
81 changes: 0 additions & 81 deletions .circleci/config.yml

This file was deleted.

6 changes: 6 additions & 0 deletions .dockerignore
@@ -7,3 +7,9 @@ cache
 python-venv
 regression-*
 *_code
+scylladb-volume
+minio-volume
+output
+results
+*.json
+out_storage.json
55 changes: 55 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,55 @@
name: Lint

on:
  push:
  pull_request:

jobs:
  linting:
    runs-on: ubuntu-latest

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Cache virtualenv
        uses: actions/cache@v4
        with:
          path: python-venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}-${{ github.ref_name }}
          restore-keys: |
            venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}-
            venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-

      - name: Install system packages
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev

      - name: Install Python dependencies (via install.py)
        run: |
          python3 install.py --no-aws --no-azure --no-gcp --no-openwhisk --no-local

      - name: Black (check)
        run: |
          . python-venv/bin/activate
          black benchmarks --check --config .black.toml

      - name: Flake8 (lint)
        run: |
          . python-venv/bin/activate
          # write to file and echo to stdout (requires flake8 with --tee support)
          flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports

      - name: Upload flake report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: flake-reports
          path: flake-reports
4 changes: 3 additions & 1 deletion .gitignore
@@ -11,7 +11,6 @@ cache*
 minio-volume
 scylladb-volume

-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -188,3 +187,6 @@ cache
 # IntelliJ IDEA files
 .idea
 *.iml
+
+# MacOS Finder
+**/.DS_Store
2 changes: 1 addition & 1 deletion .gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/mcopik/pypapi.git
 [submodule "benchmarks-data"]
 	path = benchmarks-data
-	url = https://github.com/spcl/serverless-benchmarks-data.git
+	url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git
3 changes: 3 additions & 0 deletions .mypy.ini
@@ -3,6 +3,9 @@
 [mypy-docker]
 ignore_missing_imports = True

+[mypy-docker.*]
+ignore_missing_imports = True
+
 [mypy-tzlocal]
 ignore_missing_imports = True
30 changes: 30 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,30 @@
# .pre-commit-config.yaml
repos:
  - repo: local
    hooks:
      - id: flake8-local
        name: flake8 (project env)
        language: python
        additional_dependencies: ["flake8==7.1.1"]
        entry: flake8
        args: ["--config=.flake8.cfg"]
        types: [python]
        files: ^(sebs/|benchmarks/)
  - repo: local
    hooks:
      - id: black-check-local
        name: black --check (project env)
        language: python
        additional_dependencies: ["black==22.8.0"]
        entry: black
        args: ["--config=.black.toml", "--check", "--diff"]
        types: [python]
        files: ^(sebs/|benchmarks/)
  # - repo: local
  #   hooks:
  #     - id: mypy-local
  #       name: mypy (project venv)
  #       language: system
  #       entry: bash -lc 'python -m mypy --config-file=.mypy.ini sebs'
  #       types: [python]

7 changes: 7 additions & 0 deletions .tuff.toml
@@ -0,0 +1,7 @@
line-length = 100
target-version = "py38"
[lint]
select = ["E", "F", "W"]

[lint.isort]
known-first-party = ["sebs"]
15 changes: 15 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,15 @@
{
  "[python]": {
    "editor.defaultFormatter": "ms-python.black-formatter",
    "editor.formatOnSave": true
  },

  "black-formatter.importStrategy": "fromEnvironment",
  "black-formatter.path": [],
  "black-formatter.args": ["--config=.black.toml"],

  "flake8.importStrategy": "fromEnvironment",
  "flake8.path": [],
  "flake8.args": ["--config=.flake8.cfg"],
  "flake8.enabled": true
}
61 changes: 61 additions & 0 deletions QUICKSTART_SONATAFLOW.md
@@ -0,0 +1,61 @@
# SonataFlow quickstart

This backend generates Serverless Workflow definitions from SeBS workflow specs and drives them through a running SonataFlow (Kogito) instance. Functions still run as local SeBS containers; SonataFlow orchestrates them via HTTP.

## Prerequisites
- Docker available locally.
- A SonataFlow dev-mode/runtime reachable at `http://localhost:8080` (default). Example:
```bash
docker run --rm -it -p 8080:8080 \
  -v "$PWD/output/workflow_resources/sonataflow":/home/kogito/serverless-workflow-project/src/main/resources/workflows \
  quay.io/kiegroup/kogito-swf-devmode:latest
```
The volume mount should point to the directory where SeBS writes generated `.sw.json` files.
If you also need to provide `application.properties`, mount a directory to
`/home/kogito/serverless-workflow-project/src/main/resources` that contains both
`application.properties` and a `workflows/` subdirectory.
- Local object/NoSQL/redis services (reuse `run_local_workflows.sh` setup or `./sebs.py storage start all config/storage.json`).

## Configure
Add a `deployment.sonataflow` block to your config (based on `config/example.json`):
```json
{
  "deployment": {
    "name": "sonataflow",
    "sonataflow": {
      "resources": {
        "redis": { "host": "localhost:6380", "password": "" },
        "runtime": { "url": "http://localhost:8080", "endpoint_prefix": "" }
      },
      "storage": {
        "type": "minio",
        "address": "localhost",
        "mapped_port": 9000,
        "access_key": "minio",
        "secret_key": "minio123",
        "instance_id": "minio",
        "input_buckets": [],
        "output_buckets": []
      }
    }
  }
}
```
Adjust storage/redis endpoints to match your local services.

## Run
1. Start storage/redis (as in `run_local_workflows.sh`).
2. Start SonataFlow dev-mode and mount the output directory (see above).
3. Execute a workflow benchmark:
```bash
./sebs.py benchmark workflow 610.gen test \
  --config config/your-sonataflow-config.json \
  --deployment sonataflow --trigger http --repetitions 1 --verbose
```

On first run SeBS will:
- Package workflow functions into local containers.
- Translate `definition.json` into `workflow_resources/sonataflow/<workflow_id>.sw.json` under the generated code package directory (inside your `--output-dir` tree).
- Invoke SonataFlow at `{runtime_url}/{workflow_id}` with the workflow payload (and auto-fallback to `/services/{workflow_id}` if needed).

If SonataFlow dev-mode fails with a “Duplicated item found with id …” error, ensure there is only one `.sw.json` file per workflow id under the mounted resources directory.
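The invocation behavior described above (POST the payload to `{runtime_url}/{workflow_id}`, with an automatic fallback to `/services/{workflow_id}`) can be sketched in Python. This is an illustrative sketch of the documented pattern, not the SeBS implementation; the helper names `candidate_urls` and `invoke_workflow` are invented for this example.

```python
# Sketch of the documented invocation pattern: try {runtime_url}/{workflow_id}
# first, then fall back to /services/{workflow_id} on a 404. Helper names are
# illustrative only, not part of the SeBS codebase.
import json
import urllib.error
import urllib.request


def candidate_urls(runtime_url: str, workflow_id: str) -> list:
    # Primary endpoint first, then the /services/ fallback mentioned above.
    base = runtime_url.rstrip("/")
    return [f"{base}/{workflow_id}", f"{base}/services/{workflow_id}"]


def invoke_workflow(runtime_url: str, workflow_id: str, payload: dict) -> dict:
    data = json.dumps(payload).encode()
    for url in candidate_urls(runtime_url, workflow_id):
        req = urllib.request.Request(
            url, data=data, headers={"Content-Type": "application/json"}
        )
        try:
            with urllib.request.urlopen(req) as resp:
                return json.load(resp)
        except urllib.error.HTTPError as err:
            if err.code != 404:
                raise  # only a 404 triggers the fallback endpoint
    raise RuntimeError(f"workflow {workflow_id} not found at {runtime_url}")
```

A call such as `invoke_workflow("http://localhost:8080", "610.gen", {...})` would first try `http://localhost:8080/610.gen` and only then `http://localhost:8080/services/610.gen`.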
2 changes: 1 addition & 1 deletion benchmarks-data
Submodule benchmarks-data updated 37 files
+41 −0 200.multimedia/210.thumbnailer/README.md
+5 −2 200.multimedia/220.video-processing/README.md
+1 −0 200.multimedia/225.video-watermarking-gpu/.gitkeep
+ 200.multimedia/225.video-watermarking-gpu/sample.mp4
+ 200.multimedia/225.video-watermarking-gpu/watermark.png
+7 −0 300.utilities/311.compression/README.md
+12 −0 400.inference/411.image-recognition/README.md
+ 400.inference/412.language-bert/model/bert-tiny-onnx.tar.gz
+15 −0 400.inference/412.language-bert/text/sentences.jsonl
+ 400.inference/413.image-classification/data/512px-Cacatua_moluccensis_-Cincinnati_Zoo-8a.jpg
+ 400.inference/413.image-classification/data/782px-Pumiforme.JPG
+ 400.inference/413.image-classification/data/800px-20180630_Tesla_Model_S_70D_2015_midnight_blue_left_front.jpg
+ 400.inference/413.image-classification/data/800px-7weeks_old.JPG
+ 400.inference/413.image-classification/data/800px-Jammlich_crop.jpg
+ 400.inference/413.image-classification/data/800px-Porsche_991_silver_IAA.jpg
+ 400.inference/413.image-classification/data/800px-Sardinian_Warbler.jpg
+ 400.inference/413.image-classification/data/800px-Welsh_Springer_Spaniel.jpg
+8 −0 400.inference/413.image-classification/data/val_map.txt
+ 400.inference/413.image-classification/model/resnet50.tar.gz
+3 −0 400.inference/413.recommendation/data/requests.jsonl
+ 400.inference/413.recommendation/model/dlrm_tiny.pt
+13 −0 500.scientific/504.dna-visualisation/README.md
+ 600.workflows/6xx.OCR-pipeline/pages/captcha1.jpg
+ 600.workflows/6xx.OCR-pipeline/pages/captcha2.jpg
+ 600.workflows/6xx.OCR-pipeline/pages/image0.png
+ 600.workflows/6xx.OCR-pipeline/pages/numbers_gs150.jpg
+ 600.workflows/6xx.OCR-pipeline/pages/plaid_c150.jpg
+26 −0 600.workflows/download_datasets.sh
+ 700.batchsize/701.language-bert-batchsize/model/bert-tiny-onnx.tar.gz
+15 −0 700.batchsize/701.language-bert-batchsize/text/sentences.jsonl
+ 700.batchsize/702.language-bert-arrival/model/bert-tiny-onnx.tar.gz
+15 −0 700.batchsize/702.language-bert-arrival/text/sentences.jsonl
+4 −0 700.batchsize/703.recommendation-batchsize/data/requests.jsonl
+ 700.batchsize/703.recommendation-batchsize/model/dlrm_tiny.pt
+3 −0 700.batchsize/704.recommendation-arrival/data/requests.jsonl
+ 700.batchsize/704.recommendation-arrival/model/dlrm_tiny.pt
+5 −0 README.md
13 changes: 6 additions & 7 deletions benchmarks/000.microbenchmarks/010.sleep/input.py
@@ -1,12 +1,11 @@
-size_generators = {
-    'test' : 1,
-    'small' : 100,
-    'large': 1000
-}
+size_generators = {"test": 1, "small": 100, "large": 1000}


 def buckets_count():
     return (0, 0)


-def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func):
-    return { 'sleep': size_generators[size] }
+def generate_input(
+    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
+):
+    return {"sleep": size_generators[size]}
6 changes: 3 additions & 3 deletions benchmarks/000.microbenchmarks/010.sleep/python/function.py
@@ -1,9 +1,9 @@
-
 from time import sleep


 def handler(event):

     # start timing
-    sleep_time = event.get('sleep')
+    sleep_time = event.get("sleep")
     sleep(sleep_time)
-    return { 'result': sleep_time }
+    return {"result": sleep_time}
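The two files above are the driver side (`input.py`) and the function side (`function.py`) of the 010.sleep microbenchmark: the driver builds an event from `size_generators`, and the handler consumes it. A minimal end-to-end sketch using the reformatted code (the direct call below is illustrative; the real harness supplies actual storage paths and upload callbacks):

```python
# Minimal wiring of the 010.sleep benchmark shown in the diffs above.
from time import sleep

size_generators = {"test": 1, "small": 100, "large": 1000}


def generate_input(
    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
):
    # Driver side: map a benchmark size name to a sleep duration in seconds.
    return {"sleep": size_generators[size]}


def handler(event):
    # Function side: sleep for the requested duration and echo it back.
    sleep_time = event.get("sleep")
    sleep(sleep_time)
    return {"result": sleep_time}


event = generate_input(None, "test", None, [], [], None, None)
print(handler(event))  # {'result': 1}
```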
10 changes: 6 additions & 4 deletions benchmarks/000.microbenchmarks/020.network-benchmark/input.py
@@ -2,10 +2,12 @@ def buckets_count():
     return 0, 1


-def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func):
+def generate_input(
+    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
+):
     return {
-        'bucket': {
-            'bucket': benchmarks_bucket,
-            'output': output_paths[0],
+        "bucket": {
+            "bucket": benchmarks_bucket,
+            "output": output_paths[0],
         },
     }