Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
76917ab
docs: remove disclaimer from README.MDs
marlon-tobaben May 12, 2026
586cfc4
feat: initial update of conversion scripts
marlon-tobaben May 12, 2026
755327c
feat: update simple benchmarks to new container
marlon-tobaben May 12, 2026
9459c27
refactor: remove venv
marlon-tobaben May 12, 2026
7490666
refactor: remove unused imports from .py files
marlon-tobaben May 12, 2026
ed12c97
docs: remove old mention of venv
marlon-tobaben May 12, 2026
871d1cb
fix: rename to
marlon-tobaben May 12, 2026
78e7b60
feat: update convert scripts to use new container
marlon-tobaben May 12, 2026
e513a30
fix: remove project id
marlon-tobaben May 12, 2026
cf8d10a
feat: update benchmark scripst
marlon-tobaben May 12, 2026
f1b34d1
fix: remove hardcoded link
marlon-tobaben May 12, 2026
f9c307d
fix: give more time for script
marlon-tobaben May 12, 2026
202b711
fix: import lmdb later to avoid crash
marlon-tobaben May 12, 2026
774732a
fix: avoid zero byte bug
marlon-tobaben May 12, 2026
6c1edf6
fix: add setup_venv back to makefile
marlon-tobaben May 12, 2026
e19b6dd
refactor: rename to install_venv
marlon-tobaben May 12, 2026
ec94afc
fix: make post processing more robust
marlon-tobaben May 13, 2026
6586e17
docs: update instructions for running benchmarks
marlon-tobaben May 13, 2026
ad0d9c7
refactor: remove old duplicate file
marlon-tobaben May 13, 2026
edd696e
refactor: remove undocumented old files
marlon-tobaben May 13, 2026
6cddb71
refactor: move zip folder to relevant folder
marlon-tobaben May 15, 2026
9ef21f3
feat: add new version of training benchmark
marlon-tobaben May 15, 2026
785b892
fix: change partition and runtime
marlon-tobaben May 15, 2026
65f8be2
refactor: rename scripts to match benchmark
marlon-tobaben May 18, 2026
004454e
fix: modify jobname and extend time
marlon-tobaben May 18, 2026
00ab950
docs: update numbers and links and restructure
marlon-tobaben May 18, 2026
9154990
docs: general updates and small fixes
marlon-tobaben May 18, 2026
a988c10
refactor: remove outdated and not documented scripts for ImageNet-1k
marlon-tobaben May 18, 2026
21743c7
feat: add post processing script and instructions for ViT benchmark
marlon-tobaben May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 8 additions & 17 deletions 03-file-formats/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,20 @@ build:
convert:
sbatch convert.sh "squashfs" && sbatch convert.sh "lmdb" && sbatch convert.sh "hdf5"

clean:
rm -rf venv-extension data-formats

bench-seq:
sbatch run-scripts/simple-benchmarks/run-comp-seq.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-seq.sh "lmdb" && sbatch run-scripts/simple-benchmarks/run-comp-seq.sh "hdf5"
sbatch run-scripts/simple-benchmarks/run-comp-tiny-seq.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-tiny-seq.sh "lmdb" && sbatch run-scripts/simple-benchmarks/run-comp-tiny-seq.sh "hdf5"

bench-par:
sbatch run-scripts/simple-benchmarks/run-comp-tiny.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-tiny.sh "lmdb" && sbatch run-scripts/simple-benchmarks/run-comp-tiny.sh "hdf5"

bench-large:
sbatch run-scripts/simple-benchmarks/run-comp-large.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-large.sh "lmdb"

bench-full:
sbatch run-scripts/simple-benchmarks/run-comp-large-full.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-large-full.sh "lmdb"
sbatch run-scripts/simple-benchmarks/run-comp-tiny-par.sh "squashfs" && sbatch run-scripts/simple-benchmarks/run-comp-tiny-par.sh "lmdb" && sbatch run-scripts/simple-benchmarks/run-comp-tiny-par.sh "hdf5"

post-seq:
python run-scripts/simple-benchmarks/post-process.py -d 'seq'
python3 run-scripts/simple-benchmarks/post-process.py -d 'tiny-seq'

post-par:
python run-scripts/simple-benchmarks/post-process.py -d 'tiny'
python3 run-scripts/simple-benchmarks/post-process.py -d 'tiny-par'

post-large:
python run-scripts/simple-benchmarks/post-process.py -d 'large'
bench-vit:
sbatch run-scripts/training-benchmarks/run-comp-vision-transformer.sh "squashfs" && sbatch run-scripts/training-benchmarks/run-comp-vision-transformer.sh "lmdb" && sbatch run-scripts/training-benchmarks/run-comp-vision-transformer.sh "hdf5"

post-large-full:
python run-scripts/simple-benchmarks/post-process.py -d 'large-full'
post-vit:
python3 run-scripts/training-benchmarks/post-process.py
96 changes: 68 additions & 28 deletions 03-file-formats/README.md

Large diffs are not rendered by default.

21 changes: 0 additions & 21 deletions 03-file-formats/analyse-traces.py

This file was deleted.

76 changes: 0 additions & 76 deletions 03-file-formats/compare-dataset.py

This file was deleted.

23 changes: 14 additions & 9 deletions 03-file-formats/convert.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
#!/bin/bash
#SBATCH --account=project_462000002
#SBATCH --account=project_xxxxxxxxx
#SBATCH --partition=small
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=1750
#SBATCH --time=0:10:00
#SBATCH --time=0:30:00

module use /appl/local/training/modules/AI-20240529
module load singularity-userfilesystems singularity-CPEbits
# this module facilitates the use of singularity containers on LUMI
module purge
module use /appl/local/laifs/modules
module load lumi-aif-singularity-bindings

# choose container
SIF=/appl/local/laifs/containers/lumi-multitorch-u24r70f21m50t210-20260415_130625/lumi-multitorch-full-u24r70f21m50t210-20260415_130625.sif

CONTAINER=/appl/local/containers/sif-images/lumi-pytorch-rocm-6.2.1-python-3.12-pytorch-20240918-vllm-4075b35.sif

if [[ $1 == 'squashfs' ]]; then
mkdir -p data-formats/squashfs/
time srun bash -c 'mksquashfs data-formats/raw/tiny-imagenet-200/val/ data-formats/squashfs/val.squashfs -processors 16 -no-progress'
time srun bash -c 'mksquashfs data-formats/raw/tiny-imagenet-200/train/ data-formats/squashfs/train.squashfs -processors 16 -no-progress'
time srun bash -c 'mksquashfs data-formats/raw/tiny-imagenet-200/val/ data-formats/squashfs/val.squashfs -processors 16 -no-progress -no-xattrs -noappend'
time srun bash -c 'mksquashfs data-formats/raw/tiny-imagenet-200/train/ data-formats/squashfs/train.squashfs -processors 16 -no-progress -no-xattrs -noappend'
elif [[ $1 == 'lmdb' ]]; then
mkdir -p data-formats/lmdb/
time srun singularity exec $CONTAINER bash -c '$WITH_CONDA && source venv-extension/bin/activate && python scripts/lmdb/convert_to_lmdb.py'
export SINGULARITYENV_PREPEND_PATH=/user-software/bin # gives access to packages inside the container
time srun singularity run -B venv.sqsh:/user-software:image-src=/ $SIF bash -c 'python scripts/lmdb/convert_to_lmdb.py'
elif [[ $1 == 'hdf5' ]]; then
mkdir -p data-formats/hdf5/
time srun singularity exec $CONTAINER bash -c '$WITH_CONDA && source venv-extension/bin/activate && python scripts/hdf5/convert_to_hdf5.py'
time srun singularity run $SIF bash -c 'python scripts/hdf5/convert_to_hdf5.py'
fi
21 changes: 0 additions & 21 deletions 03-file-formats/convert_zip.sh

This file was deleted.

25 changes: 17 additions & 8 deletions 03-file-formats/install_venv.sh
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
#!/bin/bash
module use /appl/local/training/modules/AI-20240529
module load singularity-userfilesystems singularity-CPEbits

CONTAINER=/appl/local/containers/sif-images/lumi-pytorch-rocm-6.2.1-python-3.12-pytorch-20240918-vllm-4075b35.sif

if [ -d "venv-extension" ]; then echo 'Removing existing venv-extension'; rm -Rf venv-extension; fi

singularity exec $CONTAINER bash -c '$WITH_CONDA && python -m venv venv-extension --system-site-packages && source venv-extension/bin/activate && python -m pip install -r venv-requirements.txt'
# Builds a Python venv on top of the container's site-packages, installs the
# extra package(s) into it, and packs the result into a squashfs image.
# convert.sh bind-mounts that image at /user-software inside the container.
module purge
export BUILD_DIR=env-temp
export SQUASHFS_NAME=venv.sqsh
export SIF=/appl/local/laifs/containers/lumi-multitorch-u24r70f21m50t210-20260415_130625/lumi-multitorch-full-u24r70f21m50t210-20260415_130625.sif

# Creates the venv inside the container and packs it into $SQUASHFS_NAME.
# Globals:  BUILD_DIR, SQUASHFS_NAME, SIF (read)
# Returns:  0 on success; non-zero as soon as a step fails, in which case
#           nothing is packed from a half-built venv.
# Uses `return` rather than `exit` so the script is also safe to `source`.
build_venv_squashfs() {
  # creating the venv
  echo "Creating venv in: $BUILD_DIR"
  # Start from an empty build dir so leftovers of an aborted earlier run are
  # never packed; ${VAR:?} guards against an accidental `rm -rf` of "".
  rm -rf -- "${BUILD_DIR:?}" || return 1
  mkdir -p -- "$BUILD_DIR" || return 1

  if ! singularity exec -B "$BUILD_DIR":/user-software "$SIF" bash -c '
    set -euo pipefail
    python -m venv /user-software --system-site-packages
    /user-software/bin/python -m pip install lmdb
  '; then
    echo "Creating the venv failed; not building $SQUASHFS_NAME" >&2
    rm -rf -- "${BUILD_DIR:?}"
    return 1
  fi

  # creating the squashfs file and removing the venv
  echo "Creating squashfs in: $SQUASHFS_NAME"
  # -noappend: without it mksquashfs appends to an already existing image, so
  # re-running this script would merge the new venv into the old one.
  if ! mksquashfs "$BUILD_DIR" "$SQUASHFS_NAME" -processors 1 -no-xattrs -noappend; then
    echo "mksquashfs failed for $SQUASHFS_NAME" >&2
    rm -rf -- "${BUILD_DIR:?}"
    return 1
  fi

  rm -rf -- "${BUILD_DIR:?}"
  echo "done"
}

build_venv_squashfs
24 changes: 0 additions & 24 deletions 03-file-formats/run-scripts/dataset-comparison/run_dataset_comp.sh

This file was deleted.

101 changes: 0 additions & 101 deletions 03-file-formats/run-scripts/simple-benchmarks/compare-dataset-large.py

This file was deleted.

Loading