Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 9 additions & 60 deletions .github/workflows/build_kernel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- uses: DeterminateSystems/nix-installer-action@main
with:
extra-conf: |
max-jobs = 4
max-jobs = 8
cores = 12
sandbox-fallback = false
- uses: cachix/cachix-action@v16
Expand All @@ -39,55 +39,10 @@ jobs:
- name: Nix info
run: nix-shell -p nix-info --run "nix-info -m"

- name: Build relu kernel
run: ( cd examples/kernels/relu && nix build .\#redistributable.torch29-cxx11-cu126-${{ matrix.arch }} )
- name: Copy relu kernel
run: cp -rL examples/kernels/relu/result relu-kernel

- name: Build relu-tvm-ffi kernel
run: ( cd examples/kernels/relu-tvm-ffi && nix build .\#redistributable.tvm-ffi01-cu126-${{ matrix.arch }} )
- name: Copy relu-tvm-ffi kernel
run: cp -rL examples/kernels/relu-tvm-ffi/result relu-tvm-ffi-kernel

- name: Build extra-data kernel
run: ( cd examples/kernels/extra-data && nix build .\#redistributable.torch29-cxx11-cu126-${{ matrix.arch }} )
- name: Copy extra-data kernel
run: cp -rL examples/kernels/extra-data/result extra-data

- name: Build relu kernel (CPU)
run: ( cd examples/kernels/relu && nix build .\#redistributable.torch29-cxx11-cpu-${{ matrix.arch }} )
- name: Copy relu kernel (CPU)
run: cp -rL examples/kernels/relu/result relu-kernel-cpu

- name: Build cutlass GEMM kernel
run: ( cd examples/kernels/cutlass-gemm && nix build .\#redistributable.torch29-cxx11-cu126-${{ matrix.arch }} )
- name: Copy cutlass GEMM kernel
run: cp -rL examples/kernels/cutlass-gemm/result cutlass-gemm-kernel

- name: Build cutlass-gemm-tvm-ffi kernel
run: ( cd examples/kernels/cutlass-gemm-tvm-ffi && nix build .\#redistributable.tvm-ffi01-cu126-${{ matrix.arch }} )
- name: Copy cutlass-gemm-tvm-ffi kernel
run: cp -rL examples/kernels/cutlass-gemm-tvm-ffi/result cutlass-gemm-tvm-ffi-kernel

- name: Build relu-backprop-compile kernel
run: ( cd examples/kernels/relu-backprop-compile && nix build .\#redistributable.torch29-cxx11-cu126-${{ matrix.arch }} )
- name: Copy relu-backprop-compile kernel
run: cp -rL examples/kernels/relu-backprop-compile/result relu-backprop-compile-kernel

# Just test that we build with the extra torchVersions argument.
- name: Build relu kernel (specific Torch version)
run: ( cd examples/kernels/relu-specific-torch && nix build . )

- name: Build relu kernel (compiler flags)
run: ( cd examples/kernels/relu-compiler-flags && nix build .\#redistributable.torch29-cxx11-cu126-${{ matrix.arch }} )

- name: Test that we can build a test shell (e.g. that gcc corresponds to CUDA-required)
run: ( cd examples/kernels/relu && nix build .#devShells.${{ matrix.arch }}.test )

- name: Build silu-and-mul kernel
run: ( cd examples/kernels/silu-and-mul && nix build .\#redistributable.torch-cuda )
- name: Copy silu-and-mul kernel
run: cp -rL examples/kernels/silu-and-mul/result silu-and-mul-kernel
- name: Build all example kernels
run: nix build -L ./examples/kernels#ci-build
- name: Copy kernel artifacts
run: cp -rL result/* .

- name: Upload kernel artifacts
uses: actions/upload-artifact@v6
Expand Down Expand Up @@ -118,17 +73,11 @@ jobs:
name: built-kernels-x86_64-linux
path: .

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build Docker image
uses: docker/build-push-action@v6
with:
context: .
file: nix-builder/tests/Dockerfile.test-kernel
platforms: linux/amd64
load: true
push: false
tags: kernel-builder:latest
run: |
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you meant

Do _not_ build test image inside a buildx container. We don't use its features and it adds a lot of serialization time.

docker build \
-t kernel-builder:latest \
-f nix-builder/tests/Dockerfile.test-kernel .

- name: Run Tests
run: |
Expand Down
Empty file.
Empty file.
159 changes: 159 additions & 0 deletions examples/kernels/flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
{
description = "All example kernels";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL. Thanks for this.


# The only declared input is the kernel-builder flake, referenced by a
# relative path two directories above this flake.
inputs = {
kernel-builder.url = "path:../..";
};

outputs =
{
self,
kernel-builder,
}:
let
# Take flake-utils, nixpkgs and the nixpkgs lib from kernel-builder's own
# inputs; this flake declares no inputs besides kernel-builder.
inherit (kernel-builder.inputs) flake-utils nixpkgs;
inherit (kernel-builder.inputs.nixpkgs) lib;

# Version components that are interpolated into the redistributable variant
# attribute names selected by the ciKernels entries.
cudaVersion = "cu126";
torchVersion = "29";
tvmFfiVersion = "01";

# Example kernels that are built in CI.
#
# Every entry is an attribute set with:
#
# - name: name of the entry in the output path
# - path: path of the kernel flake
# - drv (system -> flakeOutputs -> derivation): selects the derivation to
#   build from the flake outputs for the given system
# - torchVersions: optional override for the torchVersions argument
ciKernels =
  let
    # Select `packages.<system>.redistributable.<variant>` from the flake
    # outputs, where the variant name is computed from the system.
    redistributable =
      variantFor: system: flakeOutputs:
      flakeOutputs.packages.${system}.redistributable.${variantFor system};

    torchCuda = redistributable (system: "torch${torchVersion}-cxx11-${cudaVersion}-${system}");
    torchCpu = redistributable (system: "torch${torchVersion}-cxx11-cpu-${system}");
    tvmFfiCuda = redistributable (system: "tvm-ffi${tvmFfiVersion}-${cudaVersion}-${system}");
  in
  [
    {
      name = "relu-kernel";
      path = ./relu;
      drv = torchCuda;
    }
    {
      name = "relu-tvm-ffi-kernel";
      path = ./relu-tvm-ffi;
      drv = tvmFfiCuda;
    }
    {
      name = "extra-data";
      path = ./extra-data;
      drv = torchCuda;
    }
    {
      name = "relu-kernel-cpu";
      path = ./relu;
      drv = torchCpu;
    }
    {
      name = "cutlass-gemm-kernel";
      path = ./cutlass-gemm;
      drv = torchCuda;
    }
    {
      name = "cutlass-gemm-tvm-ffi-kernel";
      path = ./cutlass-gemm-tvm-ffi;
      drv = tvmFfiCuda;
    }
    {
      name = "relu-backprop-compile-kernel";
      path = ./relu-backprop-compile;
      drv = torchCuda;
    }
    {
      name = "silu-and-mul-kernel";
      path = ./silu-and-mul;
      drv = system: flakeOutputs: flakeOutputs.packages.${system}.redistributable.torch-cuda;
    }
    {
      # Checks that building with the extra torchVersions argument works.
      name = "relu-specific-torch";
      path = ./relu-specific-torch;
      drv = system: flakeOutputs: flakeOutputs.packages.${system}.default;
      torchVersions = _defaultVersions: [
        {
          torchVersion = "2.9";
          cudaVersion = "12.8";
          systems = [
            "x86_64-linux"
            "aarch64-linux"
          ];
          bundleBuild = true;
        }
      ];
    }
    {
      name = "relu-compiler-flags";
      path = ./relu-compiler-flags;
      drv = torchCuda;
    }
    {
      # Checks that a test shell can be built (e.g. that gcc is compatible
      # with the CUDA requirements).
      name = "relu-test-shell";
      path = ./relu;
      drv = system: flakeOutputs: flakeOutputs.devShells.${system}.test;
    }
  ];

# Generate the flake outputs for the kernel flake at `path`. `torchVersions`
# is only passed on to genKernelFlakeOutputs when it is not null.
mkKernelOutputs =
  {
    path,
    torchVersions ? null,
  }:
  let
    requiredArgs = {
      inherit self path;
    };
    optionalArgs = if torchVersions == null then { } else { inherit torchVersions; };
  in
  kernel-builder.lib.genKernelFlakeOutputs (requiredArgs // optionalArgs);

# The CI kernel entries, each extended with an `outputs` attribute that holds
# the flake outputs generated for that entry.
ciKernelOutputs =
  let
    withOutputs =
      entry:
      entry
      // {
        outputs = mkKernelOutputs {
          inherit (entry) path;
          torchVersions = entry.torchVersions or null;
        };
      };
  in
  map withOutputs ciKernels;
in
# For each listed system, expose one link farm that maps every CI kernel name
# to its derivation, as both the `ci-build` and the `default` package.
flake-utils.lib.eachSystem
  [
    "x86_64-linux"
    "aarch64-linux"
  ]
  (
    system:
    let
      inherit (nixpkgs.legacyPackages.${system}) linkFarm;

      # Link farm entry for one kernel: its name and the derivation that its
      # `drv` selector picks from its flake outputs for this system.
      toFarmEntry = kernel: {
        inherit (kernel) name;
        path = kernel.drv system kernel.outputs;
      };

      ci-build = linkFarm "ci-kernels" (map toFarmEntry ciKernelOutputs);
    in
    {
      packages = {
        inherit ci-build;
        default = ci-build;
      };
    }
  );
}
4 changes: 2 additions & 2 deletions kernel-builder/src/pyproject/templates/compat.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import ctypes
import importlib.util
import sys

import importlib
from pathlib import Path
from types import ModuleType


def _import_from_path(file_path: Path) -> ModuleType:
# We cannot use the module name as-is, after adding it to `sys.modules`,
# it would also be used for other imports. So, we make a module name that
Expand Down
15 changes: 7 additions & 8 deletions nix-builder/tests/Dockerfile.test-kernel
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@ ENV DEBIAN_FRONTEND=noninteractive \
NVIDIA_VISIBLE_DEVICES=all \
NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Install system dependencies
RUN dnf install -y \
curl

# Install uv package manager
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
# Install uv.
RUN dnf install -y curl && \
curl -LsSf https://astral.sh/uv/install.sh | sh && \
dnf clean all

# Set working directory
WORKDIR /app
Expand Down Expand Up @@ -59,8 +57,8 @@ RUN CUDA_MAJOR_MINOR=$(echo ${CUDA_VERSION} | cut -d'.' -f1,2) && \
uv add "torch==${TORCH_VERSION}"; \
fi

# add pytest for runtime tests
RUN uv add numpy pytest
# Add additional dependencies.
RUN uv add "apache-tvm-ffi~=0.1.9" numpy pytest

# Copy kernels and tests
COPY relu-kernel ./relu-kernel
Expand All @@ -69,6 +67,7 @@ COPY relu-kernel-cpu ./relu-kernel-cpu
COPY cutlass-gemm-kernel ./cutlass-gemm-kernel
COPY cutlass-gemm-tvm-ffi-kernel ./cutlass-gemm-tvm-ffi-kernel
COPY silu-and-mul-kernel ./silu-and-mul-kernel
COPY extra-data ./extra-data
COPY examples/kernels/extra-data/tests ./extra_data_tests
COPY examples/kernels/relu/tests ./relu_tests
COPY examples/kernels/relu-tvm-ffi/tests ./relu_tvm_ffi_tests
Expand Down
7 changes: 4 additions & 3 deletions nix-builder/tests/run-tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
set -euo pipefail

# Expand to build variant directories.
EXTRA_DATA_PATH=$(echo extra-data/torch*)
Expand All @@ -9,13 +10,13 @@ CUTLASS_TVM_FFI_PATH=$(echo cutlass-gemm-tvm-ffi-kernel/tvm-ffi*)
SILU_MUL_PATH=$(echo silu-and-mul-kernel/torch*)
RELU_CPU_PATH=$(echo relu-kernel-cpu/torch*)

PYTHONPATH="$EXTRA_DATA_PATH:$RELU_PATH:$RELU_TVM_FFI_PATH:$CUTLASS_PATH:$CUTLASS_TVM_FFI_PATH:$PYTHONPATH" \
PYTHONPATH="$EXTRA_DATA_PATH:$RELU_PATH:$RELU_TVM_FFI_PATH:$CUTLASS_PATH:$CUTLASS_TVM_FFI_PATH" \
.venv/bin/pytest extra_data_tests relu_tests relu_tvm_ffi_tests cutlass_gemm_tests cutlass_gemm_tvm_ffi_tests

# We only care about importing, the kernel is trivial.
PYTHONPATH="$SILU_MUL_PATH:$PYTHONPATH" \
PYTHONPATH="$SILU_MUL_PATH" \
.venv/bin/python -c "import silu_and_mul"

PYTHONPATH="$RELU_CPU_PATH:$PYTHONPATH" \
PYTHONPATH="$RELU_CPU_PATH" \
CUDA_VISIBLE_DEVICES="" \
.venv/bin/pytest relu_tests
Loading