From 98040e84adf4b84e3db8f227a07d74134b31ceab Mon Sep 17 00:00:00 2001
From: Jitendra Verma <jitendravjh@gmail.com>
Date: Mon, 11 May 2026 00:21:30 +0530
Subject: [PATCH 1/3] fix: transfer GridEmbedding grid to input device (fixes
 CUDA scalar indexing #125)

GridEmbedding built the positional grid using CPU range/meshgrid, then
called cat(grid, x) where x may be a CuArray. This caused:

  ERROR: Scalar indexing is disallowed. Invocation of getindex resulted
  in scalar indexing of a GPU array.

Fix: call Lux.get_device(x)(grid) immediately after building the grid,
so the array is moved to the same device as the input before the cat.
This is a no-op on CPU and transparently transfers to GPU/Metal/etc.

Fixes #125
---
 src/layers.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/layers.jl b/src/layers.jl
index e5d47dc..d562b10 100644
--- a/src/layers.jl
+++ b/src/layers.jl
@@ -231,7 +231,7 @@ function (layer::GridEmbedding)(x::AbstractArray{T, N}, ps, st) where {T, N}
     grid = meshgrid(
         map(enumerate(layer.grid_boundaries)) do (i, (min, max))
             return range(T(min), T(max); length = size(x, i))
-        end...
+        end...,
     )
 
     grid = repeat(
@@ -239,6 +239,10 @@ function (layer::GridEmbedding)(x::AbstractArray{T, N}, ps, st) where {T, N}
         ntuple(Returns(1), N - 1)...,
         size(x, N),
     )
+
+    # Move the CPU-built grid to the same device as x (fixes CUDA scalar indexing, #125)
+    grid = Lux.get_device(x)(grid)
+
     return cat(grid, x; dims = N - 1), st
 end
 

From e2ded6456a259b2de38f6abf8ee5c6800a6a13d3 Mon Sep 17 00:00:00 2001
From: Jitendra Verma <jitendravjh@gmail.com>
Date: Mon, 11 May 2026 10:05:36 +0530
Subject: [PATCH 2/3] feat: implement ConvolutionalNeuralOperator (CNO) (#122)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the Convolutional Neural Operator from:
  Raonic et al., "Convolutional Neural Operators for robust and accurate
  learning of PDEs", NeurIPS 2023. https://arxiv.org/abs/2302.01178

Architecture:
  - Lifting:     Conv(1x...x1): in_channels → hidden_channels
  - CNO blocks:  Upsample(:bilinear) → Conv(3x...x3, act, SamePad) → MeanPool
  - Projection:  Conv(1x...x1, act) → Conv(1x...x1): → out_channels

Each CNOBlock upsamples by upsample_factor, convolves at higher resolution,
then downsamples via MeanPool — ensuring the operator converges to a
continuous limit as spatial resolution increases (resolution-invariant).

New types exported:
  - ConvolutionalNeuralOperator  (the full model)
  - CNOBlock                     (the building block, composable)

Closes #122
---
 src/NeuralOperators.jl |   8 ++-
 src/models/cno.jl      | 151 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+), 1 deletion(-)
 create mode 100644 src/models/cno.jl

diff --git a/src/NeuralOperators.jl b/src/NeuralOperators.jl
index 3991aed..248c3e9 100644
--- a/src/NeuralOperators.jl
+++ b/src/NeuralOperators.jl
@@ -4,7 +4,9 @@ using AbstractFFTs: fft, rfft, ifft, irfft, fftshift
 using ConcreteStructs: @concrete
 using Random: Random, AbstractRNG
 
-using Lux: Lux, Chain, Dense, Conv, Parallel, NoOpLayer, WrappedFunction, Scale
+using Lux: Lux, Chain, Dense, Conv, Parallel, NoOpLayer, WrappedFunction, Scale,
+    Upsample, MeanPool, SamePad
+
 using LuxCore: LuxCore, AbstractLuxLayer, AbstractLuxWrapperLayer
 using LuxLib: fast_activation!!
 using NNlib: NNlib, batched_mul, pad_constant, gelu
@@ -18,6 +20,8 @@ include("layers.jl")
 include("models/fno.jl")
 include("models/deeponet.jl")
 include("models/nomad.jl")
+include("models/cno.jl")
+
 
 export FourierTransform
 export SpectralConv, OperatorConv, SpectralKernel, OperatorKernel
@@ -26,5 +30,7 @@ export GridEmbedding, ComplexDecomposedLayer, SoftGating
 export FourierNeuralOperator
 export DeepONet
 export NOMAD
+export ConvolutionalNeuralOperator
+
 
 end
diff --git a/src/models/cno.jl b/src/models/cno.jl
new file mode 100644
index 0000000..df160f0
--- /dev/null
+++ b/src/models/cno.jl
@@ -0,0 +1,151 @@
+"""
+    CNOBlock(
+        in_channels::Integer,
+        out_channels::Integer,
+        modes::Dims{N},
+        activation = gelu;
+        upsample_factor::Integer = 2,
+    ) where {N}
+
+A single Convolutional Neural Operator (CNO) block.
+
+Each block:
+1. **Upsamples** the input by `upsample_factor` using bilinear/trilinear interpolation.
+2. Applies a **3×(…×3) convolution** in the higher-resolution space with `SamePad`.
+3. Applies the **activation function** pointwise.
+4. **Downsamples** back to the original resolution via average pooling.
+
+This design ensures the discrete operator converges to a continuous limit as resolution
+increases, unlike standard CNNs which only approximate finite-dimensional maps.
+
+## Arguments
+
+  - `in_channels`: Number of input channels.
+  - `out_channels`: Number of output channels.
+  - `modes`: Spatial dimensions tuple (length = data dimensionality). Only the length is
+    used to set the kernel dimensionality.
+  - `activation`: Pointwise activation applied after convolution.
+
+## Keyword Arguments
+
+  - `upsample_factor`: Integer upsampling factor. Default is `2`.
+
+## References
+
+[1] Raonic et al., "Convolutional Neural Operators for robust and accurate learning of
+PDEs," NeurIPS 2023. https://arxiv.org/abs/2302.01178
+"""
+@concrete struct CNOBlock <: AbstractLuxWrapperLayer{:model}
+    model
+end
+
+function CNOBlock(
+        in_channels::Integer,
+        out_channels::Integer,
+        modes::Dims{N},
+        activation = gelu;
+        upsample_factor::Integer = 2,
+    ) where {N}
+    spatial_dims = ntuple(identity, N)
+    kernel = ntuple(Returns(3), N)
+    pool_window = ntuple(Returns(upsample_factor), N)
+
+    return CNOBlock(
+        Chain(
+            # 1. Upsample to higher resolution
+            Upsample(:bilinear; scale = upsample_factor),
+            # 2. Convolution at high resolution (preserves spatial size via SamePad)
+            Conv(kernel, in_channels => out_channels, activation; pad = SamePad()),
+            # 3. Downsample back via average pooling (paper Section 3.1)
+            MeanPool(pool_window),
+        ),
+    )
+end
+
+"""
+    ConvolutionalNeuralOperator(
+        modes::Dims{N},
+        in_channels::Integer,
+        out_channels::Integer,
+        hidden_channels::Integer;
+        num_layers::Integer = 4,
+        activation = gelu,
+        upsample_factor::Integer = 2,
+    ) where {N}
+
+Convolutional Neural Operator (CNO) for learning PDE solution operators.
+
+CNO applies a sequence of resolution-preserving continuous convolutional blocks. Each
+block upsamples the input, applies a convolution in the higher-resolution space, and
+downsamples back. This design is proven to converge to a well-defined continuous operator
+as resolution increases, making CNO resolution-invariant by construction.
+
+**Architecture**:
+1. **Lifting** `Conv(1×…×1)`: maps `in_channels → hidden_channels`
+2. **CNO blocks** × `num_layers`: each is upsample → conv → activation → avgpool
+3. **Projection**: `Conv(1×…×1, act)` → `Conv(1×…×1)` maps to `out_channels`
+
+## Arguments
+
+  - `modes`: Spatial size tuple (length `d` for d-dimensional data). Only its length
+    matters — kept consistent with the FNO API.
+  - `in_channels`: Number of input channels.
+  - `out_channels`: Number of output channels.
+  - `hidden_channels`: Number of channels inside the CNO blocks.
+
+## Keyword Arguments
+
+  - `num_layers`: Number of `CNOBlock` layers. Default is `4`.
+  - `activation`: Activation function used inside each block. Default is `gelu`.
+  - `upsample_factor`: Spatial upsampling factor inside each block. Default is `2`.
+
+## References
+
+[1] Raonic et al., "Convolutional Neural Operators for robust and accurate learning of
+PDEs," NeurIPS 2023. https://arxiv.org/abs/2302.01178
+
+## Example
+
+```jldoctest
+julia> cno = ConvolutionalNeuralOperator((16,), 1, 1, 32; num_layers=3);
+
+julia> ps, st = Lux.setup(Xoshiro(), cno);
+
+julia> u = rand(Float32, 64, 1, 5);
+
+julia> size(first(cno(u, ps, st)))
+(64, 1, 5)
+```
+"""
+@concrete struct ConvolutionalNeuralOperator <: AbstractLuxWrapperLayer{:model}
+    model <: AbstractLuxLayer
+end
+
+function ConvolutionalNeuralOperator(
+        modes::Dims{N},
+        in_channels::Integer,
+        out_channels::Integer,
+        hidden_channels::Integer;
+        num_layers::Integer = 4,
+        activation = gelu,
+        upsample_factor::Integer = 2,
+    ) where {N}
+    ones_kernel = ntuple(Returns(1), N)
+
+    lifting = Conv(ones_kernel, in_channels => hidden_channels)
+
+    cno_blocks = Chain(
+        [
+            CNOBlock(
+                hidden_channels, hidden_channels, modes, activation; upsample_factor,
+            ) for _ in 1:num_layers
+        ]...,
+    )
+
+    projection = Chain(
+        Conv(ones_kernel, hidden_channels => hidden_channels, activation),
+        Conv(ones_kernel, hidden_channels => out_channels),
+    )
+
+    return ConvolutionalNeuralOperator(Chain(; lifting, cno_blocks, projection))
+end

From 8b1a961f6964afb993508ad2a251e17cad794d69 Mon Sep 17 00:00:00 2001
From: Jitendra Verma <jitendravjh@gmail.com>
Date: Mon, 18 May 2026 21:22:15 +0530
Subject: [PATCH 3/3] test: add CI-compatible Reactant gradient tests for
 ConvolutionalNeuralOperator

---
 test/models/cno_tests.jl | 59 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 test/models/cno_tests.jl

diff --git a/test/models/cno_tests.jl b/test/models/cno_tests.jl
new file mode 100644
index 0000000..8d0a4fa
--- /dev/null
+++ b/test/models/cno_tests.jl
@@ -0,0 +1,59 @@
+using NeuralOperators, Test
+
+include("../shared_testsetup.jl")
+
+@testset "Convolutional Neural Operator" begin
+    rng = StableRNG(12345)
+
+    setups = [
+        (
+            modes = (4,),
+            in_channels = 1,
+            out_channels = 1,
+            hidden_channels = 8,
+            num_layers = 2,
+            x_size = (16, 1, 4),
+            y_size = (16, 1, 4),
+        ),
+        (
+            modes = (4, 4),
+            in_channels = 2,
+            out_channels = 1,
+            hidden_channels = 8,
+            num_layers = 2,
+            x_size = (16, 16, 2, 4),
+            y_size = (16, 16, 1, 4),
+        ),
+    ]
+
+    xdev = reactant_device(; force = true)
+
+    @testset "$(length(setup.modes))D" for setup in setups
+        cno = ConvolutionalNeuralOperator(
+            setup.modes, setup.in_channels, setup.out_channels, setup.hidden_channels;
+            num_layers = setup.num_layers,
+        )
+        display(cno)
+        ps, st = Lux.setup(rng, cno)
+
+        x = rand(rng, Float32, setup.x_size...)
+        y = rand(rng, Float32, setup.y_size...)
+
+        @test size(first(cno(x, ps, st))) == setup.y_size
+
+        ps_ra, st_ra = (ps, st) |> xdev
+        x_ra, y_ra = (x, y) |> xdev
+
+        res = first(cno(x, ps, st))
+        res_ra, _ = @jit cno(x_ra, ps_ra, st_ra)
+        @test res_ra ≈ res atol = 1.0f-2 rtol = 1.0f-2
+
+        @testset "check gradients" begin
+            ∂x_fd, ∂ps_fd = ∇sumabs2_reactant_fd(cno, x_ra, ps_ra, st_ra)
+            ∂x_ra, ∂ps_ra = ∇sumabs2_reactant(cno, x_ra, ps_ra, st_ra)
+
+            @test ∂x_fd ≈ ∂x_ra atol = 1.0f-1 rtol = 1.0f-1
+            @test check_approx(∂ps_fd, ∂ps_ra; atol = 1.0f-1, rtol = 1.0f-1)
+        end
+    end
+end