From 9bb04cee8803d5f8479ae58197c21931c75b5613 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 14 Jan 2026 16:08:31 +0900
Subject: [PATCH 001/183] Update docker installation commit for
 perception_evaluation

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c5240c498..22d18f5ba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -61,7 +61,7 @@ RUN python3 -m pip --no-cache-dir install \
 RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1
 
 # Install autoware-perception-evaluation
-RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@develop
+RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@dd37a546352f953565033f1d4b8cb443df1232c59
 
 # Need to dowgrade setuptools to 60.2.0 to fix setup
 RUN python3 -m pip --no-cache-dir install \

From 1a7ebee39f66d37cd45b20e23575d352a6cdc493 Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Thu, 19 Mar 2026 20:57:55 +0900
Subject: [PATCH 002/183] Switch BEVFusion configs from 4xb8 to 8xb8, lr 2e-4

---
 ...evfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++--
 ...idar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} | 4 ++--
 ...oxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++--
 ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++--
 ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++--
 ...fline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 4 ++--
 ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++--
 ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++--
 ...usion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 8 ++++----
 ..._cosine.py => default_20e_8xb8_adamw_linear_cosine.py} | 6 +++---
 ...8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} | 4 ++--
 ..._cosine.py => default_30e_8xb8_adamw_linear_cosine.py} | 4 ++--
 ...8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} | 4 ++--
 13 files changed, 29 insertions(+), 29 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (94%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_20e_4xb8_adamw_linear_cosine.py => default_20e_8xb8_adamw_linear_cosine.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_linear_cosine.py => default_30e_8xb8_adamw_linear_cosine.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_4xb8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} (98%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
index b781e2c71..e65c52ece 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
     "../default/models/default_camera_swin_fpn_120m.py",
-    "../default/schedulers/default_30e_4xb8_adamw_linear_cosine.py",
+    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m"
+experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
index a1ab10f57..4f81af760 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
     "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py",
+    "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_camera_lidar_intensity/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m"
+experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py
index a9887a15c..6556cf818 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
     "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py",
+    "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m"
+experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 8b0aef32e..5d743e184 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_offline_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_offline/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
index 59f91c8a4..da461a567 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py",
     "../default/pipelines/default_offline_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_offline/jpntaxi_gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index df500705c..89bb7cd7b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -4,7 +4,7 @@
     "../default/pipelines/default_offline_lidar_120m.py",
     "../models/default_lidar_second_secfpn_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -17,7 +17,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_offline/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 178f5ff3d..69be0f0d9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
index e2b2d1678..e6addac7d 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py",
     "../default/pipelines/default_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
similarity index 94%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index d65c470d9..a5c72aef0 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
     "../default/pipelines/default_lidar_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_4xb8_adamw_cosine.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_1/"
 
-experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m"
+experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index 8dc5e7bf1..d491eaa4b 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -1,11 +1,11 @@
 # learning rate
-# lr = 0.0001
-lr = 1e-4
+# lr = 0.0002
+lr = 2e-4
 t_max = 6
 max_epochs = 20
 val_interval = 1
 
-train_gpu_size = 4
+train_gpu_size = 8
 test_batch_size = 2
 train_batch_size = 8
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index 07a9110c7..c5053d943 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -1,10 +1,10 @@
 # learning rate
-lr = 0.0001
+lr = 2e-4
 t_max = 8
 max_epochs = 30
 val_interval = 5
 
-train_gpu_size = 4
+train_gpu_size = 8
 test_batch_size = 2
 train_batch_size = 8
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index 9d4ee8e61..c05aeafcb 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,10 +1,10 @@
 # learning rate
-lr = 1e-4
+lr = 2e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1
 
-train_gpu_size = 4
+train_gpu_size = 8
 test_batch_size = 2
 train_batch_size = 8
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
index 29fc38b4c..fc1914dde 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
@@ -1,10 +1,10 @@
 # learning rate
-lr = 0.0001
+lr = 2e-4
 t_max = 15
 max_epochs = 50
 val_interval = 5
 
-train_gpu_size = 4
+train_gpu_size = 8
 test_batch_size = 2
 train_batch_size = 8
 

From 84024516eafe70aac0252f8b1cafbf40ba7fd05f Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Tue, 24 Mar 2026 16:48:43 +0900
Subject: [PATCH 003/183] Use jpntaxi_base dataset and add load_from checkpoint

---
 ..._voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} |  6 +++---
 ...ar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py |  6 ++++--
 ..._voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} | 10 ++++++----
 3 files changed, 13 insertions(+), 9 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} (92%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
index 6556cf818..20c85b1d8 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
@@ -1,6 +1,6 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
     "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py",
     "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py",
@@ -15,8 +15,8 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/user_name/"
 
-experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m"
+experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 69be0f0d9..e43f9c485 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,9 +13,9 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -143,3 +143,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
similarity index 92%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index e6addac7d..8538784f8 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -1,6 +1,6 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py",
     "../default/pipelines/default_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
     "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m"
+experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -143,3 +143,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
\ No newline at end of file

From cf6e13dea087c2983e045df78840b86c166cd79a Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Wed, 25 Mar 2026 19:07:44 +0900
Subject: [PATCH 004/183] Add T4MetricV2 evaluation configs for BEVFusion-L

---
 ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 78 +++++++++++++++++++
 ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 78 +++++++++++++++++++
 ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 78 +++++++++++++++++++
 3 files changed, 234 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
new file mode 100644
index 000000000..f048ab1a8
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -0,0 +1,78 @@
+_base_ = [
+    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py",
+]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Add evaluator configs
+perception_evaluator_configs = dict(
+    dataset_paths=_base_.data_root,
+    frame_id="base_link",
+    evaluation_config_dict=_base_.evaluator_metric_configs,
+    load_raw_data=False,
+)
+
+frame_pass_fail_config = dict(
+    target_labels=_base_.class_names,
+    # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+    confidence_threshold_list=None,
+)
+
+training_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name
+)
+testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name
+validation_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name
+)
+
+val_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="validation",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=False,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)
+
+test_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="testing",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=True,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
new file mode 100644
index 000000000..b4d8ddfbf
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -0,0 +1,78 @@
+_base_ = [
+    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py",
+]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Add evaluator configs
+perception_evaluator_configs = dict(
+    dataset_paths=_base_.data_root,
+    frame_id="base_link",
+    evaluation_config_dict=_base_.evaluator_metric_configs,
+    load_raw_data=False,
+)
+
+frame_pass_fail_config = dict(
+    target_labels=_base_.class_names,
+    # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+    confidence_threshold_list=None,
+)
+
+training_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name
+)
+testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name
+validation_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name
+)
+
+val_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="validation",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=False,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)
+
+test_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="testing",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=True,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
new file mode 100644
index 000000000..7a0215139
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -0,0 +1,78 @@
+_base_ = [
+    "./bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py",
+]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Add evaluator configs
+perception_evaluator_configs = dict(
+    dataset_paths=_base_.data_root,
+    frame_id="base_link",
+    evaluation_config_dict=_base_.evaluator_metric_configs,
+    load_raw_data=False,
+)
+
+frame_pass_fail_config = dict(
+    target_labels=_base_.class_names,
+    # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+    confidence_threshold_list=None,
+)
+
+training_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name
+)
+testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name
+validation_statistics_parquet_path = (
+    _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name
+)
+
+val_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="validation",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=False,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)
+
+test_evaluator = dict(
+    _delete_=True,
+    type="T4MetricV2",
+    data_root=_base_.data_root,
+    ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name,
+    training_statistics_parquet_path=training_statistics_parquet_path,
+    testing_statistics_parquet_path=testing_statistics_parquet_path,
+    validation_statistics_parquet_path=validation_statistics_parquet_path,
+    output_dir="testing",
+    dataset_name="base",
+    perception_evaluator_configs=perception_evaluator_configs,
+    critical_object_filter_config=None,
+    frame_pass_fail_config=frame_pass_fail_config,
+    num_workers=64,
+    scene_batch_size=-1,
+    write_metric_summary=True,
+    class_names={{_base_.class_names}},
+    name_mapping={{_base_.name_mapping}},
+    experiment_name=experiment_name,
+    experiment_group_name=_base_.experiment_group_name,
+)

From e1ebc993b9c8f17090a64b5a11476a3733e89084 Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Thu, 26 Mar 2026 20:20:08 +0900
Subject: [PATCH 005/183] Update jpntaxi test groups, 120m pipelines and lr

---
 .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 1 -
 .../pipelines/default_camera_lidar_intensity_120m.py      | 4 ++--
 .../t4dataset/default/pipelines/default_lidar_120m.py     | 8 ++++++--
 .../default/pipelines/default_lidar_intensity_120m.py     | 8 ++++++--
 .../schedulers/default_20e_8xb8_adamw_linear_cosine.py    | 2 +-
 .../default/schedulers/default_30e_8xb8_adamw_cosine.py   | 2 +-
 .../schedulers/default_30e_8xb8_adamw_linear_cosine.py    | 2 +-
 .../default/schedulers/default_50e_8xb8_adamw_cosine.py   | 2 +-
 8 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
index 28ba4ab33..a50cf8852 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
@@ -18,7 +18,6 @@
 
 # dataset scene setting
 dataset_test_groups = {
-    "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False),
     "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", True),
 }
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index acac440dc..9d1910dab 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -84,8 +84,8 @@
             "traffic_cone",
         ],
     ),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 7ffedc232..7ee393ea6 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -64,8 +64,8 @@
             "traffic_cone",
         ],
     ),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
@@ -89,6 +89,8 @@
             "img_aug_matrix",
             "lidar_aug_matrix",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
@@ -130,6 +132,8 @@
             "num_pts_feats",
             "num_views",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index c7fa1b2cb..8b154901e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -64,8 +64,8 @@
             "traffic_cone",
         ],
     ),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
@@ -89,6 +89,8 @@
             "img_aug_matrix",
             "lidar_aug_matrix",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
@@ -130,6 +132,8 @@
             "num_pts_feats",
             "num_views",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index d491eaa4b..c3f82e76d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
 # lr = 0.0002
-lr = 2e-4
+lr = 1.5e-4
 t_max = 6
 max_epochs = 20
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index c5053d943..94c2a4160 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 2e-4
+lr = 1.5e-4
 t_max = 8
 max_epochs = 30
 val_interval = 5
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index c05aeafcb..f5c747e62 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 2e-4
+lr = 1.5e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
index fc1914dde..f0bd87ca4 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 2e-4
+lr = 1.5e-4
 t_max = 15
 max_epochs = 50
 val_interval = 5

From 2dac4333e0dce899701b0602778626e0df191a8a Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Fri, 27 Mar 2026 11:01:29 +0900
Subject: [PATCH 006/183] Trim lidar classes, lower lr to 1e-4, enable sync_bn

---
 .../t4dataset/default/pipelines/default_lidar_120m.py  | 10 +++++-----
 .../schedulers/default_20e_8xb8_adamw_linear_cosine.py |  6 +++++-
 .../schedulers/default_30e_8xb8_adamw_cosine.py        |  6 +++++-
 .../schedulers/default_30e_8xb8_adamw_linear_cosine.py |  6 +++++-
 .../schedulers/default_50e_8xb8_adamw_cosine.py        |  6 +++++-
 5 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 7ee393ea6..347ba6452 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -54,14 +54,14 @@
         classes=[
             "car",
             "truck",
-            "construction_vehicle",
+            # "construction_vehicle",
             "bus",
-            "trailer",
-            "barrier",
-            "motorcycle",
+            # "trailer",
+            # "barrier",
+            # "motorcycle",
             "bicycle",
             "pedestrian",
-            "traffic_cone",
+            # "traffic_cone",
         ],
     ),
     # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1),
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index c3f82e76d..a0be6f4ab 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
 # lr = 0.0002
-lr = 1.5e-4
+lr = 1.0e-4
 t_max = 6
 max_epochs = 20
 val_interval = 1
@@ -57,3 +57,7 @@
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)
+
+# Only set if the number of train_gpu_size more than 1
+if train_gpu_size > 1:
+    sync_bn = "torch"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index 94c2a4160..edcbd74bf 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 1.5e-4
+lr = 1.0e-4
 t_max = 8
 max_epochs = 30
 val_interval = 5
@@ -69,3 +69,7 @@
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)
+
+# Only set if the number of train_gpu_size more than 1
+if train_gpu_size > 1:
+    sync_bn = "torch"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index f5c747e62..32e8d59fa 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 1.5e-4
+lr = 1.0e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1
@@ -56,3 +56,7 @@
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)
+
+# Only set if the number of train_gpu_size more than 1
+if train_gpu_size > 1:
+    sync_bn = "torch"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
index f0bd87ca4..58192c2de 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 1.5e-4
+lr = 1.0e-4
 t_max = 15
 max_epochs = 50
 val_interval = 5
@@ -69,3 +69,7 @@
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)
+
+# Only set if the number of train_gpu_size more than 1
+if train_gpu_size > 1:
+    sync_bn = "torch"
\ No newline at end of file

From 9046b7bb1b3603345dc886903794ea118e9737f4 Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Fri, 27 Mar 2026 11:29:07 +0900
Subject: [PATCH 007/183] Log the runner model before training

---
 tools/detection3d/train.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py
index c379025d1..254783b92 100644
--- a/tools/detection3d/train.py
+++ b/tools/detection3d/train.py
@@ -123,6 +123,10 @@ def main():
         # build customized runner from the registry
         # if 'runner_type' is set in the cfg
         runner = RUNNERS.build(cfg)
+   
+    # Output all model 
+    print_log(f"Runner model: ", logger="current")
+    print_log(f"{runner.model}", logger="current")
 
     # start training
     runner.train()

From d4d93fa3bab4fbe63c528c2a029bfb8e3a5346db Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Sat, 28 Mar 2026 00:39:14 +0900
Subject: [PATCH 008/183] Export TransFusionBBoxCoder, tweak bbox decoding

---
 projects/BEVFusion/bevfusion/__init__.py                       | 3 ++-
 projects/BEVFusion/bevfusion/bevfusion_head.py                 | 2 +-
 projects/BEVFusion/bevfusion/utils.py                          | 1 +
 .../default/models/default_lidar_second_secfpn_120m.py         | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index e849db227..947ebab23 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -6,7 +6,7 @@
 from .sparse_encoder import BEVFusionSparseEncoder
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
-from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost
+from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
 
 __all__ = [
     "BEVFusion",
@@ -26,4 +26,5 @@
     "TransformerDecoderLayer",
     "BEVFusionRandomFlip3D",
     "BEVFusionGlobalRotScaleTrans",
+		"TransFusionBBoxCoder",
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 143c35a14..853523c4f 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -554,7 +554,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
             vel = None
 
         boxes_dict = self.bbox_coder.decode(
-            score, rot, dim, center, height, vel
+            score, rot, dim, center, height, vel, filter=False
         )  # decode the prediction to real world metric bbox
         bboxes_tensor = boxes_dict[0]["bboxes"]
         gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device)
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index c47604dbd..5b7c94877 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -93,6 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
             predictions_dicts.append(predictions_dict)
 
         if filter is False:
+					  print("filter is False")
             return predictions_dicts
 
         # use score threshold
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index b5d9a8fdc..a7fac4b37 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -98,7 +98,7 @@
         bbox_coder=dict(
             type="TransFusionBBoxCoder",
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
-            score_threshold=0.0,
+            score_threshold=0.1,
             out_size_factor=8,
             code_size=10,
         ),

From b2714a87052e441b94bf8cc15793ef12fb39698f Mon Sep 17 00:00:00 2001
From: KokSeang Tan <kseangtan@Gmail.com>
Date: Tue, 31 Mar 2026 10:58:41 +0900
Subject: [PATCH 009/183] Revert decode debug changes, prune classes, raise lr

---
 projects/BEVFusion/bevfusion/utils.py                      | 1 -
 .../default/models/default_lidar_second_secfpn_120m.py     | 2 +-
 .../t4dataset/default/pipelines/default_lidar_120m.py      | 7 +------
 .../default/pipelines/default_lidar_intensity_120m.py      | 5 -----
 .../schedulers/default_20e_8xb8_adamw_linear_cosine.py     | 4 ++--
 .../default/schedulers/default_30e_8xb8_adamw_cosine.py    | 3 ++-
 .../schedulers/default_30e_8xb8_adamw_linear_cosine.py     | 3 ++-
 .../default/schedulers/default_50e_8xb8_adamw_cosine.py    | 3 ++-
 8 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index 5b7c94877..c47604dbd 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -93,7 +93,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
             predictions_dicts.append(predictions_dict)
 
         if filter is False:
-					  print("filter is False")
             return predictions_dicts
 
         # use score threshold
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index a7fac4b37..b5d9a8fdc 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -98,7 +98,7 @@
         bbox_coder=dict(
             type="TransFusionBBoxCoder",
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
-            score_threshold=0.1,
+            score_threshold=0.0,
             out_size_factor=8,
             code_size=10,
         ),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 347ba6452..e79c30710 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -54,17 +54,12 @@
         classes=[
             "car",
             "truck",
-            # "construction_vehicle",
             "bus",
-            # "trailer",
-            # "barrier",
-            # "motorcycle",
             "bicycle",
             "pedestrian",
-            # "traffic_cone",
         ],
     ),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1),
+    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
     # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 8b154901e..ce7985fd8 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -54,14 +54,9 @@
         classes=[
             "car",
             "truck",
-            "construction_vehicle",
             "bus",
-            "trailer",
-            "barrier",
-            "motorcycle",
             "bicycle",
             "pedestrian",
-            "traffic_cone",
         ],
     ),
     # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index a0be6f4ab..d1d11e7c9 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
-# lr = 0.0002
-lr = 1.0e-4
+# 1e-4 * sqrt(2) = 0.0001414
+lr = 1.4141e-4
 t_max = 6
 max_epochs = 20
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index edcbd74bf..f4f102170 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -1,5 +1,6 @@
 # learning rate
-lr = 1.0e-4
+# 1e-4 * sqrt(2) = 0.0001414
+lr = 1.4141e-4
 t_max = 8
 max_epochs = 30
 val_interval = 5
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index 32e8d59fa..44870ccf4 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,5 +1,6 @@
 # learning rate
-lr = 1.0e-4
+# 1e-4 * sqrt(2) = 0.0001414
+lr = 1.4141e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
index 58192c2de..542ccdd8e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
@@ -1,5 +1,6 @@
 # learning rate
-lr = 1.0e-4
+# 1e-4 * sqrt(2) = 0.0001414
+lr = 1.4141e-4
 t_max = 15
 max_epochs = 50
 val_interval = 5

From 43adb38bf15ea01b0ff81b06a5e4e435a6e490f9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 02:01:59 +0000
Subject: [PATCH 010/183] ci(pre-commit): autofix

---
 projects/BEVFusion/bevfusion/__init__.py                      | 2 +-
 ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +-
 ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +-
 .../schedulers/default_20e_8xb8_adamw_linear_cosine.py        | 2 +-
 .../default/schedulers/default_30e_8xb8_adamw_cosine.py       | 2 +-
 .../schedulers/default_30e_8xb8_adamw_linear_cosine.py        | 2 +-
 .../default/schedulers/default_50e_8xb8_adamw_cosine.py       | 2 +-
 tools/detection3d/train.py                                    | 4 ++--
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 947ebab23..60a64b532 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -26,5 +26,5 @@
     "TransformerDecoderLayer",
     "BEVFusionRandomFlip3D",
     "BEVFusionGlobalRotScaleTrans",
-		"TransFusionBBoxCoder",
+    "TransFusionBBoxCoder",
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index e43f9c485..d984b5585 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
\ No newline at end of file
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 8538784f8..0878cef29 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
\ No newline at end of file
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index d1d11e7c9..15ba38878 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -60,4 +60,4 @@
 
 # Only set if the number of train_gpu_size more than 1
 if train_gpu_size > 1:
-    sync_bn = "torch"
\ No newline at end of file
+    sync_bn = "torch"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index f4f102170..a2cd2d2e9 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -73,4 +73,4 @@
 
 # Only set if the number of train_gpu_size more than 1
 if train_gpu_size > 1:
-    sync_bn = "torch"
\ No newline at end of file
+    sync_bn = "torch"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index 44870ccf4..264eda921 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -60,4 +60,4 @@
 
 # Only set if the number of train_gpu_size more than 1
 if train_gpu_size > 1:
-    sync_bn = "torch"
\ No newline at end of file
+    sync_bn = "torch"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
index 542ccdd8e..87571d0b3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
@@ -73,4 +73,4 @@
 
 # Only set if the number of train_gpu_size more than 1
 if train_gpu_size > 1:
-    sync_bn = "torch"
\ No newline at end of file
+    sync_bn = "torch"
diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py
index 254783b92..f7e6309fb 100644
--- a/tools/detection3d/train.py
+++ b/tools/detection3d/train.py
@@ -123,8 +123,8 @@ def main():
         # build customized runner from the registry
         # if 'runner_type' is set in the cfg
         runner = RUNNERS.build(cfg)
-   
-    # Output all model 
+
+    # Output all model
     print_log(f"Runner model: ", logger="current")
     print_log(f"{runner.model}", logger="current")
 

From 73dd1c40c1100879b3a700fe5e82536019045cb0 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 8 Apr 2026 19:05:59 +0900
Subject: [PATCH 011/183] Added

---
 .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +++-
 .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 2 +-
 projects/BEVFusion/setup.py                                   | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index a5c72aef0..64bf2208a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
@@ -143,3 +143,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+resume = True
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index e79c30710..a74ad2ea0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 32
+num_workers = 16
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting
diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py
index 837d1f53e..38f588b20 100644
--- a/projects/BEVFusion/setup.py
+++ b/projects/BEVFusion/setup.py
@@ -43,7 +43,7 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i
         name="bev_pool",
         install_requires=[
             "onnx_graphsurgeon==0.5.8",
-            "spconv-cu120==2.3.6",
+            "spconv-cu126==2.3.8",
         ],
         ext_modules=[
             make_cuda_ext(

From 89e26700d614ab6abc92212ad69083c84019083f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 8 Apr 2026 10:06:34 +0000
Subject: [PATCH 012/183] ci(pre-commit): autofix

---
 .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index 64bf2208a..0a2a178c6 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-resume = True
\ No newline at end of file
+resume = True

From 380d7aa0d4e85934bc2bdd3bd8e88f7746acd4d4 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 10 Apr 2026 23:47:24 +0900
Subject: [PATCH 013/183] Added

---
 ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index d984b5585..605e3cf7c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"

From b60e45ae0ca338c30fdb16a7efcc9fb5332bae0e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 12 Apr 2026 21:42:53 +0900
Subject: [PATCH 014/183] Update configs

---
 ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++--
 ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +-
 ...evfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 6 +++++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 0878cef29..563f71cf9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index b4d8ddfbf..238054ab5 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index 0a2a178c6..8bf21b1b2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_6_2/"
 
-experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -31,6 +31,10 @@
     pts_middle_encoder=dict(
         in_channels=_base_.point_use_dim,
         sparse_shape=_base_.grid_size,
+        num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max time lag is technically two sweeps (200 ms) here
+        aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices

From 9b38a42898e724c57532b5ad6bf94546c870e809 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 13 Apr 2026 18:34:04 +0900
Subject: [PATCH 015/183] Added

---
 autoware_ml/configs/detection3d/dataset/t4dataset/base.py     | 1 -
 .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py     | 2 +-
 ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index c92d58431..d0744a131 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -48,7 +48,6 @@
     "j6gen2_base": ("t4dataset_j6gen2_base_infos_test.pkl", False),
     "j6gen2": ("t4dataset_j6gen2_infos_test.pkl", False),
     "largebus": ("t4dataset_largebus_infos_test.pkl", False),
-    "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", False),
     "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False),
     "base": ("t4dataset_base_infos_test.pkl", True),
 }
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
index a50cf8852..b7ddb799a 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
@@ -18,7 +18,7 @@
 
 # dataset scene setting
 dataset_test_groups = {
-    "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", True),
+    "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", True),
 }
 
 dataset_version_list = [
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 0878cef29..563f71cf9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth"
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"

From f39b5841db399a9ebd60bf4d40396c83cadd5089 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 12:08:05 +0900
Subject: [PATCH 016/183] Updated

---
 ...idar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 563f71cf9..b6677ff05 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_6_2/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -31,6 +31,10 @@
     pts_middle_encoder=dict(
         in_channels=_base_.point_use_dim,
         sparse_shape=_base_.grid_size,
+        num_aug_features=5,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max time lag is technically two sweeps (200 ms) here
+        aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
+        aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
@@ -144,4 +148,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"

From 24a88adf1e782379104d6d7ff64d408b3c263d8e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 12:08:56 +0900
Subject: [PATCH 017/183] Added

---
 ...on_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +-
 ...el_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +-
 ...n_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 5 +++--
 ...l_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +-
 ...dar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +-
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 605e3cf7c..41a2152cf 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index f048ab1a8..7dfc7e0f8 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 563f71cf9..998e5a22e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -48,6 +48,7 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
+        dense_heatmap_pooling_classes=["car", "truck", "bus"],  # Use class indices for pooling
     ),
 )
 
@@ -144,4 +145,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
+load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index b4d8ddfbf..238054ab5 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
index 7a0215139..4f9fb7b65 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_1/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2"

From c637420d5e6e9f2fa898c581184a45098ce6469b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 12:10:58 +0900
Subject: [PATCH 018/183] Added

---
 ..._lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++--
 ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +-
 ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 7 +++----
 ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +-
 ...vfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +---
 ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +-
 6 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 41a2152cf..8c02ca112 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
\ No newline at end of file
+load_from "<best_checkpoint>"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index 7dfc7e0f8..afb150284 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 998e5a22e..3dda36c3a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -48,7 +48,6 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
-        dense_heatmap_pooling_classes=["car", "truck", "bus"],  # Use class indices for pooling
     ),
 )
 
@@ -145,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth"
\ No newline at end of file
+load_from "<best_checkpoint>"
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index 238054ab5..3320d2b08 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index 0a2a178c6..38f3e369a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
@@ -143,5 +143,3 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
-
-resume = True
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
index 4f9fb7b65..c9a0050c0 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -4,7 +4,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/user_name/"
 
 experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2"

From 48879b8cb6540d0e07124a81bf13aa4b67ecb51e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 03:11:22 +0000
Subject: [PATCH 019/183] ci(pre-commit): autofix

---
 ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +-
 ...sion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 8c02ca112..264f0da77 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from "<best_checkpoint>"
\ No newline at end of file
+load_from "<best_checkpoint>"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 3dda36c3a..f505ac5dc 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from "<best_checkpoint>"
\ No newline at end of file
+load_from "<best_checkpoint>"

From 82457ab52003421646cf2c15070c60314eaabe6e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 12:12:18 +0900
Subject: [PATCH 020/183] Remove commented-out ObjectRangeMinPointsFilter entries from 120m pipelines

---
 .../default/pipelines/default_camera_lidar_intensity_120m.py    | 2 --
 .../configs/t4dataset/default/pipelines/default_lidar_120m.py   | 2 --
 .../t4dataset/default/pipelines/default_lidar_intensity_120m.py | 2 --
 3 files changed, 6 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 9d1910dab..963a218e1 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -84,8 +84,6 @@
             "traffic_cone",
         ],
     ),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index a74ad2ea0..06d95be16 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -59,8 +59,6 @@
             "pedestrian",
         ],
     ),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index ce7985fd8..4e74d3616 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -59,8 +59,6 @@
             "pedestrian",
         ],
     ),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",

From 827bbb24c66e04e2d6c38f53423305de0010c075 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 15:50:30 +0900
Subject: [PATCH 021/183] Fix load_from assignment and bump j6gen2 experiment group to 2.7.1

---
 ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 264f0da77..be535c560 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/user_name/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -144,4 +144,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from "<best_checkpoint>"
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"

From 5f9a4a55156ee29a689a2e08b12064d208d8118d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 17 Apr 2026 16:01:35 +0900
Subject: [PATCH 022/183] Add 5-feature min-max normalization settings to j6gen2 config

---
 ...n_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index be535c560..3e615b504 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
@@ -31,6 +31,10 @@
     pts_middle_encoder=dict(
         in_channels=_base_.point_use_dim,
         sparse_shape=_base_.grid_size,
+        num_aug_features=5,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
+        aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices

From 1d0ac8db9b9ec1a738a4f7294edcd6764adaef19 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 11:44:49 +0900
Subject: [PATCH 023/183] Add BEVFusion voxel encoders and sin/cos encoder configs

---
 projects/BEVFusion/bevfusion/__init__.py      |   3 +
 projects/BEVFusion/bevfusion/bevfusion.py     |  30 +-
 .../bevfusion/bevfusion_voxel_encoder.py      | 295 ++++++++++++++++++
 ..._voxel_second_secfpn_50e_8xb8_base_120m.py |  30 +-
 ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 ++++++++++
 5 files changed, 501 insertions(+), 18 deletions(-)
 create mode 100644 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 60a64b532..3db358b55 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -7,6 +7,7 @@
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
+from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder
 
 __all__ = [
     "BEVFusion",
@@ -27,4 +28,6 @@
     "BEVFusionRandomFlip3D",
     "BEVFusionGlobalRotScaleTrans",
     "TransFusionBBoxCoder",
+    "BEVFusionVoxelEncoder",
+    "BEVFusionVoxelSinCosEncoder",
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 243b3beb5..bc3f1b094 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -207,18 +207,29 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor:
                 points = [point.float() for point in points]
                 feats, coords, sizes = self.voxelize(points)
                 batch_size = coords[-1, 0] + 1
+                
+                if self.pts_voxel_encoder is not None:
+                    assert not self.voxelize_reduce
+                    feats = self.pts_voxel_encoder(feats, sizes, coords)
         else:
             # NOTE(knzo25): onnx inference. Voxelization happens outside the graph
             with torch.cuda.amp.autocast(enabled=False):
                 # with torch.autocast('cuda', enabled=False):
 
+                # NOTE(knzo25): onnx demmands this
+                # batch_size = coords[-1, 0] + 1
+                # with torch.autocast('cuda', enabled=False):
+
                 # NOTE(knzo25): onnx demmands this
                 # batch_size = coords[-1, 0] + 1
                 batch_size = 1
                 print("Run onnx point_eSpConvst")
-                assert self.voxelize_reduce
-                if self.voxelize_reduce:
-                    feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
+                if self.pts_voxel_encoder is not None:
+                    feats = self.pts_voxel_encoder(feats, sizes, coords)
+                else:
+                    assert self.voxelize_reduce
+                    if self.voxelize_reduce:
+                        feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
         x = self.pts_middle_encoder(feats, coords, batch_size)
         return x
 
@@ -241,12 +252,13 @@ def voxelize(self, points):
 
         feats = torch.cat(feats, dim=0)
         coords = torch.cat(coords, dim=0)
-        if len(sizes) > 0:
-            sizes = torch.cat(sizes, dim=0)
-            if self.voxelize_reduce:
-                feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
-                feats = feats.contiguous()
-
+        assert len(sizes) > 0, "No points in the voxel"
+        sizes = torch.cat(sizes, dim=0)
+        
+        if self.voxelize_reduce:
+            feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
+            feats = feats.contiguous()
+        
         return feats, coords, sizes
 
     def predict(
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
new file mode 100644
index 000000000..efbc995e8
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -0,0 +1,295 @@
+from typing import Optional, Tuple
+
+import torch
+import numpy as np
+from mmcv.cnn import build_norm_layer
+from mmcv.ops import DynamicScatter
+from torch import Tensor, nn
+
+from mmdet3d.registry import MODELS
+from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer
+
+
+@MODELS.register_module()
+class BEVFusionVoxelEncoder(nn.Module):
+    """BEVFusion Voxel Encoder Feature Net.
+
+    The network is the same as the pillar feature net.
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        min_norm_values (tuple[float], optional): Per-channel minimums for
+            min-max normalization of the input features. Normalization is
+            applied only when both bounds are given. Defaults to None.
+        max_norm_values (tuple[float], optional): Per-channel maximums for
+            min-max normalization of the input features. Defaults to None.
+        in_channels (int, optional): Number of input features,
+            either x, y, z or x, y, z, r. Defaults to 4.
+        feat_channels (tuple, optional): Number of features in each of the
+            N PFNLayers. Defaults to (64, ).
+        with_distance (bool, optional): Whether to include Euclidean distance
+            to points. Defaults to False.
+        with_cluster_center (bool, optional): Whether to append each point's
+            x, y, z offset from the mean of its voxel. Defaults to True.
+        with_voxel_center (bool, optional): Whether to append each point's
+            x, y, z offset from its voxel center. Defaults to True.
+        voxel_size (tuple[float], optional): Size of voxels in x, y and z.
+            Defaults to (0.2, 0.2, 4).
+        point_cloud_range (tuple[float], optional): Point cloud range; its
+            x, y, z minimums are used. Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict, optional): Norm config passed to each PFNLayer.
+            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
+        mode (str, optional): The mode to gather point features. Options are
+            'max' or 'avg'. Defaults to 'max'.
+        legacy (bool, optional): Whether to use the new behavior or
+            the original behavior. Defaults to True.
+    """
+
+    def __init__(self,
+                 min_norm_values: Optional[Tuple[float]] = None,
+                 max_norm_values: Optional[Tuple[float]] = None,
+                 in_channels: Optional[int] = 4,
+                 feat_channels: Optional[tuple] = (64, ),
+                 with_distance: Optional[bool] = False,
+                 with_cluster_center: Optional[bool] = True,
+                 with_voxel_center: Optional[bool] = True,
+                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
+                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
+                                                              40, 1),
+                 norm_cfg: Optional[dict] = dict(
+                     type='BN1d', eps=1e-3, momentum=0.01),
+                 mode: Optional[str] = 'max',
+                 legacy: Optional[bool] = True):
+        super(BEVFusionVoxelEncoder, self).__init__()
+        assert len(feat_channels) > 0
+        self.legacy = legacy
+        if with_cluster_center:
+            in_channels += 3
+        if with_voxel_center:
+            in_channels += 3
+        if with_distance:
+            in_channels += 1
+        self._with_distance = with_distance
+        self._with_cluster_center = with_cluster_center
+        self._with_voxel_center = with_voxel_center
+        # Create PillarFeatureNet layers
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
+        pfn_layers = []
+        for i in range(len(feat_channels) - 1):
+            pfn_layers.append(
+                PFNLayer(
+                    feat_channels[i],
+                    feat_channels[i + 1],
+                    norm_cfg=norm_cfg,
+                    # Only the final PFNLayer is flagged as the last layer.
+                    last_layer=(i == len(feat_channels) - 2),
+                    mode=mode))
+        self.pfn_layers = nn.ModuleList(pfn_layers)
+
+        # Need pillar (voxel) size and x/y/z offset in order to calculate offset
+        self.vx = voxel_size[0]
+        self.vy = voxel_size[1]
+        self.vz = voxel_size[2]
+        self.x_offset = self.vx / 2 + point_cloud_range[0]
+        self.y_offset = self.vy / 2 + point_cloud_range[1]
+        self.z_offset = self.vz / 2 + point_cloud_range[2]
+        self.point_cloud_range = point_cloud_range
+
+        # register_buffer accepts None, which keeps the attribute defined when
+        # normalization is not configured (torch.tensor(None) would raise).
+        min_buffer = None if min_norm_values is None else torch.tensor(min_norm_values)
+        max_buffer = None if max_norm_values is None else torch.tensor(max_norm_values)
+        self.register_buffer("min_norm_values", min_buffer)
+        self.register_buffer("max_norm_values", max_buffer)
+        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
+
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+                *args, **kwargs) -> Tensor:
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Point features or raw points in shape
+                (N, M, C).
+            num_points (torch.Tensor): Number of points in each pillar in shape (N).
+            coors (torch.Tensor): Coordinates of each voxel in (N, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
+
+        Returns:
+            torch.Tensor: Features of pillars, one row per voxel.
+        """
+        normalize = self.min_norm_values is not None and self.max_norm_values is not None
+        if normalize:
+            features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
+            # Built once here so both offset branches below can use it
+            voxel_size = features.new_tensor([self.vx, self.vy, self.vz])
+        else:
+            features_norm = features
+
+        features_ls = [features_norm]
+        # Find distance of x, y, and z from cluster center
+        if self._with_cluster_center:
+            points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1)
+            f_cluster = features[:, :, :3] - points_mean
+            if normalize:
+                # Scale the offsets by the voxel size
+                f_cluster = f_cluster / voxel_size
+            features_ls.append(f_cluster)
+
+        # Find distance of x, y, and z from pillar center
+        if self._with_voxel_center:
+            # Voxel centers per axis; coors columns are (batch, z, y, x)
+            center_x = coors[:, 3].type_as(features).unsqueeze(1) * self.vx + self.x_offset
+            center_y = coors[:, 2].type_as(features).unsqueeze(1) * self.vy + self.y_offset
+            center_z = coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset
+            if not self.legacy:
+                f_center = torch.zeros_like(features[:, :, :3])
+                f_center[:, :, 0] = features[:, :, 0] - center_x
+                f_center[:, :, 1] = features[:, :, 1] - center_y
+                f_center[:, :, 2] = features[:, :, 2] - center_z
+            else:
+                # Legacy path: f_center is a slice of `features`, so these
+                # assignments also overwrite x, y, z inside `features`.
+                f_center = features[:, :, :3]
+                f_center[:, :, 0] = f_center[:, :, 0] - center_x
+                f_center[:, :, 1] = f_center[:, :, 1] - center_y
+                f_center[:, :, 2] = f_center[:, :, 2] - center_z
+
+            if normalize:
+                # Scale the offsets by half the voxel size
+                f_center = f_center / (voxel_size * 0.5)
+            features_ls.append(f_center)
+
+        if self._with_distance:
+            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
+            features_ls.append(points_dist)
+
+        # Combine together feature decorations
+        features = torch.cat(features_ls, dim=-1)
+        # The feature decorations were calculated without regard to whether
+        # pillar was empty. Need to ensure that
+        # empty pillars remain set to zeros.
+        voxel_count = features.shape[1]
+        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
+        mask = torch.unsqueeze(mask, -1).type_as(features)
+        features *= mask
+
+        for pfn in self.pfn_layers:
+            features = pfn(features, num_points)
+
+        return features.squeeze(1)
+
+
+@MODELS.register_module()
+class BEVFusionVoxelSinCosEncoder(nn.Module):
+    """Voxel encoder without learnable layers: decorate points, sin/cos-encode them, then average per voxel."""
+
+    def __init__(self, 
+                 min_norm_values: Tuple[float],
+                 max_norm_values: Tuple[float],
+                 in_channels: Optional[int] = 4,
+                 with_distance: Optional[bool] = False,
+                 with_cluster_center: Optional[bool] = True,
+                 with_voxel_center: Optional[bool] = True,
+                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
+                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
+                                                              40, 1),):
+        super(BEVFusionVoxelSinCosEncoder, self).__init__()
+
+        if with_cluster_center:
+            in_channels += 3
+        if with_voxel_center:
+            in_channels += 3
+        if with_distance:
+            in_channels += 1
+        self._with_distance = with_distance
+        self._with_cluster_center = with_cluster_center
+        self._with_voxel_center = with_voxel_center
+        # Per-point channel count after decorations; forward() reshapes with it
+        self.in_channels = in_channels
+
+        # Need pillar (voxel) size and x/y/z offset in order to calculate offset
+        self.vx = voxel_size[0]
+        self.vy = voxel_size[1]
+        self.vz = voxel_size[2]
+        self.x_offset = self.vx / 2 + point_cloud_range[0]
+        self.y_offset = self.vy / 2 + point_cloud_range[1]
+        self.z_offset = self.vz / 2 + point_cloud_range[2]
+        self.point_cloud_range = point_cloud_range
+
+        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
+        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
+        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
+        self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float()))
+
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+                *args, **kwargs) -> Tensor:
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Point features or raw points in shape
+                (N, M, C).
+            num_points (torch.Tensor): Number of points in each pillar in shape (N).
+            coors (torch.Tensor): Coordinates of each voxel in (N, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
+
+        Returns:
+            torch.Tensor: Per-voxel mean of the sin/cos features in shape (N, 2 * self.in_channels ** 2).
+        """
+        features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
+        features_ls = [features_norm]
+        # Find distance of x, y, and z from cluster center
+        if self._with_cluster_center:
+            points_mean = features[:, :, :3].sum(
+                dim=1, keepdim=True) / num_points.type_as(features).view(
+                    -1, 1, 1)
+            
+            # Scale the offsets by the voxel size
+            f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size
+            features_ls.append(f_cluster)
+
+        # Find distance of x, y, and z from pillar center
+        dtype = features.dtype
+        if self._with_voxel_center:
+            f_center = torch.zeros_like(features[:, :, :3])
+            f_center[:, :, 0] = features[:, :, 0] - (
+                coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
+                self.x_offset)
+            f_center[:, :, 1] = features[:, :, 1] - (
+                coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
+                self.y_offset)
+            f_center[:, :, 2] = features[:, :, 2] - (
+                coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
+                self.z_offset)
+            
+            # Scale the offsets by half the voxel size
+            f_center = f_center / (self.voxel_size * 0.5)
+            features_ls.append(f_center)
+
+        if self._with_distance:
+            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
+            features_ls.append(points_dist)
+        
+        # Combine together feature decorations
+        features = torch.cat(features_ls, dim=-1)
+        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+
+        # SinCos encoding: every channel is multiplied by pi * 2**k for each k in range(C)
+        # (N, M, C) -> (N, M, C, 1) -> (N, M, C, 1) * (1, 1, 1, C) -> (N, M, C, C)
+        y = features.unsqueeze(-1) * np.pi * self.exponents.unsqueeze(0).unsqueeze(0).unsqueeze(0)
+        # (N, M, C, C) -> (N, M, C*C)
+        y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels ** 2)
+        # (N, M, C*C) -> (N, M, C*C*2)
+        features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1)
+
+        # The feature decorations were calculated without regard to whether
+        # a point slot was padding. Zero the padded slots so they do not
+        # contribute to the per-voxel sum below.
+        mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
+        mask = torch.unsqueeze(mask, -1).type_as(features)
+        features *= mask
+
+        # Reduction by mean: (N, M, C*C*2) -> (N, C*C*2)
+        features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
+        features = features.contiguous()
+        
+        return features
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index d33b33c56..17f16254d 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_2/"
 
-experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -25,16 +25,28 @@
     voxelize_cfg=dict(
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelSinCosEncoder", 
+        in_channels=4,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
     pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
+        in_channels=100,
         sparse_shape=_base_.grid_size,
-        num_aug_features=4,
+        # num_aug_features=4,
         # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py
new file mode 100644
index 000000000..531a07673
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py
@@ -0,0 +1,161 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelSinCosEncoder", 
+        in_channels=4,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag is technically two sweeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=100,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag is technically two sweeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)

From d063cf2a68c2fdd0e3129ff94f5f89299fea51be Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 12:18:46 +0900
Subject: [PATCH 024/183] Added

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |   4 +-
 projects/BEVFusion/bevfusion/depth_lss.py     |   3 +-
 ...sion_camera_swin_fpn_30e_8xb8_base_120m.py | 137 ++++++++++++++++++
 ...mera_swin_fpn_30e_8xb8_j6gen2_base_120m.py |   2 +-
 ...amera_swin_fpn_lidar_second_secfpn_120m.py |   2 +-
 .../default_50e_8xb8_adamw_linear_cosine.py   |  63 ++++++++
 6 files changed, 206 insertions(+), 5 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 853523c4f..e17bab12a 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -25,11 +25,11 @@ def clip_sigmoid(x, eps=1e-4):
 @MODELS.register_module()
 class ConvFuser(nn.Sequential):
 
-    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: int) -> None:
+    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int, padding: int) -> None:
         self.in_channels = in_channels
         self.out_channels = out_channels
         super().__init__(
-            nn.Conv2d(sum(in_channels), out_channels, kernel_size, padding, bias=False),
+            nn.Conv2d(sum(in_channels), out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
             nn.BatchNorm2d(out_channels),
             nn.ReLU(True),
         )
diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index ac7c5b503..03c0ec2f2 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -567,7 +567,8 @@ def __init__(
             zbound=zbound,
             dbound=dbound,
         )
-
+        
+        if lidar_depth_image_last_stride is not None:
         self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, last_stride=lidar_depth_image_last_stride)
         self.depthnet = DepthLSSNet(
             in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py
new file mode 100644
index 000000000..2debcccb8
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
+    "../default/models/default_camera_swin_fpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
index e65c52ece..3a69a2cd2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m"
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py
index c4097de3d..55c6ca3cd 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py
@@ -56,5 +56,5 @@
         dbound=[1.0, 130, 1.0],
         downsample=2,
     ),
-    fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, padding=2),
+    fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, stride=2, padding=2),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
new file mode 100644
index 000000000..9517aba86
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
@@ -0,0 +1,63 @@
+# learning rate
+# 1e-4 * sqrt(2) = 0.0001414
+lr = 1.4141e-4
+t_max = 8
+max_epochs = 50
+val_interval = 5
+
+train_gpu_size = 8
+test_batch_size = 2
+train_batch_size = 8
+
+param_scheduler = [
+    # learning rate scheduler
+    dict(type="LinearLR", start_factor=1.0 / 3, begin=0, end=t_max, by_epoch=True),
+    dict(
+        type="CosineAnnealingLR",
+        T_max=(max_epochs - t_max),
+        eta_min=lr * 1e-4,
+        begin=t_max,
+        end=max_epochs,
+        by_epoch=True,
+        convert_to_iter_based=True,
+    ),
+    # momentum scheduler
+    # During the first t_max epochs, momentum is cosine-annealed towards 0.85 / 0.95;
+    # during the remaining (max_epochs - t_max) epochs, it is cosine-annealed towards 1
+    dict(
+        type="CosineAnnealingMomentum",
+        T_max=t_max,
+        eta_min=0.85 / 0.95,
+        begin=0,
+        end=t_max,
+        by_epoch=True,
+        convert_to_iter_based=True,
+    ),
+    dict(
+        type="CosineAnnealingMomentum",
+        T_max=(max_epochs - t_max),
+        eta_min=1,
+        begin=t_max,
+        end=max_epochs,
+        by_epoch=True,
+        convert_to_iter_based=True,
+    ),
+]
+
+train_cfg = dict(
+    by_epoch=True, max_epochs=max_epochs, val_interval=val_interval, dynamic_intervals=[(max_epochs - 5, 1)]
+)
+val_cfg = dict()
+test_cfg = dict()
+
+optim_wrapper = dict(
+    type="OptimWrapper",
+    optimizer=dict(type="AdamW", lr=lr, weight_decay=0.01),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
+)
+
+auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)
+
+# Only set sync_bn when train_gpu_size is greater than 1
+if train_gpu_size > 1:
+    sync_bn = "torch"

From e547e0514420c4cd26b68d13ffacdf65901cefc8 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 17:08:16 +0900
Subject: [PATCH 025/183] Add bevfusion camera 2.6

---
 projects/BEVFusion/bevfusion/transforms_3d.py |  12 ++
 ...camera_swin_fpn_lss_50e_8xb8_base_120m.py} |   6 +-
 .../default_camera_swin_fpn_lss_120m.py       |  65 +++++++++
 .../pipelines/default_camera_base_120m.py     | 131 ++++++++++++++++++
 .../default_camera_lidar_intensity_120m.py    |   6 +-
 5 files changed, 214 insertions(+), 6 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py => bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py} (95%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py

diff --git a/projects/BEVFusion/bevfusion/transforms_3d.py b/projects/BEVFusion/bevfusion/transforms_3d.py
index 7e9faca24..c311f9254 100644
--- a/projects/BEVFusion/bevfusion/transforms_3d.py
+++ b/projects/BEVFusion/bevfusion/transforms_3d.py
@@ -188,6 +188,18 @@ def transform(self, input_dict: dict) -> dict:
         return input_dict
 
 
+@TRANSFORMS.register_module()
+class BEVFusionRemoveLiDARPoints(BaseTransform):
+    """Remove the LiDAR points from a sample while leaving all other entries intact."""
+
+    def transform(self, results: Dict[str, Any]) -> Dict[str, Any]:
+        """Set an existing ``"points"`` entry to ``None`` and return ``results``."""
+        # The key is kept (not popped); samples without points pass through as-is.
+        if "points" in results:
+            results["points"] = None
+        return results
+
+
 @TRANSFORMS.register_module()
 class GridMask(BaseTransform):
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
similarity index 95%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
index 2debcccb8..1c30d708a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
@@ -1,8 +1,8 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_120m.py",
+    "../default/pipelines/default_camera_base_120m.py",
+    "../default/models/default_camera_swin_fpn_lss_120m.py",
     "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_base_120m"
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py
new file mode 100644
index 000000000..2f1d1f3be
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py
@@ -0,0 +1,65 @@
+_base_ = [
+    "./default_lidar_second_secfpn_120m.py",
+]
+
+# Image network
+model = dict(
+    # Remove all lidar related configs
+    voxelize_cfg=None,
+    pts_voxel_encoder=None,
+    pts_middle_encoder=None,
+    pts_neck=None,
+    pts_backbone=None,
+    data_preprocessor=dict(
+        type="Det3DDataPreprocessor",
+        pad_size_divisor=32,
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=False,
+        rgb_to_bgr=False,
+    ),
+    img_backbone=dict(
+        type="mmdet.SwinTransformer",
+        pretrain_img_size=(256, 704),
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=[1, 2, 3],
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(
+            type="Pretrained",
+            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
+            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
+        ),
+    ),
+    img_neck=dict(
+        type="GeneralizedLSSFPN",
+        in_channels=[192, 384, 768],
+        out_channels=256,
+        start_level=0,
+        num_outs=3,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        act_cfg=dict(type="ReLU", inplace=True),
+        upsample_cfg=dict(mode="bilinear", align_corners=False),
+    ),
+    view_transform=dict(
+        type="LSSTransform",
+        in_channels=256,
+        out_channels=80,
+        feature_size=[48, 96],
+        xbound=[-122.40, 122.40, 0.68],
+        ybound=[-122.40, 122.40, 0.68],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 130, 1.0],
+        downsample=2,
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
new file mode 100644
index 000000000..a96773825
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -0,0 +1,131 @@
+## This config is for the camera_base only model, without lidar points
+
+_base_ = [
+    "./default_lidar_120m.py",
+]
+
+# Image parameters
+image_size = [384, 768]  # Height, Width
+camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"]
+
+train_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=backend_args,
+        camera_order=camera_order,
+    ),
+    # We keep loading LiDAR points to make downstream BEV augmentation easier 
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.28, 0.40],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=True,
+        is_train=True,
+    ),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    # Remove LiDAR points from the data
+    dict(type="BEVFusionRemoveLiDARPoints"),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "construction_vehicle",
+            "bus",
+            "trailer",
+            "barrier",
+            "motorcycle",
+            "bicycle",
+            "pedestrian",
+            "traffic_cone",
+        ],
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=backend_args,
+        camera_order=camera_order,
+    ),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.34, 0.34],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=False,
+        is_train=False,
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+filter_cfg = dict(filter_frames_with_camera_order=camera_order)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 963a218e1..2ed285200 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -54,9 +54,9 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.29, 0.35],
+        resize_lim=[0.28, 0.40],
         bot_pct_lim=[0.0, 0.0],
-        rot_lim=[-5.4, 5.4],
+        rot_lim=[0.0, 0.0],
         rand_flip=True,
         is_train=True,
     ),
@@ -139,7 +139,7 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.32, 0.32],
+        resize_lim=[0.34, 0.34],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=False,

From 2ef16e885b2c0c653761664d0fd2e5cf6740740e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 17:20:34 +0900
Subject: [PATCH 026/183] Add bevfusion camera 2.6

---
 projects/BEVFusion/bevfusion/depth_lss.py            |  1 -
 .../default/pipelines/default_camera_base_120m.py    | 12 ++++++------
 .../pipelines/default_camera_lidar_intensity_120m.py |  4 ++++
 .../default/pipelines/default_lidar_120m.py          |  2 +-
 .../pipelines/default_lidar_intensity_120m.py        |  2 +-
 5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index 03c0ec2f2..f202c3777 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -568,7 +568,6 @@ def __init__(
             dbound=dbound,
         )
         
-        if lidar_depth_image_last_stride is not None:
         self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, last_stride=lidar_depth_image_last_stride)
         self.depthnet = DepthLSSNet(
             in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index a96773825..8d1c6da86 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -13,16 +13,16 @@
         type="BEVLoadMultiViewImageFromFiles",
         to_float32=True,
         color_type="color",
-        backend_args=backend_args,
+        backend_args=_base_.backend_args,
         camera_order=camera_order,
     ),
     # We keep loading LiDAR points to make downstream BEV augmentation easier 
     dict(
         type="LoadPointsFromFile",
         coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_load_dim,
+        backend_args=_base_.backend_args,
     ),
     dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
     dict(
@@ -41,7 +41,7 @@
         translation_std=[0.5, 0.5, 0.2],
     ),
     dict(type="BEVFusionRandomFlip3D"),
-    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
     # Remove LiDAR points from the data
     dict(type="BEVFusionRemoveLiDARPoints"),
     dict(
@@ -92,7 +92,7 @@
         type="BEVLoadMultiViewImageFromFiles",
         to_float32=True,
         color_type="color",
-        backend_args=backend_args,
+        backend_args=_base_.backend_args,
         camera_order=camera_order,
     ),
     dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 2ed285200..18e154921 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -107,6 +107,8 @@
             "img_aug_matrix",
             "lidar_aug_matrix",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
@@ -164,6 +166,8 @@
             "num_pts_feats",
             "num_views",
             "timestamp",
+            "vehicle_type",
+            "city",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 06d95be16..da2b775e2 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -137,4 +137,4 @@
 #   e.g., dict(filter_frames_with_missing_image=True).
 # - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
 #   image-based filtering does not apply and `filter_cfg` is intentionally None.
-filter_cfg = None
+filter_cfg = dict()
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 4e74d3616..723a241d7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -137,4 +137,4 @@
 #   e.g., dict(filter_frames_with_missing_image=True).
 # - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
 #   image-based filtering does not apply and `filter_cfg` is intentionally None.
-filter_cfg = None
+filter_cfg = dict()

From a26782abfb7c9b088845d9d778ab0babd520ed74 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 17:24:44 +0900
Subject: [PATCH 027/183] Resolve load_from merge conflict; use per-dataset dataset_name in t4metric_v2 configs

---
 ...l_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++--
 ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 6 +-----
 ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 4 ++--
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index afb150284..62ea479fb 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -42,7 +42,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="validation",
-    dataset_name="base",
+    dataset_name="j6gen2_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
@@ -64,7 +64,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="testing",
-    dataset_name="base",
+    dataset_name="j6gen2_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 0eb440472..73c1e4671 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -148,8 +148,4 @@
 )
 log_processor = dict(window_size=50)
 
-<<<<<<< HEAD
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
-=======
-load_from "<best_checkpoint>"
->>>>>>> feat/releave_bevfusion_2_6
+# load_from = "<best_checkpoint>"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index 3320d2b08..0109e96d9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -42,7 +42,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="validation",
-    dataset_name="base",
+    dataset_name="jpntaxi_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
@@ -64,7 +64,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="testing",
-    dataset_name="base",
+    dataset_name="jpntaxi_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,

From f4c01a542618b3c592ddf1cd6e2b9d1657abccbf Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 17:51:58 +0900
Subject: [PATCH 028/183] Check image paths under data_root; drop path overrides from t4metric_v2 configs

---
 autoware_ml/detection3d/datasets/t4dataset.py              | 2 +-
 ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 7 ++-----
 ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +-
 ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 3 ---
 ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 3 ---
 5 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index ce1c78f31..74d274b87 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]:
                     break
 
                 if entry["images"][camera_order]["img_path"] is None or not osp.exists(
-                    entry["images"][camera_order]["img_path"]
+                    self.data_root + entry["images"][camera_order]["img_path"]
                 ):
                     filtered = True
                     break
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index 62ea479fb..3476011ff 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -3,9 +3,6 @@
 ]
 
 # user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
-
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
@@ -42,7 +39,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="validation",
-    dataset_name="j6gen2_base",
+    dataset_name="base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
@@ -64,7 +61,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="testing",
-    dataset_name="j6gen2_base",
+    dataset_name="base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 73c1e4671..4eea4c2aa 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index 0109e96d9..49d91e05d 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -3,9 +3,6 @@
 ]
 
 # user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
-
 experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
index c9a0050c0..3d976d970 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -3,9 +3,6 @@
 ]
 
 # user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
-
 experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name

From 15371af9539e84c46d844efa5007d565acb80878 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 17:52:51 +0900
Subject: [PATCH 029/183] Update dataset name

---
 ...xel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index 3476011ff..0748008ba 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -39,7 +39,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="validation",
-    dataset_name="base",
+    dataset_name="j6gen2_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,
@@ -61,7 +61,7 @@
     testing_statistics_parquet_path=testing_statistics_parquet_path,
     validation_statistics_parquet_path=validation_statistics_parquet_path,
     output_dir="testing",
-    dataset_name="base",
+    dataset_name="j6gen2_base",
     perception_evaluator_configs=perception_evaluator_configs,
     critical_object_filter_config=None,
     frame_pass_fail_config=frame_pass_fail_config,

From c6e8a10856085304b458e7204bfbc0246492dccd Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 18:40:44 +0900
Subject: [PATCH 030/183] Add tqdm progress to T4Dataset.filter_data and drop image existence check

---
 autoware_ml/detection3d/datasets/t4dataset.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index ce1c78f31..a01d9fbe6 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -5,6 +5,7 @@
 from mmdet3d.datasets import NuScenesDataset
 from mmengine.logging import print_log
 from mmengine.registry import DATASETS
+import tqdm
 
 
 @DATASETS.register_module()
@@ -56,16 +57,14 @@ def filter_data(self) -> List[dict]:
             return self.data_list
 
         filtered_data_list = []
-        for entry in self.data_list:
+        for entry in tqdm.tqdm(self.data_list, desc="Filtering data"):
             filtered = False
             for camera_order in filter_frames_with_camera_order:
                 if camera_order not in entry["images"]:
                     filtered = True
                     break
-
-                if entry["images"][camera_order]["img_path"] is None or not osp.exists(
-                    entry["images"][camera_order]["img_path"]
-                ):
+                
+                if entry["images"][camera_order]["img_path"] is None:
                     filtered = True
                     break
 

From 7252126bcf3dce3b73ef26db0fc7fe2f59ba1cb6 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 18 Apr 2026 19:21:52 +0900
Subject: [PATCH 031/183] Set camera input_modality in default_camera_base_120m pipeline

---
 autoware_ml/detection3d/datasets/t4dataset.py                    | 1 +
 .../t4dataset/default/pipelines/default_camera_base_120m.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index a01d9fbe6..2406bfcb1 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -179,6 +179,7 @@ def parse_data_info(self, info: dict) -> dict:
                             cam_prefix,
                             img_info["img_path"],
                         )
+                    # print_log(f"Camera path: {img_info['img_path']}", logger="current")
 
             if self.default_cam_key is not None:
                 info["img_path"] = info["images"][self.default_cam_key]["img_path"]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index 8d1c6da86..39972b6d0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -3,6 +3,7 @@
 _base_ = [
     "./default_lidar_120m.py",
 ]
+input_modality = dict(use_lidar=True, use_camera=True)
 
 # Image parameters
 image_size = [384, 768]  # Height, Width

From 9e793246cd49b5849a12c38821e6623d7d542a2c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 19 Apr 2026 04:57:19 +0900
Subject: [PATCH 032/183] Register BEVFusionRemoveLiDARPoints; retune camera pipelines and schedulers

---
 projects/BEVFusion/bevfusion/__init__.py                   | 3 ++-
 .../default/pipelines/default_camera_base_120m.py          | 7 +++++--
 .../pipelines/default_camera_lidar_intensity_120m.py       | 1 +
 .../schedulers/default_20e_8xb8_adamw_linear_cosine.py     | 2 +-
 .../schedulers/default_30e_8xb8_adamw_linear_cosine.py     | 2 +-
 .../schedulers/default_50e_8xb8_adamw_linear_cosine.py     | 4 ++--
 6 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 60a64b532..4732eabfc 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -5,7 +5,7 @@
 from .loading import BEVLoadMultiViewImageFromFiles
 from .sparse_encoder import BEVFusionSparseEncoder
 from .transformer import TransformerDecoderLayer
-from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
+from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D, BEVFusionRemoveLiDARPoints
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
 
 __all__ = [
@@ -26,5 +26,6 @@
     "TransformerDecoderLayer",
     "BEVFusionRandomFlip3D",
     "BEVFusionGlobalRotScaleTrans",
+    "BEVFusionRemoveLiDARPoints",
     "TransFusionBBoxCoder",
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index 39972b6d0..5f926f867 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -29,7 +29,8 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.28, 0.40],
+        # resize_lim=[0.28, 0.40],
+        resize_lim=0.02,
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=True,
@@ -99,12 +100,14 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.34, 0.34],
+        # resize_lim=[0.34, 0.34],
+        resize_lim=0.02,
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=False,
         is_train=False,
     ),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 18e154921..53a149f07 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -148,6 +148,7 @@
         is_train=False,
     ),
     dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
index 15ba38878..db5515b46 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
 # 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
+lr = 1e-4
 t_max = 6
 max_epochs = 20
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index 264eda921..f1a1f52b4 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
 # 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
+lr = 1e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
index 9517aba86..1161621b8 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
@@ -1,7 +1,7 @@
 # learning rate
 # 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
-t_max = 8
+lr = 1e-4
+t_max = 15
 max_epochs = 50
 val_interval = 5
 

From a249d423aafc5595cfa2546f7e0599e555d8882f Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 19 Apr 2026 18:24:39 +0900
Subject: [PATCH 033/183] Add bevfusion camera 2.6

---
 ...ra_swin_fpn_depthlss_50e_8xb8_base_120m.py | 137 +++++++++++++++++
 ...n_camera_swin_fpn_lss_50e_8xb8_base_50m.py | 137 +++++++++++++++++
 .../default_camera_swin_fpn_depthlss_120m.py  |  65 ++++++++
 .../models/default_camera_swin_fpn_lss_50m.py |  70 +++++++++
 .../pipelines/default_camera_base_120m.py     |  10 +-
 .../pipelines/default_camera_base_50m.py      | 135 +++++++++++++++++
 .../default/pipelines/default_lidar_50m.py    | 140 ++++++++++++++++++
 .../default_50e_8xb8_adamw_linear_cosine.py   |   2 +-
 8 files changed, 691 insertions(+), 5 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
new file mode 100644
index 000000000..8c8d84d18
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
+    "../default/models/default_camera_swin_fpn_depthlss_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py
new file mode 100644
index 000000000..7c5a5f91f
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_camera_base_50m.py",
+    "../default/models/default_camera_swin_fpn_lss_50m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py
new file mode 100644
index 000000000..c4b0cd9ab
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py
@@ -0,0 +1,65 @@
+_base_ = [
+    "./default_lidar_second_secfpn_120m.py",
+]
+
+# Image branch: Swin backbone -> GeneralizedLSSFPN neck -> DepthLSSTransform (BEV bounds +/-122.4 m)
+model = dict(
+    # Set the LiDAR-related modules (presumably defined by the base config) to None
+    voxelize_cfg=None,
+    pts_voxel_encoder=None,
+    pts_middle_encoder=None,
+    pts_neck=None,
+    pts_backbone=None,
+    data_preprocessor=dict(
+        type="Det3DDataPreprocessor",
+        pad_size_divisor=32,
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=False,
+        rgb_to_bgr=False,
+    ),
+    img_backbone=dict(
+        type="mmdet.SwinTransformer",
+        pretrain_img_size=(256, 704),
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=[1, 2, 3],
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(
+            type="Pretrained",
+            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
+            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
+        ),
+    ),
+    img_neck=dict(
+        type="GeneralizedLSSFPN",
+        in_channels=[192, 384, 768],
+        out_channels=256,
+        start_level=0,
+        num_outs=3,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        act_cfg=dict(type="ReLU", inplace=True),
+        upsample_cfg=dict(mode="bilinear", align_corners=False),
+    ),
+    view_transform=dict(
+        type="DepthLSSTransform",
+        in_channels=256,
+        out_channels=80,
+        feature_size=[48, 96],
+        xbound=[-122.40, 122.40, 0.68],
+        ybound=[-122.40, 122.40, 0.68],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 130, 1.0],
+        downsample=2,
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
new file mode 100644
index 000000000..a35e3a79a
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
@@ -0,0 +1,70 @@
+_base_ = [
+    "./default_lidar_second_secfpn_120m.py",
+]
+
+# Image branch: Swin backbone -> GeneralizedLSSFPN neck -> LSSTransform (BEV bounds +/-54 m)
+model = dict(
+    # Set the LiDAR-related modules (presumably defined by the base config) to None
+    voxelize_cfg=None,
+    pts_voxel_encoder=None,
+    pts_middle_encoder=None,
+    pts_neck=None,
+    pts_backbone=None,
+    data_preprocessor=dict(
+        type="Det3DDataPreprocessor",
+        pad_size_divisor=32,
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=False,
+        rgb_to_bgr=False,
+    ),
+    img_backbone=dict(
+        type="mmdet.SwinTransformer",
+        pretrain_img_size=(256, 704),
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=[1, 2, 3],
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(
+            type="Pretrained",
+            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
+            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
+        ),
+    ),
+    img_neck=dict(
+        type="GeneralizedLSSFPN",
+        in_channels=[192, 384, 768],
+        out_channels=256,
+        start_level=0,
+        num_outs=3,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        act_cfg=dict(type="ReLU", inplace=True),
+        upsample_cfg=dict(mode="bilinear", align_corners=False),
+    ),
+    view_transform=dict(
+        type="LSSTransform",
+        in_channels=256,
+        out_channels=80,
+        feature_size=[48, 96],
+        xbound=[-54.0, 54.0, 0.3],
+        ybound=[-54.0, 54.0, 0.3],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 60, 0.5],
+        downsample=2,
+    ),
+    bbox_head=dict(
+        bbox_coder=dict(
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],  # wider than the +/-54 m x/y bounds above
+        ),
+    )
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index 5f926f867..410968467 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -38,9 +38,12 @@
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
+        # scale_ratio_range=[0.95, 1.05],
+        # rot_range=[-0.78539816, 0.78539816],
+        # translation_std=[0.5, 0.5, 0.2],
+        scale_ratio_range=[0.98, 1.02],
+        rot_range=[-0.3925, 0.3925],
+        translation_std=[0.2, 0.2, 0.1],
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
@@ -107,7 +110,6 @@
         rand_flip=False,
         is_train=False,
     ),
-    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
new file mode 100644
index 000000000..c9010038f
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
@@ -0,0 +1,135 @@
+# Camera-only pipeline: training loads LiDAR points for BEV augmentation, then removes them.
+
+_base_ = [
+    "./default_lidar_50m.py",
+]
+input_modality = dict(use_lidar=True, use_camera=True)
+
+# Image parameters
+image_size = [384, 768]  # Height, Width
+camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"]
+
+train_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=_base_.backend_args,
+        camera_order=camera_order,
+    ),
+    # We keep loading LiDAR points to make downstream BEV augmentation easier
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_load_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.28, 0.40],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=True,
+        is_train=True,
+    ),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        # Milder than the LiDAR pipeline this config inherits from, which uses
+        # scale_ratio_range=[0.95, 1.05], rot_range=[-0.78539816, 0.78539816],
+        # translation_std=[0.5, 0.5, 0.2].
+        scale_ratio_range=[0.98, 1.02],
+        rot_range=[-0.3925, 0.3925],
+        translation_std=[0.2, 0.2, 0.1],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    # Remove LiDAR points from the data
+    dict(type="BEVFusionRemoveLiDARPoints"),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "construction_vehicle",
+            "bus",
+            "trailer",
+            "barrier",
+            "motorcycle",
+            "bicycle",
+            "pedestrian",
+            "traffic_cone",
+        ],
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=_base_.backend_args,
+        camera_order=camera_order,
+    ),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.34, 0.34],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=False,
+        is_train=False,
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+filter_cfg = dict(filter_frames_with_camera_order=camera_order)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
new file mode 100644
index 000000000..964d6eef9
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -0,0 +1,140 @@
+# Dataset parameters
+backend_args = None
+num_workers = 16
+input_modality = dict(use_lidar=True, use_camera=False)
+
+# range setting
+point_cloud_range = [-54.0, -54.0, -3.0, 54.0, 54.0, 5.0]
+voxel_size = [0.075, 0.075, 0.2]
+grid_size = [1440, 1440, 41]
+eval_class_range = {
+    "car": 54.0,
+    "truck": 54.0,
+    "bus": 54.0,
+    "bicycle": 54.0,
+    "pedestrian": 54.0,
+}
+
+# LiDAR parameters
+point_load_dim = 5  # x, y, z, intensity, ring_id
+point_use_dim = 4
+lidar_sweep_dims = [0, 1, 2, 4]  # x, y, z, time_lag
+sweeps_num = 1
+
+train_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+        ],
+    ),
+    dict(type="PointShuffle"),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=True,
+    ),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+# Filtering configuration
+# Note:
+# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering,
+#   e.g., dict(filter_frames_with_missing_image=True).
+# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
+#   image-based filtering does not apply and `filter_cfg` is intentionally empty.
+filter_cfg = dict()
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
index 1161621b8..d569900d6 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
@@ -1,7 +1,7 @@
 # learning rate
 # 1e-4 * sqrt(2) = 0.0001414
 lr = 1e-4
-t_max = 15
+t_max = 3
 max_epochs = 50
 val_interval = 5
 

From 9d7ddc21adff7115a303d42b024b40e95c38a7a4 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 19 Apr 2026 18:26:57 +0900
Subject: [PATCH 034/183] Add bevfusion camera 2.6

---
 ...mera_swin_fpn_30e_8xb8_j6gen2_base_120m.py | 137 ------------------
 .../models/default_camera_swin_fpn_120m.py    |  65 ---------
 .../default_30e_8xb8_adamw_linear_cosine.py   |   2 +-
 3 files changed, 1 insertion(+), 203 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
deleted file mode 100644
index 3a69a2cd2..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py
deleted file mode 100644
index c4b0cd9ab..000000000
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py
+++ /dev/null
@@ -1,65 +0,0 @@
-_base_ = [
-    "./default_lidar_second_secfpn_120m.py",
-]
-
-# Image network
-model = dict(
-    # Remove all lidar related configs
-    voxelize_cfg=None,
-    pts_voxel_encoder=None,
-    pts_middle_encoder=None,
-    pts_neck=None,
-    pts_backbone=None,
-    data_preprocessor=dict(
-        type="Det3DDataPreprocessor",
-        pad_size_divisor=32,
-        mean=[123.675, 116.28, 103.53],
-        std=[58.395, 57.12, 57.375],
-        bgr_to_rgb=False,
-        rgb_to_bgr=False,
-    ),
-    img_backbone=dict(
-        type="mmdet.SwinTransformer",
-        pretrain_img_size=(256, 704),
-        embed_dims=96,
-        depths=[2, 2, 6, 2],
-        num_heads=[3, 6, 12, 24],
-        window_size=7,
-        mlp_ratio=4,
-        qkv_bias=True,
-        qk_scale=None,
-        drop_rate=0.0,
-        attn_drop_rate=0.0,
-        drop_path_rate=0.2,
-        patch_norm=True,
-        out_indices=[1, 2, 3],
-        with_cp=False,
-        convert_weights=True,
-        init_cfg=dict(
-            type="Pretrained",
-            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
-            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
-        ),
-    ),
-    img_neck=dict(
-        type="GeneralizedLSSFPN",
-        in_channels=[192, 384, 768],
-        out_channels=256,
-        start_level=0,
-        num_outs=3,
-        norm_cfg=dict(type="BN2d", requires_grad=True),
-        act_cfg=dict(type="ReLU", inplace=True),
-        upsample_cfg=dict(mode="bilinear", align_corners=False),
-    ),
-    view_transform=dict(
-        type="DepthLSSTransform",
-        in_channels=256,
-        out_channels=80,
-        feature_size=[48, 96],
-        xbound=[-122.40, 122.40, 0.68],
-        ybound=[-122.40, 122.40, 0.68],
-        zbound=[-10.0, 10.0, 20.0],
-        dbound=[1.0, 130, 1.0],
-        downsample=2,
-    ),
-)
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index f1a1f52b4..2181cbebb 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,7 +1,7 @@
 # learning rate
 # 1e-4 * sqrt(2) = 0.0001414
 lr = 1e-4
-t_max = 8
+t_max = 2
 max_epochs = 30
 val_interval = 1
 

From 0286f25f66b0910f8ea3e9230331ea1ec279c42b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 19 Apr 2026 18:27:55 +0900
Subject: [PATCH 035/183] Add bevfusion camera 2.6

---
 ..._fpn_depthlss_30e_8xb8_j6gen2_base_120m.py | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py
new file mode 100644
index 000000000..d31630dd0
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
+    "../default/models/default_camera_swin_fpn_depthlss_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)

From 4977b332437c647cd617c66a88f1f109129ec9a2 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 20 Apr 2026 06:46:48 +0900
Subject: [PATCH 036/183] Update dataset name

---
 ...voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +-
 ...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +-
 .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py   | 2 +-
 ..._lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index 0748008ba..d1950d39a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index 49d91e05d..6bd285ce1 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index d33b33c56..78d287af6 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
index 3d976d970..fbcfe2dce 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 

From f85a8e906b887d56d67a4e7f88673c1032432a0c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 20 Apr 2026 13:04:20 +0900
Subject: [PATCH 037/183] Replace hard-coded load_from with a commented placeholder

---
 ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 3e615b504..4f220cbcb 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -148,4 +148,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
+# load_from = "<best_checkpoint>"

From cf3310c090d254dc184bf798e196c8d92d9a2a98 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Tue, 21 Apr 2026 14:10:39 +0900
Subject: [PATCH 038/183] Add BEVFusion-C LSS j6gen2 config, update camera pipelines

---
 ..._swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ++++++++++++++++++
 .../pipelines/default_camera_base_120m.py     |   9 ++
 .../default_camera_lidar_intensity_120m.py    |  48 ++----
 3 files changed, 161 insertions(+), 33 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py
new file mode 100644
index 000000000..4c809264e
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
+    "../default/models/default_camera_swin_fpn_lss_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index 410968467..227c8241e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -38,12 +38,21 @@
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
+<<<<<<< HEAD
         # scale_ratio_range=[0.95, 1.05],
         # rot_range=[-0.78539816, 0.78539816],
         # translation_std=[0.5, 0.5, 0.2],
         scale_ratio_range=[0.98, 1.02],
         rot_range=[-0.3925, 0.3925],
         translation_std=[0.2, 0.2, 0.1],
+=======
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+        # scale_ratio_range=[0.98, 1.02],
+        # rot_range=[-0.3925, 0.3925],
+        # translation_std=[0.2, 0.2, 0.1],
+>>>>>>> e7daa8a9 (Added)
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 53a149f07..439459010 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -40,21 +40,21 @@
         use_dim=point_load_dim,
         backend_args=backend_args,
     ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=False,
-    ),
+    # dict(
+    #     type="LoadPointsFromMultiSweeps",
+    #     sweeps_num=sweeps_num,
+    #     load_dim=point_load_dim,
+    #     use_dim=lidar_sweep_dims,
+    #     pad_empty_sweeps=True,
+    #     remove_close=True,
+    #     backend_args=backend_args,
+    #     test_mode=False,
+    # ),
     dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.28, 0.40],
+        resize_lim=[0.29, 0.35],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=True,
@@ -67,8 +67,9 @@
         translation_std=[0.5, 0.5, 0.2],
     ),
     dict(type="BEVFusionRandomFlip3D"),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    # dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
     dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="BEVFusionRemoveLiDARPoints"),
     dict(
         type="ObjectNameFilter",
         classes=[
@@ -84,7 +85,7 @@
             "traffic_cone",
         ],
     ),
-    dict(type="PointShuffle"),
+    # dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
         keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
@@ -121,34 +122,15 @@
         backend_args=backend_args,
         camera_order=camera_order,
     ),
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=True,
-    ),
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.34, 0.34],
+        resize_lim=[0.32, 0.32],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=False,
         is_train=False,
     ),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
-    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],

From 655a0a1321e5f5c5f86f520cc1ba565098b1879f Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Tue, 21 Apr 2026 14:11:29 +0900
Subject: [PATCH 039/183] Remove merge-conflict markers from default_camera_base_120m.py

---
 .../default/pipelines/default_camera_base_120m.py        | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
index 227c8241e..5bc85cbbe 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
@@ -38,21 +38,12 @@
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
-<<<<<<< HEAD
-        # scale_ratio_range=[0.95, 1.05],
-        # rot_range=[-0.78539816, 0.78539816],
-        # translation_std=[0.5, 0.5, 0.2],
-        scale_ratio_range=[0.98, 1.02],
-        rot_range=[-0.3925, 0.3925],
-        translation_std=[0.2, 0.2, 0.1],
-=======
         scale_ratio_range=[0.95, 1.05],
         rot_range=[-0.78539816, 0.78539816],
         translation_std=[0.5, 0.5, 0.2],
         # scale_ratio_range=[0.98, 1.02],
         # rot_range=[-0.3925, 0.3925],
         # translation_std=[0.2, 0.2, 0.1],
->>>>>>> e7daa8a9 (Added)
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),

From 0f5b5888148efcd2aac5af2315befd9301907745 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 21 Apr 2026 15:06:00 +0900
Subject: [PATCH 040/183] Drop data_root prefix in image check; set 30e lr to 1e-4

---
 autoware_ml/detection3d/datasets/t4dataset.py                  | 2 +-
 .../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index 74d274b87..ce1c78f31 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]:
                     break
 
                 if entry["images"][camera_order]["img_path"] is None or not osp.exists(
-                    self.data_root + entry["images"][camera_order]["img_path"]
+                    entry["images"][camera_order]["img_path"]
                 ):
                     filtered = True
                     break
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
index 264eda921..23d29acc1 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
@@ -1,6 +1,5 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
+lr = 1e-4
 t_max = 8
 max_epochs = 30
 val_interval = 1

From caecca60228a1468c1f139d331b096884da19a4b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 21 Apr 2026 17:38:51 +0900
Subject: [PATCH 041/183] Add base/2.7.0 results and release notes to BEVFusion-L docs

---
 .../BEVFusion/docs/BEVFusion-L/v2/base.md     | 288 +++++++++++++++++-
 1 file changed, 272 insertions(+), 16 deletions(-)

diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
index 9de8a2e34..72d47c4b3 100644
--- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
+++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
@@ -70,18 +70,20 @@
   <details>
   <summary> Eval Range: 0.0 - 50.0m </summary>
 
-  | Model version | mAP | car<br>(107,309) | truck<br>(24,206) | bus<br>(5,712) | bicycle<br>(4,060) | pedestrian<br>(77,369) |
-  | :---- | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 |
+  | Model version | mAP | mAPH | car<br>(107,309) | truck<br>(24,206) | bus<br>(5,712) | bicycle<br>(4,060) | pedestrian<br>(77,369) |
+  | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR base/2.7.0 | 0.8817 | 0.8496 | 0.9131 | 0.8552 | 0.9081 | 0.8357 | 0.8966 |
+  | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.8443 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 |
 
-  </details>
+	</details>
 
   <details>
   <summary> Eval Range: 50.0 - 90.0m </summary>
 
   | Model version | mAP | mAPH | car<br>(94,080) | truck<br>(27,651) | bus<br>(4,761) | bicycle<br>(2,365) | pedestrian<br>(37,523) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.7002 | 0.6621 | 0.8174 | 0.6660 | 0.6414 | 0.6430 | 0.7331 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 |
 
   </details>
 
@@ -90,7 +92,8 @@
 
   | Model version | mAP | mAPH | car<br>(36,895) | truck<br>(17,759) | bus<br>(2,852) | bicycle<br>(519) | pedestrian<br>(17,091) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.5600 | 0.5254 | 0.6578 | 0.5131 | 0.5178 | 0.4296 | 0.6815 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 |
 
   </details>
 
@@ -99,7 +102,8 @@
 
   | Model version | mAP | mAPH | car<br>(238,284) | truck<br>(69,616) | bus<br>(13,325) | bicycle<br>(6,944) | pedestrian<br>(131,983) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.7777 | 0.7420 | 0.8504 | 0.7065 | 0.7443 | 0.7538 | 0.8332 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 |
 
   </details>
 
@@ -119,7 +123,8 @@
 
   | Model version | mAP | mAPH | car<br>(42,789) | truck<br>(17,259) | bus<br>(3,437) | bicycle<br>(2,681) | pedestrian<br>(57,948) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 |
 
   </details>
 
@@ -128,7 +133,8 @@
 
   | Model version | mAP | mAPH | car<br>(35,518) | truck<br>(22,550) | bus<br>(2,683) | bicycle<br>(1,607) | pedestrian<br>(27,240) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 |
 
   </details>
 
@@ -137,7 +143,8 @@
 
   | Model version | mAP | mAPH | car<br>(16,524) | truck<br>(14,587) | bus<br>(2,476) | bicycle<br>(364) | pedestrian<br>(14,297) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 |
 
   </details>
 
@@ -146,6 +153,7 @@
 
   | Model version | mAP | mAPH | car<br>(94,831) | truck<br>(54,396) | bus<br>(8,596) | bicycle<br>(4,652) | pedestrian<br>(99,485) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 |
   | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 |
 
   </details>
@@ -167,7 +175,8 @@
 
   | Model version | mAP | mAPH | car<br>(14,883) | truck<br>(1,193) | bus<br>(336) | bicycle<br>(740) | pedestrian<br>(5,059) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 |
+	| BEVFusion-LiDAR base/2.7.0 | 0.8876 | 0.8447 | 0.9176 | 0.8727 | 0.9443 | 0.8396 | 0.8639 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 |
 
   </details>
 
@@ -176,7 +185,8 @@
 
   | Model version | mAP | mAPH | car<br>(10,994) | truck<br>(1,011) | bus<br>(143) | bicycle<br>(463) | pedestrian<br>(3,754) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.7392 | 0.6842 | 0.8425 | 0.7288 | 0.8580 | 0.5826 | 0.6839 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 |
 
   </details>
 
@@ -185,7 +195,8 @@
 
   | Model version | mAP | mAPH | car<br>(3,018) | truck<br>(602) | bus<br>(60) | bicycle<br>(85) | pedestrian<br>(1,121) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.5572 | 0.5118 | 0.7091 | 0.6393 | 0.6121 | 0.3386 | 0.4870 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 |
 
   </details>
 
@@ -194,6 +205,7 @@
 
   | Model version | mAP | mAPH | car<br>(28,895) | truck<br>(2,806) | bus<br>(539) | bicycle<br>(1,288) | pedestrian<br>(9,934) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR base/2.7.0 | 0.8086 | 0.7594 | 0.8789 | 0.7783 | 0.8898 | 0.7288 | 0.7670 |
   | BEVFusion-LiDAR base/2.6.0 | 0.7995 | 0.7514 | 0.8640 | 0.7788 | 0.8608 | 0.7272 | 0.7669 |
 
   </details>
@@ -221,7 +233,8 @@
 
   | Model version | mAP | mAPH | car<br>(49,637) | truck<br>(5,754) | bus<br>(1,939) | bicycle<br>(639) | pedestrian<br>(14,362) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.8776 | 0.8370 | 0.8907 | 0.8438 | 0.9473 | 0.8665 | 0.8397 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 |
 
   </details>
 
@@ -230,7 +243,8 @@
 
   | Model version | mAP | mAPH | car<br>(47,568) | truck<br>(4,090) | bus<br>(1,935) | bicycle<br>(295) | pedestrian<br>(6,529) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.6805 | 0.6279 | 0.7957 | 0.6451 | 0.7955 | 0.5394 | 0.6266 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 |
 
   </details>
 
@@ -239,6 +253,7 @@
 
   | Model version | mAP | mAPH | car<br>(17,353) | truck<br>(2,570) | bus<br>(316) | bicycle<br>(70) | pedestrian<br>(1,673) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+	| BEVFusion-LiDAR base/2.7.0 | 0.4902 | 0.4491 | 0.6483 | 0.4871 | 0.5172 | 0.4406 | 0.3578 |
   | BEVFusion-LiDAR base/2.6.0 | 0.4462 | 0.4042 | 0.6346 | 0.4758 | 0.3215 | 0.4303 | 0.3688 |
 
   </details>
@@ -248,7 +263,8 @@
 
   | Model version | mAP | mAPH | car<br>(114,558) | truck<br>(12,414) | bus<br>(4,190) | bicycle<br>(1,004) | pedestrian<br>(22,564) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 |
+  | BEVFusion-LiDAR base/2.7.0 | 0.7822 | 0.7349 | 0.8292 | 0.7169 | 0.8590 | 0.7505 | 0.7556 |
+	| BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 |
 
   </details>
 
@@ -256,6 +272,246 @@
 
 ## Release
 
+### BEVFusion-LiDAR base/2.7.0
+
+<details>
+<summary> Changes  </summary>
+
+- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping it to fourier features [[1]](https://arxiv.org/pdf/2006.10739).
+</details>
+
+<details>
+<summary> Artifacts </summary>
+
+- Deployed onnx and ROS parameter files (for internal)
+  - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/51628f64-9c15-4029-b3c5-5bf501d879e2?project_id=zWhWRzei)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/deployment.zip)
+  - [Google drive](https://drive.google.com/file/d/1zopj68qxLmI244qi3NgxB0ELT997V4W3/view?usp=drive_link)
+- Logs (for internal)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/logs.zip)
+  - [Google drive](https://drive.google.com/file/d/1-OIvsmsB69a5L_4sqjOSJ9IOltRWFDIv/view?usp=drive_link)
+- Pytorch Best checkpoints:
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/best_epoch_48.pth)
+  - [Google drive](https://drive.google.com/file/d/1b8iwwLBLAmn0NwqRaTJOWHMINfS9p_fc/view?usp=drive_link)
+
+</details>
+
+<details>
+<summary> Training configs </summary>
+
+- [Config file path](https://github.com/KSeangTan/AWML/blob/0f5b5888148efcd2aac5af2315befd9301907745/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py)
+- Train time: NVIDIA H100 80GB * 8 * 50 epochs ~= 4 days
+- Batch size: 8*8 = 64
+- Training Dataset (frames: 142,196):
+  - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames)
+  - j6: db_gsm8_v1 + db_j6_v1 + db_j6_v2 + db_j6_v3 + db_j6_v5 (29,336 frames)
+  - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (43,968 frames)
+  - largebus: db_largebus_v1 + db_largebus_v2 (12,605 frames)
+  - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames)
+
+</details>
+
+<details>
+<summary> Evaluation </summary>
+
+**Base Datasets (15,154 frames)**:
+
+  - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9
+  - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3
+  - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8817**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 107,309 | 0.9131 | 0.862 / 0.914 / 0.933 / 0.943 | 0.905 / 0.935 / 0.942 / 0.945 | 0.233 / 0.192 / 0.159 / 0.142 |
+| truck | 24,206 | 0.8552 | 0.711 / 0.843 / 0.919 / 0.948 | 0.795 / 0.877 / 0.918 / 0.934 | 0.297 / 0.225 / 0.192 / 0.180 |
+| bus | 5,712 | 0.9081 | 0.829 / 0.912 / 0.945 / 0.947 | 0.876 / 0.916 / 0.931 / 0.932 | 0.312 / 0.146 / 0.146 / 0.146 |
+| bicycle | 4,060 | 0.8357 | 0.813 / 0.840 / 0.844 / 0.846 | 0.857 / 0.868 / 0.869 / 0.870 | 0.210 / 0.194 / 0.194 / 0.194 |
+| pedestrian | 77,369 | 0.8966 | 0.877 / 0.895 / 0.903 / 0.911 | 0.857 / 0.867 / 0.874 / 0.878 | 0.148 / 0.148 / 0.148 / 0.147 |
+| **ALL** | 218,656 | 0.8817 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7002**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 94,080 | 0.8174 | 0.708 / 0.817 / 0.864 / 0.881 | 0.782 / 0.844 / 0.867 / 0.872 | 0.212 / 0.166 / 0.164 / 0.161 |
+| truck | 27,651 | 0.6660 | 0.463 / 0.626 / 0.759 / 0.815 | 0.612 / 0.714 / 0.787 / 0.812 | 0.229 / 0.190 / 0.154 / 0.130 |
+| bus | 4,761 | 0.6414 | 0.393 / 0.602 / 0.775 / 0.795 | 0.554 / 0.691 / 0.798 / 0.807 | 0.324 / 0.219 / 0.181 / 0.138 |
+| bicycle | 2,365 | 0.6430 | 0.586 / 0.658 / 0.663 / 0.666 | 0.683 / 0.715 / 0.716 / 0.717 | 0.141 / 0.141 / 0.141 / 0.141 |
+| pedestrian | 37,523 | 0.7331 | 0.711 / 0.730 / 0.741 / 0.750 | 0.732 / 0.742 / 0.748 / 0.753 | 0.145 / 0.145 / 0.145 / 0.144 |
+| **ALL** | 166,380 | 0.7002 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5600**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 36,895 | 0.6578 | 0.498 / 0.656 / 0.726 / 0.751 | 0.626 / 0.714 / 0.750 / 0.760 | 0.168 / 0.143 / 0.137 / 0.132 |
+| truck | 17,759 | 0.5131 | 0.206 / 0.450 / 0.648 / 0.749 | 0.439 / 0.611 / 0.720 / 0.775 | 0.240 / 0.193 / 0.134 / 0.124 |
+| bus | 2,852 | 0.5178 | 0.313 / 0.520 / 0.608 / 0.630 | 0.534 / 0.659 / 0.704 / 0.714 | 0.244 / 0.166 / 0.140 / 0.140 |
+| bicycle | 519 | 0.4296 | 0.315 / 0.421 / 0.491 / 0.491 | 0.503 / 0.563 / 0.592 / 0.592 | 0.180 / 0.180 / 0.180 / 0.180 |
+| pedestrian | 17,091 | 0.6815 | 0.660 / 0.678 / 0.687 / 0.700 | 0.698 / 0.708 / 0.712 / 0.719 | 0.126 / 0.126 / 0.126 / 0.126 |
+| **ALL** | 75,116 | 0.5600 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7777**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 238,284 | 0.8504 | 0.760 / 0.851 / 0.888 / 0.903 | 0.818 / 0.868 / 0.886 / 0.890 | 0.219 / 0.184 / 0.161 / 0.158 |
+| truck | 69,616 | 0.7065 | 0.492 / 0.671 / 0.802 / 0.861 | 0.641 / 0.752 / 0.822 / 0.851 | 0.251 / 0.216 / 0.173 / 0.136 |
+| bus | 13,325 | 0.7443 | 0.575 / 0.735 / 0.827 / 0.840 | 0.703 / 0.791 / 0.843 / 0.849 | 0.345 / 0.181 / 0.181 / 0.146 |
+| bicycle | 6,944 | 0.7538 | 0.714 / 0.761 / 0.769 / 0.771 | 0.776 / 0.797 / 0.800 / 0.801 | 0.186 / 0.176 / 0.176 / 0.176 |
+| pedestrian | 131,983 | 0.8332 | 0.813 / 0.831 / 0.840 / 0.849 | 0.802 / 0.812 / 0.818 / 0.824 | 0.144 / 0.145 / 0.145 / 0.145 |
+| **ALL** | 460,152 | 0.7777 | — | — | — |
+
+---
+
+**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames)  
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8876**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 14,883 | 0.9176 | 0.876 / 0.916 / 0.934 / 0.944 | 0.917 / 0.943 / 0.947 / 0.949 | 0.245 / 0.154 / 0.154 / 0.154 |
+| truck | 1,193 | 0.8727 | 0.747 / 0.873 / 0.926 / 0.944 | 0.829 / 0.900 / 0.924 / 0.928 | 0.269 / 0.206 / 0.157 / 0.157 |
+| bus | 336 | 0.9443 | 0.824 / 0.975 / 0.989 / 0.989 | 0.878 / 0.974 / 0.984 / 0.984 | 0.439 / 0.338 / 0.269 / 0.269 |
+| bicycle | 740 | 0.8396 | 0.764 / 0.848 / 0.869 / 0.877 | 0.833 / 0.862 / 0.866 / 0.871 | 0.194 / 0.194 / 0.182 / 0.182 |
+| pedestrian | 5,059 | 0.8639 | 0.848 / 0.863 / 0.869 / 0.876 | 0.837 / 0.845 / 0.850 / 0.853 | 0.167 / 0.167 / 0.167 / 0.154 |
+| **ALL** | 22,211 | 0.8876 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7392**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 10,994 | 0.8425 | 0.745 / 0.846 / 0.883 / 0.896 | 0.810 / 0.869 / 0.886 / 0.891 | 0.210 / 0.170 / 0.153 / 0.153 |
+| truck | 1,011 | 0.7288 | 0.537 / 0.722 / 0.818 / 0.838 | 0.670 / 0.784 / 0.834 / 0.840 | 0.184 / 0.158 / 0.113 / 0.113 |
+| bus | 143 | 0.8580 | 0.589 / 0.944 / 0.944 / 0.956 | 0.730 / 0.929 / 0.929 / 0.929 | 0.510 / 0.463 / 0.463 / 0.463 |
+| bicycle | 463 | 0.5826 | 0.477 / 0.607 / 0.622 / 0.625 | 0.606 / 0.667 / 0.671 / 0.673 | 0.118 / 0.112 / 0.102 / 0.102 |
+| pedestrian | 3,754 | 0.6839 | 0.664 / 0.681 / 0.690 / 0.702 | 0.698 / 0.705 / 0.711 / 0.717 | 0.121 / 0.117 / 0.117 / 0.117 |
+| **ALL** | 16,365 | 0.7392 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5572**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 3,018 | 0.7091 | 0.556 / 0.712 / 0.776 / 0.792 | 0.665 / 0.747 / 0.778 / 0.786 | 0.205 / 0.181 / 0.181 / 0.181 |
+| truck | 602 | 0.6393 | 0.365 / 0.651 / 0.760 / 0.781 | 0.553 / 0.730 / 0.789 / 0.798 | 0.208 / 0.208 / 0.152 / 0.152 |
+| bus | 60 | 0.6121 | 0.420 / 0.637 / 0.696 / 0.696 | 0.583 / 0.725 / 0.765 / 0.765 | 0.275 / 0.197 / 0.197 / 0.197 |
+| bicycle | 85 | 0.3386 | 0.244 / 0.355 / 0.378 / 0.378 | 0.446 / 0.514 / 0.524 / 0.524 | 0.181 / 0.181 / 0.137 / 0.137 |
+| pedestrian | 1,121 | 0.4870 | 0.473 / 0.483 / 0.490 / 0.502 | 0.579 / 0.586 / 0.591 / 0.593 | 0.137 / 0.137 / 0.137 / 0.137 |
+| **ALL** | 4,886 | 0.5572 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8086**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 28,895 | 0.8789 | 0.806 / 0.881 / 0.909 / 0.919 | 0.853 / 0.896 / 0.908 / 0.911 | 0.245 / 0.185 / 0.176 / 0.170 |
+| truck | 2,806 | 0.7783 | 0.597 / 0.778 / 0.859 / 0.880 | 0.714 / 0.824 / 0.865 / 0.870 | 0.206 / 0.206 / 0.157 / 0.155 |
+| bus | 539 | 0.8898 | 0.718 / 0.931 / 0.952 / 0.958 | 0.808 / 0.931 / 0.937 / 0.937 | 0.382 / 0.354 / 0.354 / 0.354 |
+| bicycle | 1,288 | 0.7288 | 0.641 / 0.744 / 0.762 / 0.768 | 0.729 / 0.769 / 0.773 / 0.776 | 0.176 / 0.176 / 0.176 / 0.172 |
+| pedestrian | 9,934 | 0.7670 | 0.749 / 0.765 / 0.772 / 0.782 | 0.757 / 0.765 / 0.771 / 0.775 | 0.137 / 0.137 / 0.137 / 0.137 |
+| **ALL** | 43,462 | 0.8086 | — | — | — |
+
+---
+
+**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames)
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8776**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 49,637 | 0.8907 | 0.841 / 0.890 / 0.909 / 0.922 | 0.896 / 0.924 / 0.931 / 0.934 | 0.269 / 0.199 / 0.159 / 0.135 |
+| truck | 5,754 | 0.8438 | 0.718 / 0.833 / 0.894 / 0.930 | 0.794 / 0.862 / 0.893 / 0.915 | 0.222 / 0.194 / 0.171 / 0.171 |
+| bus | 1,939 | 0.9473 | 0.878 / 0.942 / 0.983 / 0.986 | 0.925 / 0.963 / 0.981 / 0.982 | 0.206 / 0.140 / 0.140 / 0.140 |
+| bicycle | 639 | 0.8665 | 0.854 / 0.871 / 0.871 / 0.871 | 0.867 / 0.875 / 0.875 / 0.875 | 0.176 / 0.176 / 0.176 / 0.176 |
+| pedestrian | 14,362 | 0.8397 | 0.813 / 0.836 / 0.849 / 0.861 | 0.806 / 0.817 / 0.824 / 0.831 | 0.169 / 0.151 / 0.151 / 0.165 |
+| **ALL** | 72,331 | 0.8776 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6805**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 47,568 | 0.7957 | 0.662 / 0.795 / 0.851 / 0.875 | 0.760 / 0.838 / 0.866 / 0.874 | 0.212 / 0.184 / 0.164 / 0.164 |
+| truck | 4,090 | 0.6451 | 0.451 / 0.622 / 0.729 / 0.778 | 0.606 / 0.711 / 0.768 / 0.789 | 0.234 / 0.205 / 0.176 / 0.165 |
+| bus | 1,935 | 0.7955 | 0.571 / 0.760 / 0.912 / 0.938 | 0.694 / 0.815 / 0.906 / 0.916 | 0.345 / 0.240 / 0.182 / 0.168 |
+| bicycle | 295 | 0.5394 | 0.494 / 0.552 / 0.554 / 0.557 | 0.628 / 0.669 / 0.669 / 0.669 | 0.137 / 0.138 / 0.138 / 0.138 |
+| pedestrian | 6,529 | 0.6266 | 0.591 / 0.622 / 0.639 / 0.654 | 0.661 / 0.676 / 0.682 / 0.689 | 0.140 / 0.140 / 0.140 / 0.140 |
+| **ALL** | 60,417 | 0.6805 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4902**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 17,353 | 0.6483 | 0.452 / 0.639 / 0.734 / 0.768 | 0.608 / 0.712 / 0.760 / 0.774 | 0.168 / 0.153 / 0.143 / 0.132 |
+| truck | 2,570 | 0.4871 | 0.209 / 0.419 / 0.619 / 0.702 | 0.425 / 0.578 / 0.700 / 0.746 | 0.199 / 0.127 / 0.126 / 0.124 |
+| bus | 316 | 0.5172 | 0.246 / 0.532 / 0.626 / 0.665 | 0.433 / 0.640 / 0.701 / 0.721 | 0.173 / 0.100 / 0.100 / 0.089 |
+| bicycle | 70 | 0.4406 | 0.382 / 0.438 / 0.471 / 0.471 | 0.584 / 0.619 / 0.637 / 0.637 | 0.193 / 0.193 / 0.193 / 0.193 |
+| pedestrian | 1,673 | 0.3578 | 0.344 / 0.354 / 0.362 / 0.371 | 0.492 / 0.496 / 0.500 / 0.505 | 0.137 / 0.107 / 0.107 / 0.111 |
+| **ALL** | 21,982 | 0.4902 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7822**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 114,558 | 0.8292 | 0.725 / 0.826 / 0.872 / 0.894 | 0.800 / 0.859 / 0.881 / 0.888 | 0.232 / 0.194 / 0.164 / 0.158 |
+| truck | 12,414 | 0.7169 | 0.534 / 0.691 / 0.795 / 0.847 | 0.665 / 0.760 / 0.816 / 0.843 | 0.251 / 0.194 / 0.166 / 0.151 |
+| bus | 4,190 | 0.8590 | 0.703 / 0.840 / 0.938 / 0.955 | 0.790 / 0.874 / 0.929 / 0.936 | 0.345 / 0.186 / 0.182 / 0.168 |
+| bicycle | 1,004 | 0.7505 | 0.724 / 0.758 / 0.760 / 0.760 | 0.781 / 0.798 / 0.799 / 0.799 | 0.176 / 0.176 / 0.176 / 0.176 |
+| pedestrian | 22,564 | 0.7556 | 0.727 / 0.752 / 0.766 / 0.778 | 0.744 / 0.756 / 0.763 / 0.770 | 0.152 / 0.151 / 0.151 / 0.151 |
+| **ALL** | 154,730 | 0.7822 | — | — | — |
+
+---
+
+**JPNTaxi_Gen2**: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (9,975 frames)
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8837**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 42,789 | 0.9393 | 0.882 / 0.945 / 0.964 / 0.967 | 0.911 / 0.946 / 0.954 / 0.955 | 0.211 / 0.168 / 0.142 / 0.142 |
+| truck | 17,259 | 0.8587 | 0.709 / 0.846 / 0.926 / 0.954 | 0.795 / 0.881 / 0.926 / 0.941 | 0.371 / 0.243 / 0.234 / 0.189 |
+| bus | 3,437 | 0.8802 | 0.798 / 0.889 / 0.916 / 0.918 | 0.850 / 0.886 / 0.898 / 0.899 | 0.369 / 0.146 / 0.128 / 0.128 |
+| bicycle | 2,681 | 0.8268 | 0.816 / 0.830 / 0.831 / 0.831 | 0.865 / 0.871 / 0.872 / 0.872 | 0.219 / 0.219 / 0.219 / 0.219 |
+| pedestrian | 57,948 | 0.9135 | 0.896 / 0.912 / 0.919 / 0.926 | 0.872 / 0.882 / 0.889 / 0.893 | 0.148 / 0.140 / 0.143 / 0.140 |
+| **ALL** | 124,114 | 0.8837 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6901**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 35,518 | 0.8382 | 0.757 / 0.838 / 0.874 / 0.885 | 0.803 / 0.847 / 0.862 / 0.865 | 0.212 / 0.165 / 0.162 / 0.161 |
+| truck | 22,550 | 0.6676 | 0.462 / 0.623 / 0.762 / 0.823 | 0.611 / 0.711 / 0.788 / 0.816 | 0.247 / 0.193 / 0.154 / 0.130 |
+| bus | 2,683 | 0.5007 | 0.240 / 0.447 / 0.649 / 0.667 | 0.421 / 0.581 / 0.708 / 0.717 | 0.242 / 0.151 / 0.144 / 0.144 |
+| bicycle | 1,607 | 0.6794 | 0.635 / 0.692 / 0.695 / 0.697 | 0.719 / 0.740 / 0.742 / 0.743 | 0.146 / 0.141 / 0.141 / 0.141 |
+| pedestrian | 27,240 | 0.7645 | 0.745 / 0.762 / 0.772 / 0.780 | 0.753 / 0.764 / 0.769 / 0.773 | 0.156 / 0.144 / 0.145 / 0.145 |
+| **ALL** | 89,598 | 0.6901 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5750**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 16,524 | 0.6601 | 0.539 / 0.665 / 0.710 / 0.727 | 0.643 / 0.715 / 0.740 / 0.745 | 0.138 / 0.108 / 0.108 / 0.109 |
+| truck | 14,587 | 0.5131 | 0.200 / 0.448 / 0.649 / 0.756 | 0.438 / 0.613 / 0.721 / 0.779 | 0.248 / 0.193 / 0.134 / 0.124 |
+| bus | 2,476 | 0.5145 | 0.318 / 0.515 / 0.602 / 0.623 | 0.547 / 0.661 / 0.704 / 0.714 | 0.244 / 0.163 / 0.152 / 0.148 |
+| bicycle | 364 | 0.4541 | 0.324 / 0.439 / 0.527 / 0.527 | 0.504 / 0.567 / 0.604 / 0.604 | 0.174 / 0.171 / 0.171 / 0.171 |
+| pedestrian | 14,297 | 0.7331 | 0.711 / 0.730 / 0.739 / 0.753 | 0.731 / 0.742 / 0.746 / 0.754 | 0.126 / 0.126 / 0.126 / 0.126 |
+| **ALL** | 48,248 | 0.5750 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7715**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 94,831 | 0.8661 | 0.785 / 0.869 / 0.900 / 0.910 | 0.828 / 0.871 / 0.884 / 0.887 | 0.198 / 0.165 / 0.150 / 0.141 |
+| truck | 54,396 | 0.7010 | 0.478 / 0.662 / 0.800 / 0.864 | 0.632 / 0.747 / 0.821 / 0.852 | 0.273 / 0.216 / 0.173 / 0.134 |
+| bus | 8,596 | 0.6721 | 0.500 / 0.665 / 0.756 / 0.768 | 0.648 / 0.737 / 0.792 / 0.798 | 0.326 / 0.151 / 0.146 / 0.146 |
+| bicycle | 4,652 | 0.7611 | 0.731 / 0.766 / 0.773 / 0.775 | 0.790 / 0.805 / 0.809 / 0.809 | 0.186 / 0.187 / 0.187 / 0.187 |
+| pedestrian | 99,485 | 0.8573 | 0.838 / 0.855 / 0.864 / 0.872 | 0.820 / 0.830 / 0.836 / 0.841 | 0.145 / 0.143 / 0.145 / 0.143 |
+| **ALL** | 261,960 | 0.7715 | — | — | — |
+
+</details>
+
+---
+
 ### BEVFusion-LiDAR base/2.6.0
 
 <details>

From 07c2e110802ec2537d4c620d9af7f7e1b8120b97 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 21 Apr 2026 17:39:32 +0900
Subject: [PATCH 042/183] Update base docstring

---
 projects/BEVFusion/docs/BEVFusion-L/v2/base.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
index 72d47c4b3..ecdd1e9a8 100644
--- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
+++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md
@@ -277,7 +277,7 @@
 <details>
 <summary> Changes  </summary>
 
-- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping it to fourier features [[1]](https://arxiv.org/pdf/2006.10739).
+- Train with min-max normalization of (x, y, z, intensity, time_lag) into [0, 1], followed by a mapping to Fourier features [[1]](https://arxiv.org/pdf/2006.10739).
 </details>
 
 <details>

From 2665b277bda7865a10f04daa37b8eaa8ea6c5606 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 21 Apr 2026 19:15:35 +0900
Subject: [PATCH 043/183] Update j6gen2_base and jpntaxi_base docstring

---
 .../v2/{j6gen2.md => j6gen2_base.md}          | 220 +++++++++++++++++-
 .../docs/BEVFusion-L/v2/jpntaxi_base.md       | 153 ++++++++++++
 2 files changed, 363 insertions(+), 10 deletions(-)
 rename projects/BEVFusion/docs/BEVFusion-L/v2/{j6gen2.md => j6gen2_base.md} (54%)
 create mode 100644 projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md

diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md
similarity index 54%
rename from projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md
rename to projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md
index 8ad986677..54e994313 100644
--- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md
+++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md
@@ -64,7 +64,8 @@
 
   | Model version | mAP | mAPH | car<br>(64,520) | truck<br>(6,947) | bus<br>(2,275) | bicycle<br>(1,379) | pedestrian<br>(19,421) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8828 | 0.8387 | 0.9022 | 0.8627 | 0.9440 | 0.8483 | 0.8569 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 |
 
   </details>
 
@@ -73,7 +74,8 @@
 
   | Model version | mAP | mAPH | car<br>(58,562) | truck<br>(5,101) | bus<br>(2,078) | bicycle<br>(758) | pedestrian<br>(10,283) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7193 | 0.6620 | 0.8197 | 0.6856 | 0.8249 | 0.5862 | 0.6801 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 |
 
   </details>
 
@@ -82,7 +84,8 @@
 
   | Model version | mAP | mAPH | car<br>(20,371) | truck<br>(3,172) | bus<br>(376) | bicycle<br>(155) | pedestrian<br>(2,794) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5223 | 0.4757 | 0.6814 | 0.5181 | 0.5381 | 0.4165 | 0.4573 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 |
 
   </details>
 
@@ -91,6 +94,7 @@
 
   | Model version | mAP | mAPH | car<br>(143,453) | truck<br>(15,220) | bus<br>(4,729) | bicycle<br>(2,292) | pedestrian<br>(32,498) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7990 | 0.7487 | 0.8508 | 0.7435 | 0.8711 | 0.7487 | 0.7809 |
   | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7903 | 0.7413 | 0.8266 | 0.7409 | 0.8510 | 0.7541 | 0.7790 |
 
   </details>
@@ -112,6 +116,7 @@
 
   | Model version | mAP | mAPH | car<br>(14,883) | truck<br>(1,193) | bus<br>(336) | bicycle<br>(740) | pedestrian<br>(5,059) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8947 | 0.8393 | 0.9231 | 0.8893 | 0.9564 | 0.8264 | 0.8782 |
   | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8985 | 0.8484 | 0.9087 | 0.8974 | 0.9636 | 0.8447 | 0.8780 |
 
   </details>
@@ -121,7 +126,8 @@
 
   | Model version | mAP | mAPH | car<br>(10,994) | truck<br>(1,011) | bus<br>(143) | bicycle<br>(463) | pedestrian<br>(3,754) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7679 | 0.7089 | 0.8567 | 0.7666 | 0.8723 | 0.5955 | 0.7485 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 |
 
   </details>
 
@@ -130,7 +136,8 @@
 
   | Model version | mAP | mAPH | car<br>(3,018) | truck<br>(602) | bus<br>(60) | bicycle<br>(85) | pedestrian<br>(1,121) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5924 | 0.5370 | 0.7238 | 0.6616 | 0.6305 | 0.3964 | 0.5497 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 |
 
   </details>
 
@@ -139,7 +146,8 @@
 
   | Model version | mAP | mAPH | car<br>(28,895) | truck<br>(2,806) | bus<br>(539) | bicycle<br>(1,288) | pedestrian<br>(9,934) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8267 | 0.7675 | 0.8888 | 0.8055 | 0.9009 | 0.7334 | 0.8051 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 |
 
   </details>
 
@@ -166,7 +174,8 @@
 
   | Model version | mAP | mAPH | car<br>(49,637) | truck<br>(5,754) | bus<br>(1,939) | bicycle<br>(639) | pedestrian<br>(14,362) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8836 | 0.8431 | 0.8942 | 0.8569 | 0.9393 | 0.8780 | 0.8494 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 |
 
   </details>
 
@@ -175,7 +184,8 @@
 
   | Model version | mAP | mAPH | car<br>(47,568) | truck<br>(4,090) | bus<br>(1,935) | bicycle<br>(295) | pedestrian<br>(6,529) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7040 | 0.6488 | 0.8118 | 0.6662 | 0.8221 | 0.5781 | 0.6417 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 |
 
   </details>
 
@@ -184,7 +194,8 @@
 
   | Model version | mAP | mAPH | car<br>(17,353) | truck<br>(2,570) | bus<br>(316) | bicycle<br>(70) | pedestrian<br>(1,673) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5030 | 0.4572 | 0.6739 | 0.4847 | 0.5186 | 0.4430 | 0.3948 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 |
 
   </details>
 
@@ -193,7 +204,8 @@
 
   | Model version | mAP | mAPH | car<br>(114,558) | truck<br>(12,414) | bus<br>(4,190) | bicycle<br>(1,004) | pedestrian<br>(22,564) |
   | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 |
+  | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7958 | 0.7472 | 0.8408 | 0.7294 | 0.8673 | 0.7710 | 0.7706 |
+  | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 |
 
   </details>
 
@@ -201,6 +213,194 @@
 
 ## Release
 
+### BEVFusion-LiDAR J6Gen2_base/2.7.1
+
+<details>
+<summary> Changes  </summary>
+
+- Fine-tune from `BEVFusion-LiDAR base/2.7.0` on the j6gen2 base dataset with intensity.
+</details>
+
+<details>
+<summary> Artifacts </summary>
+
+- Deployed onnx and ROS parameter files (for internal)
+  - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/ab0f33f5-2c8e-4adf-b122-f8f0c229c91e?project_id=zWhWRzei)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/deployment.zip)
+  - [Google drive](https://drive.google.com/file/d/1Sw2UkqsoOP_YhoPpLqaBvHFnBapBV1kw/view?usp=drive_link)
+- Logs (for internal)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/logs.zip)
+  - [Google drive](https://drive.google.com/file/d/1M_Ae0rQ9L1I4NbzSL9tlJ8D0KVGvunKF/view?usp=drive_link)
+- Pytorch Best checkpoints:
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/best_epoch_28.pth)
+  - [Google drive](https://drive.google.com/file/d/1xsFKCIkqVnt273o2SKjjCayuh_4IV-Vd/view?usp=drive_link)
+
+</details>
+
+<details>
+<summary> Training configs </summary>
+
+- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py)
+- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours
+- Batch size: 8*8 = 64
+- Training Dataset (frames: 55,714):
+  - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 (43,109 frames)
+  - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames)
+
+</details>
+
+<details>
+<summary> Evaluation </summary>
+
+**J6Gen2_base Datasets (5,179 frames)**:
+
+  - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9
+  - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8828**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 64,520 | 0.9022 | 0.853 / 0.901 / 0.921 / 0.933 | 0.904 / 0.931 / 0.937 / 0.939 | 0.260 / 0.193 / 0.180 / 0.172 |
+| truck | 6,947 | 0.8627 | 0.736 / 0.863 / 0.910 / 0.942 | 0.800 / 0.877 / 0.903 / 0.920 | 0.244 / 0.191 / 0.188 / 0.166 |
+| bus | 2,275 | 0.9440 | 0.866 / 0.940 / 0.983 / 0.986 | 0.912 / 0.958 / 0.978 / 0.980 | 0.203 / 0.177 / 0.163 / 0.138 |
+| bicycle | 1,379 | 0.8483 | 0.802 / 0.849 / 0.869 / 0.874 | 0.847 / 0.867 / 0.876 / 0.879 | 0.205 / 0.191 / 0.172 / 0.172 |
+| pedestrian | 19,421 | 0.8569 | 0.834 / 0.854 / 0.865 / 0.875 | 0.822 / 0.833 / 0.838 / 0.844 | 0.163 / 0.152 / 0.152 / 0.152 |
+| **ALL** | 94,542 | 0.8828 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7193**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 58,562 | 0.8197 | 0.694 / 0.818 / 0.873 / 0.893 | 0.782 / 0.853 / 0.879 / 0.886 | 0.228 / 0.173 / 0.164 / 0.164 |
+| truck | 5,101 | 0.6856 | 0.484 / 0.670 / 0.773 / 0.815 | 0.633 / 0.743 / 0.798 / 0.816 | 0.213 / 0.206 / 0.184 / 0.164 |
+| bus | 2,078 | 0.8249 | 0.626 / 0.815 / 0.918 / 0.941 | 0.730 / 0.846 / 0.904 / 0.919 | 0.342 / 0.211 / 0.210 / 0.160 |
+| bicycle | 758 | 0.5862 | 0.495 / 0.603 / 0.622 / 0.624 | 0.637 / 0.679 / 0.683 / 0.683 | 0.183 / 0.155 / 0.155 / 0.183 |
+| pedestrian | 10,283 | 0.6801 | 0.650 / 0.676 / 0.691 / 0.703 | 0.692 / 0.705 / 0.713 / 0.720 | 0.136 / 0.136 / 0.136 / 0.136 |
+| **ALL** | 76,782 | 0.7193 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5223**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 20,371 | 0.6814 | 0.493 / 0.674 / 0.763 / 0.796 | 0.638 / 0.737 / 0.781 / 0.795 | 0.193 / 0.159 / 0.151 / 0.151 |
+| truck | 3,172 | 0.5181 | 0.227 / 0.454 / 0.652 / 0.738 | 0.447 / 0.601 / 0.715 / 0.762 | 0.206 / 0.206 / 0.162 / 0.140 |
+| bus | 376 | 0.5381 | 0.272 / 0.557 / 0.643 / 0.680 | 0.462 / 0.669 / 0.714 / 0.731 | 0.217 / 0.151 / 0.115 / 0.115 |
+| bicycle | 155 | 0.4165 | 0.316 / 0.419 / 0.466 / 0.466 | 0.487 / 0.553 / 0.589 / 0.589 | 0.199 / 0.166 / 0.190 / 0.190 |
+| pedestrian | 2,794 | 0.4573 | 0.443 / 0.452 / 0.462 / 0.472 | 0.564 / 0.569 / 0.573 / 0.578 | 0.120 / 0.120 / 0.120 / 0.120 |
+| **ALL** | 26,868 | 0.5223 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7990**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 143,453 | 0.8508 | 0.752 / 0.849 / 0.891 / 0.910 | 0.820 / 0.874 / 0.894 / 0.900 | 0.232 / 0.189 / 0.174 / 0.164 |
+| truck | 15,220 | 0.7435 | 0.555 / 0.725 / 0.824 / 0.871 | 0.677 / 0.780 / 0.834 / 0.858 | 0.234 / 0.206 / 0.186 / 0.165 |
+| bus | 4,729 | 0.8711 | 0.726 / 0.865 / 0.939 / 0.954 | 0.804 / 0.890 / 0.928 / 0.937 | 0.408 / 0.211 / 0.177 / 0.161 |
+| bicycle | 2,292 | 0.7487 | 0.682 / 0.754 / 0.777 / 0.781 | 0.760 / 0.789 / 0.799 / 0.801 | 0.191 / 0.189 / 0.189 / 0.190 |
+| pedestrian | 32,498 | 0.7809 | 0.756 / 0.777 / 0.790 / 0.801 | 0.760 / 0.772 / 0.778 / 0.784 | 0.151 / 0.136 / 0.136 / 0.136 |
+| **ALL** | 198,192 | 0.7990 | — | — | — |
+
+---
+
+**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames)  
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8947**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 14,883 | 0.9231 | 0.884 / 0.925 / 0.937 / 0.946 | 0.923 / 0.947 / 0.952 / 0.953 | 0.234 / 0.178 / 0.178 / 0.178 |
+| truck | 1,193 | 0.8893 | 0.754 / 0.905 / 0.938 / 0.961 | 0.832 / 0.922 / 0.939 / 0.945 | 0.269 / 0.201 / 0.188 / 0.116 |
+| bus | 336 | 0.9564 | 0.872 / 0.983 / 0.985 / 0.986 | 0.904 / 0.962 / 0.965 / 0.965 | 0.419 / 0.174 / 0.174 / 0.174 |
+| bicycle | 740 | 0.8264 | 0.749 / 0.825 / 0.862 / 0.870 | 0.824 / 0.854 / 0.867 / 0.872 | 0.249 / 0.247 / 0.198 / 0.198 |
+| pedestrian | 5,059 | 0.8782 | 0.862 / 0.876 / 0.883 / 0.891 | 0.849 / 0.857 / 0.861 / 0.866 | 0.148 / 0.148 / 0.139 / 0.140 |
+| **ALL** | 22,211 | 0.8947 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7679**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 10,994 | 0.8567 | 0.759 / 0.860 / 0.897 / 0.911 | 0.824 / 0.881 / 0.898 / 0.901 | 0.210 / 0.164 / 0.160 / 0.160 |
+| truck | 1,011 | 0.7666 | 0.593 / 0.770 / 0.843 / 0.860 | 0.710 / 0.818 / 0.851 / 0.854 | 0.234 / 0.219 / 0.166 / 0.150 |
+| bus | 143 | 0.8723 | 0.698 / 0.921 / 0.932 / 0.939 | 0.788 / 0.904 / 0.911 / 0.911 | 0.294 / 0.498 / 0.498 / 0.498 |
+| bicycle | 463 | 0.5955 | 0.472 / 0.616 / 0.647 / 0.648 | 0.625 / 0.685 / 0.692 / 0.692 | 0.151 / 0.151 / 0.151 / 0.151 |
+| pedestrian | 3,754 | 0.7485 | 0.726 / 0.747 / 0.755 / 0.766 | 0.740 / 0.749 / 0.755 / 0.761 | 0.124 / 0.124 / 0.121 / 0.121 |
+| **ALL** | 16,365 | 0.7679 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5924**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 3,018 | 0.7238 | 0.573 / 0.728 / 0.789 / 0.806 | 0.688 / 0.765 / 0.792 / 0.801 | 0.221 / 0.228 / 0.158 / 0.158 |
+| truck | 602 | 0.6616 | 0.381 / 0.676 / 0.780 / 0.809 | 0.575 / 0.756 / 0.811 / 0.822 | 0.216 / 0.208 / 0.176 / 0.176 |
+| bus | 60 | 0.6305 | 0.434 / 0.626 / 0.730 / 0.732 | 0.608 / 0.745 / 0.793 / 0.793 | 0.217 / 0.217 / 0.087 / 0.087 |
+| bicycle | 85 | 0.3964 | 0.298 / 0.382 / 0.452 / 0.453 | 0.468 / 0.544 / 0.595 / 0.595 | 0.166 / 0.166 / 0.166 / 0.166 |
+| pedestrian | 1,121 | 0.5497 | 0.536 / 0.546 / 0.552 / 0.565 | 0.624 / 0.629 / 0.633 / 0.638 | 0.120 / 0.118 / 0.118 / 0.118 |
+| **ALL** | 4,886 | 0.5924 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8267**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 28,895 | 0.8888 | 0.815 / 0.891 / 0.919 / 0.930 | 0.864 / 0.905 / 0.917 / 0.919 | 0.230 / 0.180 / 0.180 / 0.176 |
+| truck | 2,806 | 0.8055 | 0.623 / 0.816 / 0.879 / 0.903 | 0.736 / 0.851 / 0.882 / 0.888 | 0.233 / 0.207 / 0.183 / 0.169 |
+| bus | 539 | 0.9009 | 0.783 / 0.929 / 0.945 / 0.948 | 0.838 / 0.921 / 0.929 / 0.929 | 0.430 / 0.208 / 0.208 / 0.208 |
+| bicycle | 1,288 | 0.7334 | 0.637 / 0.738 / 0.776 / 0.783 | 0.730 / 0.774 / 0.793 / 0.796 | 0.186 / 0.161 / 0.161 / 0.161 |
+| pedestrian | 9,934 | 0.8051 | 0.787 / 0.803 / 0.811 / 0.820 | 0.782 / 0.790 / 0.796 / 0.801 | 0.149 / 0.135 / 0.128 / 0.135 |
+| **ALL** | 43,462 | 0.8267 | — | — | — |
+
+---
+
+**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames)
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8836**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 49,637 | 0.8942 | 0.843 / 0.891 / 0.912 / 0.931 | 0.899 / 0.926 / 0.933 / 0.935 | 0.277 / 0.202 / 0.189 / 0.172 |
+| truck | 5,754 | 0.8569 | 0.732 / 0.854 / 0.905 / 0.937 | 0.794 / 0.867 / 0.896 / 0.915 | 0.244 / 0.191 / 0.189 / 0.180 |
+| bus | 1,939 | 0.9393 | 0.864 / 0.932 / 0.975 / 0.986 | 0.916 / 0.958 / 0.981 / 0.984 | 0.203 / 0.187 / 0.139 / 0.138 |
+| bicycle | 639 | 0.8780 | 0.868 / 0.881 / 0.881 / 0.882 | 0.881 / 0.888 / 0.888 / 0.888 | 0.172 / 0.172 / 0.172 / 0.172 |
+| pedestrian | 14,362 | 0.8494 | 0.824 / 0.846 / 0.858 / 0.869 | 0.813 / 0.825 / 0.831 / 0.837 | 0.163 / 0.161 / 0.155 / 0.155 |
+| **ALL** | 72,331 | 0.8836 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7040**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 47,568 | 0.8118 | 0.679 / 0.810 / 0.868 / 0.890 | 0.772 / 0.846 / 0.874 / 0.883 | 0.228 / 0.173 / 0.164 / 0.163 |
+| truck | 4,090 | 0.6662 | 0.459 / 0.645 / 0.757 / 0.804 | 0.614 / 0.724 / 0.785 / 0.807 | 0.213 / 0.206 / 0.184 / 0.164 |
+| bus | 1,935 | 0.8221 | 0.621 / 0.806 / 0.919 / 0.943 | 0.727 / 0.842 / 0.904 / 0.921 | 0.413 / 0.211 / 0.206 / 0.160 |
+| bicycle | 295 | 0.5781 | 0.542 / 0.588 / 0.590 / 0.592 | 0.674 / 0.686 / 0.686 / 0.690 | 0.215 / 0.206 / 0.206 / 0.206 |
+| pedestrian | 6,529 | 0.6417 | 0.608 / 0.636 / 0.655 / 0.668 | 0.666 / 0.682 / 0.692 / 0.699 | 0.136 / 0.136 / 0.136 / 0.136 |
+| **ALL** | 60,417 | 0.7040 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5030**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 17,353 | 0.6739 | 0.479 / 0.664 / 0.759 / 0.794 | 0.631 / 0.732 / 0.780 / 0.794 | 0.193 / 0.159 / 0.146 / 0.146 |
+| truck | 2,570 | 0.4847 | 0.194 / 0.401 / 0.621 / 0.723 | 0.414 / 0.562 / 0.692 / 0.751 | 0.206 / 0.179 / 0.130 / 0.128 |
+| bus | 316 | 0.5186 | 0.238 / 0.541 / 0.625 / 0.670 | 0.433 / 0.657 / 0.703 / 0.724 | 0.218 / 0.151 / 0.115 / 0.115 |
+| bicycle | 70 | 0.4430 | 0.340 / 0.465 / 0.483 / 0.483 | 0.513 / 0.584 / 0.602 / 0.602 | 0.199 / 0.199 / 0.199 / 0.199 |
+| pedestrian | 1,673 | 0.3948 | 0.381 / 0.389 / 0.401 / 0.408 | 0.524 / 0.528 / 0.532 / 0.535 | 0.125 / 0.125 / 0.125 / 0.125 |
+| **ALL** | 21,982 | 0.5030 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7958**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 114,558 | 0.8408 | 0.737 / 0.837 / 0.882 / 0.906 | 0.809 / 0.866 / 0.888 / 0.895 | 0.236 / 0.189 / 0.164 / 0.164 |
+| truck | 12,414 | 0.7294 | 0.539 / 0.704 / 0.811 / 0.863 | 0.664 / 0.764 / 0.823 / 0.851 | 0.244 / 0.206 / 0.183 / 0.164 |
+| bus | 4,190 | 0.8673 | 0.719 / 0.856 / 0.939 / 0.956 | 0.800 / 0.886 / 0.928 / 0.939 | 0.342 / 0.211 / 0.161 / 0.161 |
+| bicycle | 1,004 | 0.7710 | 0.747 / 0.778 / 0.780 / 0.780 | 0.801 / 0.813 / 0.814 / 0.815 | 0.191 / 0.191 / 0.191 / 0.191 |
+| pedestrian | 22,564 | 0.7706 | 0.743 / 0.766 / 0.781 / 0.792 | 0.751 / 0.764 / 0.771 / 0.778 | 0.152 / 0.146 / 0.136 / 0.146 |
+| **ALL** | 154,730 | 0.7958 | — | — | — |
+
+</details>
+
+---
+
 ### BEVFusion-LiDAR J6Gen2_base/2.6.1
 
 <details>
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md
new file mode 100644
index 000000000..fc9e2677d
--- /dev/null
+++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md
@@ -0,0 +1,153 @@
+# Deployed model for BEVFusion-LiDAR JPNTaxi_base/2.X
+## Summary
+
+### Main Parameters
+
+  - **Range:** [122.40m, 122.40m, 8.0m]
+  - **Voxel Size:** [0.17, 0.17, 0.2]
+  - **Grid Size:** [1440, 1440, 40]
+  - **With Intensity**
+
+### Testing Datasets
+
+- **Total Frames: 9,975**
+
+  <details>
+  <summary> jpntaxi_gen2 (9,975 frames) </summary>
+    - `db_jpntaxigen2_v1`
+    - `db_jpntaxigen2_v2`
+
+  </details>
+
+### mAP - JPNTaxi_gen2
+
+- **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m**
+
+  <details>
+  <summary> Eval Range: 0.0 - 50.0m </summary>
+
+  | Model version | mAP | mAPH | car<br>(42,789) | truck<br>(17,259) | bus<br>(3,437) | bicycle<br>(2,681) | pedestrian<br>(57,948) |
+  | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.8862 | 0.8586 | 0.9397 | 0.8591 | 0.8839 | 0.8264 | 0.9218 |
+
+  </details>
+
+  <details>
+  <summary> Eval Range: 50.0 - 90.0m </summary>
+
+  | Model version | mAP | mAPH | car<br>(35,518) | truck<br>(22,550) | bus<br>(2,683) | bicycle<br>(1,607) | pedestrian<br>(27,240) |
+  | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7125 | 0.6854 | 0.8453 | 0.6838 | 0.5362 | 0.6969 | 0.8003 |
+
+  </details>
+
+  <details>
+  <summary> Eval Range: 90.0 - 121.0m </summary>
+
+  | Model version | mAP | mAPH | car<br>(16,524) | truck<br>(14,587) | bus<br>(2,476) | bicycle<br>(364) | pedestrian<br>(14,297) |
+  | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.6030 | 0.5762 | 0.6947 | 0.5260 | 0.5030 | 0.5321 | 0.7591 |
+
+  </details>
+
+  <details open>
+  <summary> Eval Range: 0.0 - 121.0m </summary>
+
+  | Model version | mAP | mAPH | car<br>(94,831) | truck<br>(54,396) | bus<br>(8,596) | bicycle<br>(4,652) | pedestrian<br>(99,485) |
+  | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+  | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7805 | 0.7527 | 0.8730 | 0.7118 | 0.6785 | 0.7655 | 0.8739 |
+
+  </details>
+
+## Release
+
+### BEVFusion-LiDAR JPNTaxi_base/2.7.1
+
+<details>
+<summary> Changes  </summary>
+
+- Fine-tuned from `BEVFusion-LiDAR base/2.7.0` on the JPNTaxi_base dataset, with intensity.
+</details>
+
+<details>
+<summary> Artifacts </summary>
+
+- Deployed ONNX and ROS parameter files (for internal use)
+  - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/47abcab3-34e1-4971-9bdf-5a2af5d2b2e6?project_id=zWhWRzei)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/deployment.zip)
+  - [Google drive](https://drive.google.com/file/d/1nQlYrnCjlxXbUamEj7MCL_sKxojoU_wk/view?usp=drive_link)
+- Logs (for internal use)
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/logs.zip)
+  - [Google drive](https://drive.google.com/file/d/1q_3zj9nF6mnA5IgyO1QRswS7XqnXqvUH/view?usp=drive_link)
+- PyTorch best checkpoints:
+  - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/best_epoch_30.pth)
+  - [Google drive](https://drive.google.com/file/d/1K7rDv7fb8T2haXHxttbZN7FUEoLYESTr/view?usp=drive_link)
+
+</details>
+
+<details>
+<summary> Training configs </summary>
+
+- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py)
+- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours
+- Batch size: 8*8 = 64
+- Training Dataset (frames: 56,287):
+  - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames)
+  - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames)
+
+</details>
+
+<details>
+<summary> Evaluation </summary>
+
+**JPNTaxi_gen2 Datasets (9,975 frames)**:
+
+  - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8862**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 42,789 | 0.9397 | 0.891 / 0.943 / 0.960 / 0.965 | 0.918 / 0.946 / 0.953 / 0.954 | 0.284 / 0.175 / 0.175 / 0.164 |
+| truck | 17,259 | 0.8591 | 0.701 / 0.842 / 0.935 / 0.958 | 0.792 / 0.882 / 0.932 / 0.946 | 0.409 / 0.321 / 0.241 / 0.241 |
+| bus | 3,437 | 0.8839 | 0.796 / 0.888 / 0.925 / 0.927 | 0.853 / 0.897 / 0.910 / 0.910 | 0.296 / 0.184 / 0.104 / 0.104 |
+| bicycle | 2,681 | 0.8264 | 0.819 / 0.829 / 0.829 / 0.829 | 0.866 / 0.871 / 0.871 / 0.871 | 0.223 / 0.223 / 0.223 / 0.223 |
+| pedestrian | 57,948 | 0.9218 | 0.906 / 0.921 / 0.927 / 0.933 | 0.883 / 0.893 / 0.899 / 0.903 | 0.135 / 0.129 / 0.125 / 0.132 |
+| **ALL** | 124,114 | 0.8862 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7125**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 35,518 | 0.8453 | 0.763 / 0.846 / 0.881 / 0.891 | 0.819 / 0.860 / 0.875 / 0.879 | 0.227 / 0.180 / 0.166 / 0.166 |
+| truck | 22,550 | 0.6838 | 0.475 / 0.640 / 0.782 / 0.838 | 0.632 / 0.730 / 0.808 / 0.831 | 0.286 / 0.195 / 0.167 / 0.128 |
+| bus | 2,683 | 0.5362 | 0.263 / 0.524 / 0.668 / 0.689 | 0.465 / 0.660 / 0.742 / 0.751 | 0.241 / 0.180 / 0.174 / 0.171 |
+| bicycle | 1,607 | 0.6969 | 0.656 / 0.709 / 0.710 / 0.713 | 0.745 / 0.770 / 0.771 / 0.772 | 0.145 / 0.138 / 0.138 / 0.138 |
+| pedestrian | 27,240 | 0.8003 | 0.782 / 0.798 / 0.807 / 0.814 | 0.782 / 0.790 / 0.795 / 0.799 | 0.163 / 0.163 / 0.163 / 0.164 |
+| **ALL** | 89,598 | 0.7125 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.6030**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 16,524 | 0.6947 | 0.580 / 0.698 / 0.744 / 0.757 | 0.692 / 0.755 / 0.778 / 0.781 | 0.202 / 0.154 / 0.151 / 0.144 |
+| truck | 14,587 | 0.5260 | 0.229 / 0.469 / 0.639 / 0.767 | 0.464 / 0.630 / 0.726 / 0.793 | 0.288 / 0.185 / 0.169 / 0.130 |
+| bus | 2,476 | 0.5030 | 0.305 / 0.486 / 0.597 / 0.624 | 0.530 / 0.636 / 0.703 / 0.719 | 0.297 / 0.201 / 0.149 / 0.156 |
+| bicycle | 364 | 0.5321 | 0.381 / 0.521 / 0.613 / 0.613 | 0.563 / 0.631 / 0.670 / 0.670 | 0.219 / 0.219 / 0.219 / 0.219 |
+| pedestrian | 14,297 | 0.7591 | 0.737 / 0.756 / 0.766 / 0.778 | 0.750 / 0.760 / 0.765 / 0.771 | 0.134 / 0.127 / 0.129 / 0.132 |
+| **ALL** | 48,248 | 0.6030 | — | — | — |
+
+**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7805**
+
+| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 |
+| :---- | ---: | ---: | :---- | :---- | :---- |
+| car | 94,831 | 0.8730 | 0.799 / 0.875 / 0.905 / 0.914 | 0.845 / 0.884 / 0.896 / 0.899 | 0.235 / 0.189 / 0.165 / 0.165 |
+| truck | 54,396 | 0.7118 | 0.490 / 0.674 / 0.809 / 0.875 | 0.645 / 0.757 / 0.831 / 0.862 | 0.314 / 0.240 / 0.178 / 0.153 |
+| bus | 8,596 | 0.6785 | 0.504 / 0.674 / 0.761 / 0.775 | 0.655 / 0.761 / 0.807 / 0.813 | 0.285 / 0.180 / 0.168 / 0.168 |
+| bicycle | 4,652 | 0.7655 | 0.736 / 0.770 / 0.778 / 0.778 | 0.800 / 0.816 / 0.819 / 0.820 | 0.194 / 0.159 / 0.159 / 0.159 |
+| pedestrian | 99,485 | 0.8739 | 0.857 / 0.872 / 0.880 / 0.887 | 0.835 / 0.845 / 0.850 / 0.854 | 0.142 / 0.137 / 0.135 / 0.137 |
+| **ALL** | 261,960 | 0.7805 | — | — | — |
+
+</details>
+
+---

From 57a91d654b8122fc2ddc0defc79bf168080d4fdd Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Wed, 22 Apr 2026 15:02:12 +0900
Subject: [PATCH 044/183] Added

---
 .../default/pipelines/default_camera_base_50m.py     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
index c9010038f..a32e043b3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
@@ -37,12 +37,12 @@
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
-        # scale_ratio_range=[0.95, 1.05],
-        # rot_range=[-0.78539816, 0.78539816],
-        # translation_std=[0.5, 0.5, 0.2],
-        scale_ratio_range=[0.98, 1.02],
-        rot_range=[-0.3925, 0.3925],
-        translation_std=[0.2, 0.2, 0.1],
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+        # scale_ratio_range=[0.98, 1.02],
+        # rot_range=[-0.3925, 0.3925],
+        # translation_std=[0.2, 0.2, 0.1],
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),

From 5a2b1e0ce4bbcccafc6622ff0234498cdae13633 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Wed, 22 Apr 2026 15:14:51 +0900
Subject: [PATCH 045/183] Added

---
 ...a_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py
new file mode 100644
index 000000000..927310e7d
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_camera_base_50m.py",
+    "../default/models/default_camera_swin_fpn_lss_50m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]  # also keep the imports declared by the base configs
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# User settings: dataset root, info-file directory, and experiment/work-dir naming.
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type  # NOTE(review): info dir says 2_6_1, group says 2_6_0 - confirm intended
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Model parameters: pass the inherited image size, class names, and grid/range/voxel settings to the model.
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],  # test_cfg and bbox_coder take only the first two entries
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataloaders: train/val/test all use the dataset type, metainfo, and class names inherited from _base_.
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,  # only the training dataset passes filter_cfg
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,  # unlike the dataloaders, data_root is prepended here
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,  # unlike the dataloaders, data_root is prepended here
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,  # only the test evaluator sets save_csv
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)  # same value as the logger interval above

From 08b50e6d71f31577a1053f8792ae381fcafdf524 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 23 Apr 2026 14:47:27 +0900
Subject: [PATCH 046/183] Add the script

---
 projects/BEVFusion/bevfusion/__init__.py      |   3 +-
 .../BEVFusion/bevfusion/bevfusion_head.py     |  22 ++-
 .../bevfusion/bevfusion_voxel_encoder.py      | 184 ++++++++++++++----
 ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 ---------------
 .../default_lidar_second_secfpn_120m.py       |   2 +
 5 files changed, 168 insertions(+), 204 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 3db358b55..2e9822d76 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -7,7 +7,7 @@
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
-from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder
+from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder
 
 __all__ = [
     "BEVFusion",
@@ -30,4 +30,5 @@
     "TransFusionBBoxCoder",
     "BEVFusionVoxelEncoder",
     "BEVFusionVoxelSinCosEncoder",
+    "BEVFusionVoxelMeanSinCosEncoder",
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 853523c4f..a8ef7129f 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -62,6 +62,7 @@ def __init__(
         norm_cfg=dict(type="BN1d"),
         bias="auto",
         # loss
+        loss_iou=None,  # optional IoU loss config; None disables the IoU-aware branch
         loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"),
@@ -87,6 +88,7 @@ def __init__(
         if not self.use_sigmoid_cls:
             self.num_classes += 1
         self.loss_cls = MODELS.build(loss_cls)
+        self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None
         self.loss_bbox = MODELS.build(loss_bbox)
         self.loss_heatmap = MODELS.build(loss_heatmap)
 
@@ -369,8 +371,8 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
         for layer_id, preds_dict in enumerate(preds_dicts):
             batch_size = preds_dict[0]["heatmap"].shape[0]
             batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid()
-            # if self.loss_iou.loss_weight != 0:
-            #    batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501
+            if self.loss_iou is not None:  # IoU-aware rescoring: geometric mean of class score and predicted IoU
+                batch_score = torch.sqrt(batch_score * preds_dict[0]["iou"][..., -self.num_proposals :].sigmoid())  # noqa: E501
             one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1)
             batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot
 
@@ -679,7 +681,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
             ious[None],
             int(pos_inds.shape[0]),
             float(mean_iou),
-            heatmap[None],
+            heatmap[None]
         )
 
     def loss(self, batch_feats, batch_data_samples):
@@ -711,7 +713,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
             ious,
             num_pos,
             matched_ious,
-            heatmap,
+            heatmap
         ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0])
         if hasattr(self, "on_the_image_mask"):
             label_weights = label_weights * self.on_the_image_mask
@@ -798,7 +800,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
 
             loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls
             loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox
-            # loss_dict[f'{prefix}_loss_iou'] = layer_loss_iou
+
+            # IoU-aware loss for the current layer's proposals (only when loss_iou is configured).
+            if self.loss_iou is not None:
+                layer_ious = preds_dict["iou"][
+                    ...,
+                    idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
+                ]  # [BS, num_proposals]
+                # NOTE(review): `ious` is passed unsliced; confirm its shape matches layer_ious.
+                # Per-proposal weights taken from the first bbox-weight channel: [BS, num_proposals]
+                layer_iou_weights = layer_bbox_weights[:, :, 0]
+                loss_dict[f"{prefix}_loss_iou"] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1))
 
         loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious)
 
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index efbc995e8..086acc1e0 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -185,21 +185,21 @@ class BEVFusionVoxelSinCosEncoder(nn.Module):
     def __init__(self, 
                  min_norm_values: Tuple[float],
                  max_norm_values: Tuple[float],
+                 time_lag_channel_index: int = 3,
+                 time_exp_factor: Optional[float] = None,
+                 feat_channels: Optional[tuple] = (16, ),
                  in_channels: Optional[int] = 4,
                  with_distance: Optional[bool] = False,
                  with_cluster_center: Optional[bool] = True,
                  with_voxel_center: Optional[bool] = True,
                  voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
                  point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
-                                                              40, 1),):
+                                                              40, 1),
+                 norm_cfg: Optional[dict] = dict(
+                     type='BN1d', eps=1e-3, momentum=0.01),
+                 mode: Optional[str] = 'max'):
         super(BEVFusionVoxelSinCosEncoder, self).__init__()
 
-        if with_cluster_center:
-            in_channels += 3
-        if with_voxel_center:
-            in_channels += 3
-        if with_distance:
-            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
@@ -214,11 +214,42 @@ def __init__(self,
         self.y_offset = self.vy / 2 + point_cloud_range[1]
         self.z_offset = self.vz / 2 + point_cloud_range[2]
         self.point_cloud_range = point_cloud_range
+        
+        self.xyz_channels = 3
+        feat_offset_channels = in_channels - self.xyz_channels
+        if with_cluster_center:
+            feat_offset_channels += 3
+        if with_voxel_center:
+            feat_offset_channels += 3
+        if with_distance:
+            feat_offset_channels += 1
+
+        feat_channels = [feat_offset_channels] + list(feat_channels)
+        assert len(feat_channels) > 0, "feat_channels must be greater than 0"
+        pfn_layers = []
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
+                last_layer = False
+            else:
+                last_layer = True
+            pfn_layers.append(
+                PFNLayer(
+                    in_filters,
+                    out_filters,
+                    norm_cfg=norm_cfg,
+                    last_layer=last_layer,
+                    mode=mode))
+        self.pfn_layers = nn.ModuleList(pfn_layers)
 
+        self.time_lag_channel_index = time_lag_channel_index
+        self.time_exp_factor = time_exp_factor
+        
         self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
         self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
         self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
-        self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float()))
+        self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float())
 
     def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                 *args, **kwargs) -> Tensor:
@@ -232,19 +263,53 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
         Returns:
             torch.Tensor: Features of pillars in shape (M, C).
-        """
-        features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
-        features_ls = [features_norm]
+        """ 
+        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+        
+        # Mean in the voxel
+        # (N, M, 3) -> (N, 3)
+        voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view(
+                    -1, 1)).contiguous()
+
+        # min-max normalization, (N, 3) -> (N, 3)
+        voxel_features_norm = (voxel_features - \
+         self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1))
+        
+        # SinCos encoding
+        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
+        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
+        # (N*3, 3) -> (N, 3*3)
+        y = y.reshape(num_voxels, -1)
+        # (N, 3*3) -> (N, 3*3*2)
+        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+
+        # PFN 
+        # Other features, for example, intensity or time_lag 
+        other_features = features[:, :, self.xyz_channels:]
+        
+        # Normalization 
+        other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:])    
+
+        time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels
+        # Exponentiate the time_lag feature: the result is higher when the normalized time lag is lower
+        # (1.0 when time_lag_features is 0.0)
+        if self.time_exp_factor is not None:
+            other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor)
+        else:
+            # Inverse the time_lag feature 
+            other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index]
+            
+        # Offsets
+        voxel_feature_offsets = [other_features_norm]
         # Find distance of x, y, and z from cluster center
         if self._with_cluster_center:
             points_mean = features[:, :, :3].sum(
                 dim=1, keepdim=True) / num_points.type_as(features).view(
                     -1, 1, 1)
             
-            # Map to [-1, 1]
-            f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size
-            # f_cluster = features[:, :, :3] - points_mean
-            features_ls.append(f_cluster)
+            # f_cluster = (features[:, :, :3] - points_mean)
+            f_cluster = features[:, :, :3] - points_mean
+            voxel_feature_offsets.append(f_cluster)
 
         # Find distance of x, y, and z from pillar center
         dtype = features.dtype
@@ -261,35 +326,80 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                 self.z_offset)
             
             # Map to [-1, 1]
-            f_center = f_center / (self.voxel_size * 0.5)
-            features_ls.append(f_center)
+            # f_center = f_center / (self.voxel_size * 0.5)
+            voxel_feature_offsets.append(f_center)
 
         if self._with_distance:
             points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features_ls.append(points_dist)
+            voxel_feature_offsets.append(points_dist)
         
-        # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
-        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
-
-        # SinCos encoding
-        # (N, M, C) -> (N, M, C, 1) -> (N, M, C, 1) * (1, 1, 1, C) -> (N, M, C, C)
-        y = features.unsqueeze(-1) * np.pi * self.exponents.unsqueeze(0).unsqueeze(0).unsqueeze(0)
-        # (N, M, C, C) -> (N, M, C*C)
-        y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels ** 2)
-        # (N, M, C*C) -> (N, M, C*C*2)
-        features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1)
-
+        voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1)
         # The feature decorations were calculated without regard to whether
         # pillar was empty. Need to ensure that
         # empty pillars remain set to zeros.
         mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-
-        # Reduction by mean
-        # (N, M, C*C*2) -> (N, C*C*2)
-        features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
-        features = features.contiguous()
+        mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
+        voxel_feature_offsets *= mask
         
+        # PFN
+        for pfn in self.pfn_layers:
+            voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
+        
+        # Concat 
+        features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
         return features
+
+
+
+@MODELS.register_module()
+class BEVFusionVoxelMeanSinCosEncoder(nn.Module):
+    def __init__(self, 
+                 min_norm_values: Tuple[float],
+                 max_norm_values: Tuple[float],
+                 in_channels: Optional[int] = 4,
+                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
+                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
+                                                              40, 1),
+                 mode: Optional[str] = 'max'):
+        super(BEVFusionVoxelSinCosEncoder, self).__init__()
+
+        # Create PillarFeatureNet layers
+        self.in_channels = in_channels
+
+        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
+        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
+        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
+
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+                *args, **kwargs) -> Tensor:
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Point features or raw points in shape
+                (N, M, C).
+            num_points (torch.Tensor): Number of points in each pillar in shape (M).
+            coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
+
+        Returns:
+            torch.Tensor: Features of pillars in shape (M, C).
+        """ 
+        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+        
+        # Mean in the voxel
+        # (N, M, 3) -> (N, 3)
+        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
+                    -1, 1)).contiguous()
+
+        # min-max normalization, (N, 3) -> (N, 3)
+        voxel_features_norm = (voxel_features - \
+         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
+        
+        # SinCos encoding
+        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
+        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
+        # (N*3, 3) -> (N, 3*3)
+        y = y.reshape(num_voxels, -1)
+        # (N, 3*3) -> (N, 3*3*2)
+        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+        
+        return voxel_fourier_features
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py
deleted file mode 100644
index 531a07673..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py
+++ /dev/null
@@ -1,161 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
-    ),
-    pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelSinCosEncoder", 
-        in_channels=4,
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    pts_middle_encoder=dict(
-        in_channels=100,
-        sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index b5d9a8fdc..4843f5677 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -94,6 +94,7 @@
             ],
         ),
         dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"],  # Use class indices for pooling
+        # common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]),
         common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]),
         bbox_coder=dict(
             type="TransFusionBBoxCoder",
@@ -110,6 +111,7 @@
             reduction="mean",
             loss_weight=1.0,
         ),
+				# loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
     ),

From dead69b6bf0a744cde4fc4db0d410b974ac4f40a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 23 Apr 2026 14:47:39 +0900
Subject: [PATCH 047/183] Add the script

---
 ...second_secfpn_50e_8xb8_base_120m_sincos.py | 156 +++++++++++++++++
 ...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 +++++++++++++++++
 ...b8_base_120m_sincos_timeexp_34_channels.py | 165 ++++++++++++++++++
 ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +++++++++++++
 4 files changed, 601 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py
new file mode 100644
index 000000000..d856b1d4b
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py
@@ -0,0 +1,156 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelMeanSinCosEncoder", 
+        in_channels=4,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=32,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
new file mode 100644
index 000000000..54af6be5f
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
@@ -0,0 +1,163 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelSinCosEncoder", 
+        in_channels=4,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        feat_channels=[16],
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=34,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
new file mode 100644
index 000000000..d7e61102b
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
@@ -0,0 +1,165 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelSinCosEncoder", 
+        in_channels=4,
+        time_lag_channel_index=3,
+        time_exp_factor=1.0,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        feat_channels=[16],
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=34,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py
new file mode 100644
index 000000000..4c7e996d9
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py
@@ -0,0 +1,117 @@
+num_proposals = 500
+max_num_points = 10
+max_voxels = [120000, 160000]
+
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        max_num_points=max_num_points,
+        max_voxels=max_voxels,
+        voxelize_reduce=True,
+    ),
+    data_preprocessor=dict(
+        type="Det3DDataPreprocessor",
+        pad_size_divisor=32,
+    ),
+    pts_voxel_encoder=dict(type="HardSimpleVFE"),
+    pts_middle_encoder=dict(
+        type="BEVFusionSparseEncoder",
+        in_channels=5,
+        aug_features_min_values=[],
+        aug_features_max_values=[],
+        num_aug_features=0,
+        order=("conv", "norm", "act"),
+        norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01),
+        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
+        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)),
+        block_type="basicblock",
+    ),
+    pts_backbone=dict(
+        type="SECOND",
+        in_channels=256,
+        out_channels=[128, 256],
+        layer_nums=[5, 5],
+        layer_strides=[1, 2],
+        norm_cfg=dict(type="BN", eps=0.001, momentum=0.01),
+        conv_cfg=dict(type="Conv2d", bias=False),
+    ),
+    pts_neck=dict(
+        type="SECONDFPN",
+        in_channels=[128, 256],
+        out_channels=[256, 256],
+        upsample_strides=[1, 2],
+        norm_cfg=dict(type="BN", eps=0.001, momentum=0.01),
+        upsample_cfg=dict(type="deconv", bias=False),
+        use_conv_for_no_stride=True,
+    ),
+    bbox_head=dict(
+        type="BEVFusionHead",
+        num_proposals=num_proposals,
+        auxiliary=True,
+        in_channels=512,
+        hidden_channel=128,
+        nms_kernel_size=3,
+        bn_momentum=0.1,
+        num_decoder_layers=1,
+        decoder_layer=dict(
+            type="TransformerDecoderLayer",
+            self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1),
+            cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1),
+            ffn_cfg=dict(
+                embed_dims=128,
+                feedforward_channels=256,
+                num_fcs=2,
+                ffn_drop=0.1,
+                act_cfg=dict(type="ReLU", inplace=True),
+            ),
+            norm_cfg=dict(type="LN"),
+            pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128),
+        ),
+        train_cfg=dict(
+            dataset="t4datasets",
+            out_size_factor=8,
+            gaussian_overlap=0.1,
+            min_radius=2,
+            pos_weight=-1,
+            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
+            assigner=dict(
+                type="HungarianAssigner3D",
+                iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"),
+                cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15),
+                reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25),
+                iou_cost=dict(type="IoU3DCost", weight=0.25),
+            ),
+        ),
+        test_cfg=dict(
+            dataset="t4datasets",
+            out_size_factor=8,
+            nms_type=None,  # Set to "circle" for circle_nms
+            # Set NMS for different clusters
+            nms_clusters=[
+                dict(class_names=["car", "truck", "bus"], nms_threshold=0.5),  # It's radius if using circle_nms
+                dict(class_names=["bicycle"], nms_threshold=0.5),
+                dict(class_names=["pedestrian"], nms_threshold=0.175),
+            ],
+        ),
+        dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"],  # Use class indices for pooling
+        common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]),
+        bbox_coder=dict(
+            type="TransFusionBBoxCoder",
+            post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
+            score_threshold=0.0,
+            out_size_factor=8,
+            code_size=10,
+        ),
+        loss_cls=dict(
+            type="mmdet.FocalLoss",
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            reduction="mean",
+            loss_weight=1.0,
+        ),
+        loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0),
+        loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
+        loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
+    ),
+)

From db756f57c3cf97e808bd4ae5d57b5ae785285ada Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 23 Apr 2026 14:50:26 +0900
Subject: [PATCH 048/183] Enable resume, lower num_workers, add PointsRangeFilter

---
 .../bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py   | 2 ++
 .../default/pipelines/default_camera_lidar_intensity_120m.py   | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
index 8c8d84d18..4ac46afea 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
@@ -135,3 +135,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+resume = True
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 439459010..7dac0838f 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 32
+num_workers = 8
 input_modality = dict(use_lidar=True, use_camera=True)
 
 # range setting
@@ -131,6 +131,7 @@
         rand_flip=False,
         is_train=False,
     ),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],

From 1725f79575ff203ae80300504d7b85b5b0f5f796 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 23 Apr 2026 16:36:11 +0900
Subject: [PATCH 049/183] Fix IoU loss in BEVFusionHead and use IoU-loss lidar model

---
 .../BEVFusion/bevfusion/bevfusion_head.py     | 25 ++++++++++---------
 .../bevfusion/bevfusion_voxel_encoder.py      |  2 +-
 ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 13 +++-------
 3 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index a8ef7129f..0852ebf16 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -62,7 +62,7 @@ def __init__(
         norm_cfg=dict(type="BN1d"),
         bias="auto",
         # loss
-				loss_iou=None,
+        loss_iou=None,
         loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"),
@@ -372,7 +372,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
             batch_size = preds_dict[0]["heatmap"].shape[0]
             batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid()
             if self.loss_iou is not None:
-               batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501
+               batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501
             one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1)
             batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot
 
@@ -801,16 +801,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
             loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls
             loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox
 
-						# Output iou for iou-aware loss
-						if self.loss_iou is not None:
-							layer_ious = preds_dict["iou"][
-								...
-								idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
-							] # [BS, num_proposals]
-							
-							# [BS, num_proposals]
-							layer_iou_weights = layer_bbox_weights[:, :, 0] 
-							loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1))
+            # Output iou for iou-aware loss
+            if self.loss_iou is not None:
+              layer_ious = preds_dict["iou"][
+                ...,
+                idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
+              ].squeeze(1) # [BS, num_proposals]
+              
+              # [BS, num_proposals]
+              layer_iou_weights = layer_bbox_weights[:, :, 0]
+            #   print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape")
+              loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1))
 
         loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious)
 
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 086acc1e0..5037113aa 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -361,7 +361,7 @@ def __init__(self,
                  point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
                                                               40, 1),
                  mode: Optional[str] = 'max'):
-        super(BEVFusionVoxelSinCosEncoder, self).__init__()
+        super(BEVFusionVoxelMeanSinCosEncoder, self).__init__()
 
         # Create PillarFeatureNet layers
         self.in_channels = in_channels
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index 17f16254d..7c1286df8 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -2,7 +2,7 @@
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
     "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m_iou_loss.py",
     "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
     "../default/default_misc.py",
 ]
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m"
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_iou_loss"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -29,19 +29,14 @@
     ),
     pts_voxel_encoder=dict(
         _delete_=True,
-        type="BEVFusionVoxelSinCosEncoder", 
+        type="BEVFusionVoxelMeanSinCosEncoder", 
         in_channels=4,
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
         # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
         min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
         max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     pts_middle_encoder=dict(
-        in_channels=100,
+        in_channels=32,
         sparse_shape=_base_.grid_size,
         # num_aug_features=4,
         # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here

From 24d780bb788fd25813481007d898c85051c944cb Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Fri, 24 Apr 2026 16:31:47 +0900
Subject: [PATCH 050/183] Skip lidarseg annotations when info_filename is missing

---
 tools/detection3d/t4dataset_converters/t4converter.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py
index 842b0f458..5dfd1dc1f 100644
--- a/tools/detection3d/t4dataset_converters/t4converter.py
+++ b/tools/detection3d/t4dataset_converters/t4converter.py
@@ -626,6 +626,10 @@ def get_lidarseg_annotations(
 ) -> dict:
     if not hasattr(t4, "lidarseg") or not t4.lidarseg:
         return dict()
+    
+    if sd_record.info_filename is None:
+        print(f"sample {lidar_token} doesn't have lidar info_filename")
+        return dict()
 
     assert i < len(t4.lidarseg), "Index exceeds number of lidarseg records!"
     assert t4.lidarseg[i].sample_data_token == lidar_token, "Sample data token mismatch!"

From 8175419ca1604a9fe25b39ab3715616f3c8fc07f Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Fri, 24 Apr 2026 16:54:00 +0900
Subject: [PATCH 051/183] Add traffic_cone and barrier classes to t4dataset configs

---
 .../configs/detection3d/dataset/t4dataset/base.py  | 13 +++++++++----
 .../detection3d/dataset/t4dataset/j6gen2.py        | 13 +++++++++----
 .../detection3d/dataset/t4dataset/j6gen2_base.py   | 14 ++++++++++----
 .../detection3d/dataset/t4dataset/jpntaxi_base.py  | 14 ++++++++++----
 .../detection3d/dataset/t4dataset/jpntaxi_gen2.py  | 14 ++++++++++----
 .../detection3d/dataset/t4dataset/largebus.py      | 14 ++++++++++----
 6 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index d0744a131..3f90e7e0c 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -91,8 +91,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -113,7 +113,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -123,7 +123,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -143,6 +143,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
 class_names = [
@@ -151,6 +154,8 @@
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier"
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
index 3c8675c13..e737994aa 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
@@ -72,8 +72,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -94,7 +94,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -104,7 +104,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -124,6 +124,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
 class_names = [
@@ -132,6 +135,8 @@
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier",
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
index cc3a86d3e..a8f6c6e7d 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
@@ -78,8 +78,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -100,7 +100,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -110,7 +110,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -130,14 +130,20 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
+
 class_names = [
     "car",
     "truck",
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier",
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
index b7ddb799a..229ff7604 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
@@ -68,8 +68,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -90,7 +90,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -100,7 +100,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -120,6 +120,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
 class_names = [
@@ -128,7 +131,10 @@
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier",
 ]
+
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
 
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
index f91bbc22f..411cabe7e 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
@@ -65,8 +65,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -87,7 +87,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -97,7 +97,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -117,6 +117,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
 class_names = [
@@ -125,7 +128,10 @@
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier",
 ]
+
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
 
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
index b117c3798..a611750d3 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
@@ -67,8 +67,8 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "movable_object.barrier": "barrier",
-    "movable_object.debris": "debris",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.trafficcone": "traffic_cone",
     "movable_object.traffic_cone": "traffic_cone",
     "animal": "animal",
@@ -89,7 +89,7 @@
     # DBv2.0 and DBv3.0
     "animal": "animal",
     "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "pushable_pullable",
+    "movable_object.pushable_pullable": "barrier",
     "movable_object.traffic_cone": "traffic_cone",
     "pedestrian.adult": "pedestrian",
     "pedestrian.child": "pedestrian",
@@ -99,7 +99,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
+    "static_object.bollard": "barrier",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -119,6 +119,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
+		"traffic_cone": "traffic_cone",
+		"trafficcone": "traffic_cone",
+		"barrier": "barrier",
 }
 
 class_names = [
@@ -127,7 +130,10 @@
     "bus",
     "bicycle",
     "pedestrian",
+		"traffic_cone",
+		"barrier",
 ]
+
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
 

From 33f11cd5db171246654950d2a0afc22a757dcce5 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Sat, 25 Apr 2026 14:08:04 +0900
Subject: [PATCH 052/183] Parse traffic_cone_barrier_status from scene ids

---
 .../download_t4dataset/download_t4dataset.py        |  4 ++--
 tools/detection3d/create_data_t4dataset.py          | 13 ++++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/pipelines/webauto/download_t4dataset/download_t4dataset.py b/pipelines/webauto/download_t4dataset/download_t4dataset.py
index f06f6979d..d06b85717 100644
--- a/pipelines/webauto/download_t4dataset/download_t4dataset.py
+++ b/pipelines/webauto/download_t4dataset/download_t4dataset.py
@@ -68,8 +68,8 @@ def get_t4dataset_ids(config_path: str) -> list[str]:
     for key in required_keys:
         for t4dataset_ids in data_splits[key]:
             t4dataset_ids = t4dataset_ids.split("/")
-            if len(t4dataset_ids) == 4:
-                t4dataset_id, t4dataset_version_id, city, vehicle_type = t4dataset_ids
+            if len(t4dataset_ids) == 5:
+                t4dataset_id, t4dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = t4dataset_ids
             elif len(t4dataset_ids) == 2:
                 t4dataset_id, t4dataset_version_id = t4dataset_ids
             elif len(t4dataset_ids) == 1:
diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py
index 1e61af9d8..e75a36a04 100644
--- a/tools/detection3d/create_data_t4dataset.py
+++ b/tools/detection3d/create_data_t4dataset.py
@@ -104,6 +104,7 @@ def get_info(
     max_sweeps: int,
     city: Optional[str] = None,
     vehicle_type: Optional[str] = None,
+    traffic_cone_barrier_status: Optional[str] = None,
 ) -> Dict[str, Any]:
     lidar_token = get_lidar_token(sample)
     if lidar_token is None:
@@ -129,6 +130,11 @@ def get_info(
     sd_record: SampleData = t4.get("sample_data", lidar_token)
 
     info = get_empty_standard_data_info(cfg.camera_types)
+    
+    if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true":
+        traffic_cone_barrier_status = True
+    else:
+        traffic_cone_barrier_status = False
 
     basic_info = dict(
         sample_idx=i,
@@ -139,6 +145,7 @@ def get_info(
         scene_name=scene_record.name,
         city=city,
         vehicle_type=vehicle_type,
+        traffic_cone_barrier_status=traffic_cone_barrier_status,
     )
 
     for new_info in [
@@ -302,8 +309,10 @@ def main():
                     f"Creating data info for scene: {scene_id}, steps: {sample_steps}, sweeps: {args.max_sweeps}"
                 )
                 dataset_scene_info = scene_id.split("/")
-                if len(dataset_scene_info) == 4:
-                    t4_dataset_id, t4_dataset_version_id, city, vehicle_type = dataset_scene_info
+                # Reset per scene: only 5-field scene ids carry this value, and get_info() is always passed it.
+                traffic_cone_barrier_status = None
+                if len(dataset_scene_info) == 5:
+                    t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info
                 elif len(dataset_scene_info) == 2:
                     t4_dataset_id, t4_dataset_version_id = dataset_scene_info
                     city = vehicle_type = None
@@ -326,7 +335,7 @@ def main():
                 infos = []
                 for i in range(0, len(t4.sample), sample_steps):
                     sample = t4.sample[i]
-                    info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type)
+                    info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status)
                     if info is None:
                         continue
                     # info["version"] = dataset_version             # used for visualizations during debugging.

From 2237522607f97186e9900c4b4884159684ba9fad Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Sat, 25 Apr 2026 14:36:45 +0900
Subject: [PATCH 053/183] Added

---
 .../configs/detection3d/dataset/t4dataset/base.py    | 12 ++++++------
 .../configs/detection3d/dataset/t4dataset/j6gen2.py  | 12 ++++++------
 .../detection3d/dataset/t4dataset/j6gen2_base.py     | 12 ++++++------
 .../detection3d/dataset/t4dataset/jpntaxi_base.py    | 12 ++++++------
 .../detection3d/dataset/t4dataset/jpntaxi_gen2.py    | 12 ++++++------
 .../detection3d/dataset/t4dataset/largebus.py        | 12 ++++++------
 6 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index 3f90e7e0c..4248c90e6 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -123,7 +123,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -143,9 +143,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 class_names = [
@@ -154,8 +154,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier"
+    "traffic_cone",
+    "barrier",
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
index e737994aa..0324e7207 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
@@ -104,7 +104,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -124,9 +124,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 class_names = [
@@ -135,8 +135,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier",
+    "traffic_cone",
+    "barrier",
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
index a8f6c6e7d..b9ec03f27 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
@@ -110,7 +110,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -130,9 +130,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 
@@ -142,8 +142,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier",
+    "traffic_cone",
+    "barrier",
 ]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
index 229ff7604..c08decfa1 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
@@ -100,7 +100,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -120,9 +120,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 class_names = [
@@ -131,8 +131,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier",
+    "traffic_cone",
+    "barrier",
 ]
 
 num_class = len(class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
index 411cabe7e..6b7250673 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
@@ -97,7 +97,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -117,9 +117,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+	"traffic_cone": "traffic_cone",
+	"trafficcone": "traffic_cone",
+	"barrier": "barrier",
 }
 
 class_names = [
@@ -128,8 +128,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier",
+    "traffic_cone",
+    "barrier",
 ]
 
 num_class = len(class_names)
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
index a611750d3..2b54629eb 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
@@ -99,7 +99,7 @@
     "pedestrian.stroller": "pedestrian",
     "pedestrian.wheelchair": "pedestrian",
     "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "barrier",
+    "static_object.bollard": "bollard",
     "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
     "vehicle.bicycle": "bicycle",
     "vehicle.bus": "bus",
@@ -119,9 +119,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-		"traffic_cone": "traffic_cone",
-		"trafficcone": "traffic_cone",
-		"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 class_names = [
@@ -130,8 +130,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-		"traffic_cone",
-		"barrier",
+	"traffic_cone",
+	"barrier",
 ]
 
 num_class = len(class_names)

From 60df4c0911f1559db0e917a2f2d9045ab07f83f3 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Sat, 25 Apr 2026 23:16:34 +0900
Subject: [PATCH 054/183] Make traffic_cone_barrier_status required in get_info

---
 tools/detection3d/create_data_t4dataset.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py
index e75a36a04..62169bc7a 100644
--- a/tools/detection3d/create_data_t4dataset.py
+++ b/tools/detection3d/create_data_t4dataset.py
@@ -102,9 +102,9 @@ def get_info(
     sample: Sample,
     i: int,
     max_sweeps: int,
+    traffic_cone_barrier_status: str,
     city: Optional[str] = None,
     vehicle_type: Optional[str] = None,
-    traffic_cone_barrier_status: Optional[str] = None,
 ) -> Dict[str, Any]:
     lidar_token = get_lidar_token(sample)
     if lidar_token is None:
@@ -130,8 +130,7 @@ def get_info(
     sd_record: SampleData = t4.get("sample_data", lidar_token)
 
     info = get_empty_standard_data_info(cfg.camera_types)
-    
-    if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true":
+    if traffic_cone_barrier_status == "true":
         traffic_cone_barrier_status = True
     else:
         traffic_cone_barrier_status = False
@@ -333,7 +332,7 @@ def main():
                 infos = []
                 for i in range(0, len(t4.sample), sample_steps):
                     sample = t4.sample[i]
-                    info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status)
+                    info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type)
                     if info is None:
                         continue
                     # info["version"] = dataset_version             # used for visualizations during debugging.

From b5dabf2d53e4cde87b994c723f5233dadb267ec6 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Mon, 27 Apr 2026 14:27:49 +0900
Subject: [PATCH 055/183] Add partial traffic cone/barrier loss masking to BEVFusionHead

---
 .../BEVFusion/bevfusion/bevfusion_head.py     | 79 +++++++++++++++----
 projects/BEVFusion/bevfusion/utils.py         | 11 ++-
 .../default_lidar_second_secfpn_120m.py       |  5 +-
 .../default/pipelines/default_lidar_120m.py   |  2 +
 .../pipelines/default_lidar_intensity_120m.py |  2 +
 5 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 853523c4f..9bbc6469c 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -69,6 +69,7 @@ def __init__(
         train_cfg=None,
         test_cfg=None,
         bbox_coder=None,
+        partial_traffic_cone_barrier=False,
     ):
         super().__init__()
         self.class_names = class_names
@@ -82,7 +83,8 @@ def __init__(
         self.nms_kernel_size = nms_kernel_size
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg
-
+        # If true, only compute loss for traffic cone and barrier when it's available in the frame
+        self.partial_traffic_cone_barrier = partial_traffic_cone_barrier
         self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False)
         if not self.use_sigmoid_cls:
             self.num_classes += 1
@@ -185,6 +187,13 @@ def __init__(
             cluster["class_indices"] = sorted(
                 [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]]
             )
+        
+        if self.partial_traffic_cone_barrier:
+            assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier"
+            self.ignore_labels = [self.class_name_to_indices["traffic_cone"], self.class_name_to_indices["barrier"]]
+        else:
+            self.ignore_labels = None
+        
 
     def create_2D_grid(self, x_size, y_size):
         meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]
@@ -456,7 +465,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
 
         return rets[0]
 
-    def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict]):
+    def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]):
         """Generate training targets.
         Args:
             batch_gt_instances_3d (List[InstanceData]):
@@ -500,6 +509,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis
             batch_gt_instances_3d,
             list_of_pred_dict,
             np.arange(len(batch_gt_instances_3d)),
+            batch_metadata,
         )
         labels = torch.cat(res_tuple[0], dim=0)
         label_weights = torch.cat(res_tuple[1], dim=0)
@@ -509,6 +519,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis
         num_pos = np.sum(res_tuple[5])
         matched_ious = np.mean(res_tuple[6])
         heatmap = torch.cat(res_tuple[7], dim=0)
+        heatmap_weights = torch.cat(res_tuple[8], dim=0)
         return (
             labels,
             label_weights,
@@ -518,9 +529,10 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis
             num_pos,
             matched_ious,
             heatmap,
+            heatmap_weights,
         )
 
-    def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
+    def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata):
         """Generate training targets for a single sample.
         Args:
             gt_instances_3d (:obj:`InstanceData`): ground truth of instances.
@@ -563,6 +575,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
             num_layer = self.num_decoder_layers
         else:
             num_layer = 1
+        
+        traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True)
+        if self.ignore_labels is not None and not traffic_cone_barrier_status:
+            ignore_labels = self.ignore_labels
+        else:
+            ignore_labels = None
 
         assign_result_list = []
         for idx_layer in range(num_layer):
@@ -581,6 +599,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
                     gt_labels_3d,
                     score_layer,
                     self.train_cfg,
+                    ignore_labels,
                 )
             elif self.train_cfg.assigner.type == "HeuristicAssigner":
                 assign_result = self.bbox_assigner.assign(
@@ -637,10 +656,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
                 label_weights[pos_inds] = 1.0
             else:
                 label_weights[pos_inds] = self.train_cfg.pos_weight
-
+            
         if len(neg_inds) > 0:
             label_weights[neg_inds] = 1.0
-
+        
         # # compute dense heatmap targets
         device = labels.device
         gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device)
@@ -671,6 +690,15 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
                 draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int[[1, 0]], radius)
 
         mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1)
+        heatmap_weights = torch.ones_like(heatmap)
+
+        # Ignore labels for traffic cone and barrier
+        if self.ignore_labels is not None and not traffic_cone_barrier_status:
+            pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
+            ignore_preds_masks = pred_labels.isin(self.ignore_labels)
+            label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals
+            heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals
+
         return (
             labels[None],
             label_weights[None],
@@ -680,6 +708,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx):
             int(pos_inds.shape[0]),
             float(mean_iou),
             heatmap[None],
+            heatmap_weights[None],
         )
 
     def loss(self, batch_feats, batch_data_samples):
@@ -698,11 +727,11 @@ def loss(self, batch_feats, batch_data_samples):
             batch_input_metas.append(data_sample.metainfo)
             batch_gt_instances_3d.append(data_sample.gt_instances_3d)
         preds_dicts = self(batch_feats, batch_input_metas)
-        loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d)
+        loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d, batch_input_metas)
 
         return loss
 
-    def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, **kwargs):
+    def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas):
         (
             labels,
             label_weights,
@@ -712,7 +741,8 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
             num_pos,
             matched_ious,
             heatmap,
-        ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0])
+            heatmap_weights,
+        ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0], batch_input_metas)
         if hasattr(self, "on_the_image_mask"):
             label_weights = label_weights * self.on_the_image_mask
             bbox_weights = bbox_weights * self.on_the_image_mask[:, :, None]
@@ -721,13 +751,32 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
         loss_dict = dict()
 
         # compute heatmap loss
-        loss_heatmap = self.loss_heatmap(
-            clip_sigmoid(preds_dict["dense_heatmap"]).float(),
-            heatmap.float(),
-            avg_factor=max(heatmap.eq(1).float().sum().item(), 1),
-        )
-        loss_dict["loss_heatmap"] = loss_heatmap
-
+        preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float())
+        num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1)
+        if self.ignore_labels is not None:
+            loss_heatmap = self.loss_heatmap(
+                preds_dense_heatmap,
+                heatmap.float(),
+                avg_factor=num_pos_dense_heatmap,
+            )
+            loss_dict["loss_heatmap"] = loss_heatmap
+        else:
+            # When ignore labels is found, we compute the loss for each class
+            # heatmap focal loss
+            loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
+                preds_dense_heatmap,
+                heatmap.float(),
+            )
+            # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
+            loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
+            loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
+            # (Batch, num_classes)
+            for cls_i, class_name in enumerate(self.class_names):
+                loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
+            
+            # Prevent loss item to avoid computing gradients twice. This is for logging.
+            loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
+        
         # compute loss for each layer
         for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1):
             if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False):
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index c47604dbd..b27d9e681 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -238,7 +238,7 @@ def __init__(
         self.iou_cost = TASK_UTILS.build(iou_cost)
         self.iou_calculator = TASK_UTILS.build(iou_calculator)
 
-    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
+    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None):
         num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
 
         # 1. assign -1 by default
@@ -259,9 +259,16 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
         reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)
         iou = self.iou_calculator(bboxes, gt_bboxes)
         iou_cost = self.iou_cost(iou)
-
+     
         # weighted sum of above three costs
         cost = cls_cost + reg_cost + iou_cost
+        
+        if ignore_labels is not None:
+            preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
+            print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape)
+            ignore_preds_masks = preds_labels.isin(ignore_labels)
+            cost[ignore_preds_masks] = 10000
+            print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape)
 
         # 3. do Hungarian matching on CPU using linear_sum_assignment
         cost = cost.detach().cpu()
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index b5d9a8fdc..94fca2829 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -91,9 +91,11 @@
                 dict(class_names=["car", "truck", "bus"], nms_threshold=0.5),  # It's radius if using circle_nms
                 dict(class_names=["bicycle"], nms_threshold=0.5),
                 dict(class_names=["pedestrian"], nms_threshold=0.175),
+                dict(class_names=["barrier"], nms_threshold=0.25),
+                dict(class_names=["traffic_cone"], nms_threshold=0.175),
             ],
         ),
-        dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"],  # Use class indices for pooling
+        dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"],  # Use class indices for pooling
         common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]),
         bbox_coder=dict(
             type="TransFusionBBoxCoder",
@@ -112,5 +114,6 @@
         ),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
+        partial_traffic_cone_barrier=True
     ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 06d95be16..c3e8e18ee 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -84,6 +84,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
@@ -127,6 +128,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 4e74d3616..a9032fcdc 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -84,6 +84,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
@@ -127,6 +128,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",  
         ],
     ),
 ]

From 79024cf9f861e086d77b7b1362b62be6c81f6bc5 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Mon, 27 Apr 2026 14:41:33 +0900
Subject: [PATCH 056/183] Log BEVFusionHead ignore labels; set barrier NMS threshold to 0.5

---
 projects/BEVFusion/bevfusion/bevfusion_head.py                 | 3 +++
 .../default/models/default_lidar_second_secfpn_120m.py         | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 9bbc6469c..d616725d2 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -15,6 +15,7 @@
 from mmdet.models.utils import multi_apply
 from mmengine.structures import InstanceData
 from torch import nn
+from mmengine.logging import print_log
 
 
 def clip_sigmoid(x, eps=1e-4):
@@ -194,6 +195,8 @@ def __init__(
         else:
             self.ignore_labels = None
         
+        print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \
+        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
 
     def create_2D_grid(self, x_size, y_size):
         meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 94fca2829..ec37de42a 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -91,7 +91,7 @@
                 dict(class_names=["car", "truck", "bus"], nms_threshold=0.5),  # It's radius if using circle_nms
                 dict(class_names=["bicycle"], nms_threshold=0.5),
                 dict(class_names=["pedestrian"], nms_threshold=0.175),
-                dict(class_names=["barrier"], nms_threshold=0.25),
+                dict(class_names=["barrier"], nms_threshold=0.5),
                 dict(class_names=["traffic_cone"], nms_threshold=0.175),
             ],
         ),

From e771a69cb77196e16263b0049919837d0edb275e Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Mon, 27 Apr 2026 18:18:08 +0900
Subject: [PATCH 057/183] Use per-class label weights; disable assigner ignore-label cost

---
 .../BEVFusion/bevfusion/bevfusion_head.py     | 21 +++++++++++--------
 projects/BEVFusion/bevfusion/utils.py         | 12 +++++------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index d616725d2..7e62d21a4 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -579,12 +579,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad
         else:
             num_layer = 1
         
-        traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True)
-        if self.ignore_labels is not None and not traffic_cone_barrier_status:
-            ignore_labels = self.ignore_labels
-        else:
-            ignore_labels = None
-
         assign_result_list = []
         for idx_layer in range(num_layer):
             bboxes_tensor_layer = bboxes_tensor[
@@ -638,7 +632,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad
         ious = assign_result_ensemble.max_overlaps
         ious = torch.clamp(ious, min=0.0, max=1.0)
         labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)
-        label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)
+        label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long)
+        # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)
 
         if gt_labels_3d is not None:  # default label is -1
             labels += self.num_classes
@@ -696,11 +691,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad
         heatmap_weights = torch.ones_like(heatmap)
 
         # Ignore labels for traffic cone and barrier
+        traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True)
         if self.ignore_labels is not None and not traffic_cone_barrier_status:
             pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
             ignore_preds_masks = pred_labels.isin(self.ignore_labels)
-            label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals
             heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals
+            label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
 
         return (
             labels[None],
@@ -791,10 +787,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
             ].reshape(-1)
+            # layer_label_weights = label_weights[
+            #     ...,
+            #     idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
+            # ].reshape(-1)
             layer_label_weights = label_weights[
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
-            ].reshape(-1)
+            ]
+            # (Batch*num_proposals, num_classes)
+            layer_label_weights = layer_label_weights.reshape(-1, self.num_classes)
+            print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current")
             layer_score = preds_dict["heatmap"][
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index b27d9e681..b6bd2be41 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -263,12 +263,12 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label
         # weighted sum of above three costs
         cost = cls_cost + reg_cost + iou_cost
         
-        if ignore_labels is not None:
-            preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
-            print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape)
-            ignore_preds_masks = preds_labels.isin(ignore_labels)
-            cost[ignore_preds_masks] = 10000
-            print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape)
+        # if ignore_labels is not None:
+        #     preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
+        #     print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape)
+        #     ignore_preds_masks = preds_labels.isin(ignore_labels)
+        #     cost[ignore_preds_masks] = 10000
+        #     print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape)
 
         # 3. do Hungarian matching on CPU using linear_sum_assignment
         cost = cost.detach().cpu()

From 05703cbad1bf353279c4feff7af59ae6926c6281 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 27 Apr 2026 20:07:18 +0900
Subject: [PATCH 058/183] Update configs

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  30 ++--
 ..._secfpn_50e_8xb8_base_120m_traffic_cone.py | 163 ++++++++++++++++++
 .../default_lidar_second_secfpn_120m.py       |   2 +-
 .../default/pipelines/default_lidar_120m.py   |   2 +
 4 files changed, 179 insertions(+), 18 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 7e62d21a4..ace7f26b8 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -70,7 +70,7 @@ def __init__(
         train_cfg=None,
         test_cfg=None,
         bbox_coder=None,
-        partial_traffic_cone_barrier=False,
+        partial_ignore_labels=None
     ):
         super().__init__()
         self.class_names = class_names
@@ -84,8 +84,6 @@ def __init__(
         self.nms_kernel_size = nms_kernel_size
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg
-        # If true, only compute loss for traffic cone and barrier when it's available in the frame
-        self.partial_traffic_cone_barrier = partial_traffic_cone_barrier
         self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False)
         if not self.use_sigmoid_cls:
             self.num_classes += 1
@@ -189,13 +187,14 @@ def __init__(
                 [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]]
             )
         
-        if self.partial_traffic_cone_barrier:
+        # If true, only compute loss for traffic cone and barrier when it's available in the frame
+        if partial_ignore_labels is not None:
             assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier"
-            self.ignore_labels = [self.class_name_to_indices["traffic_cone"], self.class_name_to_indices["barrier"]]
+            self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels]
         else:
-            self.ignore_labels = None
+            self.partial_ignore_labels = None
         
-        print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \
+        print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \
         {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
 
     def create_2D_grid(self, x_size, y_size):
@@ -535,7 +534,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis
             heatmap_weights,
         )
 
-    def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata):
+    def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         """Generate training targets for a single sample.
         Args:
             gt_instances_3d (:obj:`InstanceData`): ground truth of instances.
@@ -596,7 +595,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad
                     gt_labels_3d,
                     score_layer,
                     self.train_cfg,
-                    ignore_labels,
                 )
             elif self.train_cfg.assigner.type == "HeuristicAssigner":
                 assign_result = self.bbox_assigner.assign(
@@ -691,12 +689,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad
         heatmap_weights = torch.ones_like(heatmap)
 
         # Ignore labels for traffic cone and barrier
-        traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True)
-        if self.ignore_labels is not None and not traffic_cone_barrier_status:
-            pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
-            ignore_preds_masks = pred_labels.isin(self.ignore_labels)
-            heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals
-            label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
+        traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
+        if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
+            heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
+            label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
 
         return (
             labels[None],
@@ -752,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
         # compute heatmap loss
         preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float())
         num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1)
-        if self.ignore_labels is not None:
+        if self.partial_ignore_labels is not None:
             loss_heatmap = self.loss_heatmap(
                 preds_dense_heatmap,
                 heatmap.float(),
@@ -797,12 +793,12 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
             ]
             # (Batch*num_proposals, num_classes)
             layer_label_weights = layer_label_weights.reshape(-1, self.num_classes)
-            print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current")
             layer_score = preds_dict["heatmap"][
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
             ]
             layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes)
+            print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current")
             layer_loss_cls = self.loss_cls(
                 layer_cls_score.float(),
                 layer_labels,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py
new file mode 100644
index 000000000..39c6ddf54
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py
@@ -0,0 +1,163 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_8/"
+
+experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag; the maximum time lag is technically two sweeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        loss_heatmap=dict(
+            reduction="none",
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index ec37de42a..023c6774d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -114,6 +114,6 @@
         ),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
-        partial_traffic_cone_barrier=True
+        partial_ignore_labels=None
     ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index c3e8e18ee..455c2761a 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -13,6 +13,8 @@
     "bus": 120,
     "bicycle": 120,
     "pedestrian": 120,
+    "traffic_cone": 120,
+    "barrier": 120,
 }
 
 # LiDAR parameters

From 06ae9c2f75fc2060d102a1731d6365001bd91de8 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 27 Apr 2026 20:53:07 +0900
Subject: [PATCH 059/183] Update configs

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index ace7f26b8..c58a04dca 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -748,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
         # compute heatmap loss
         preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float())
         num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1)
-        if self.partial_ignore_labels is not None:
+        if self.partial_ignore_labels is None:
             loss_heatmap = self.loss_heatmap(
                 preds_dense_heatmap,
                 heatmap.float(),
@@ -798,7 +798,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
             ]
             layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes)
-            print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current")
             layer_loss_cls = self.loss_cls(
                 layer_cls_score.float(),
                 layer_labels,

From d8c19749f3d54dd9a437b02ce58b2b0ca1af755d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 09:52:10 +0900
Subject: [PATCH 060/183] Update configs

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index c58a04dca..adbd64835 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -762,6 +762,10 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
                 preds_dense_heatmap,
                 heatmap.float(),
             )
+            loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach()
+            for cls_i, class_name in enumerate(self.class_names):
+                loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
+
             # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
             loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
             loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap

From 25e581c902b3ad907da44a5bcbd41d9d249a65d3 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 10:02:59 +0900
Subject: [PATCH 061/183] Update configs

---
 tools/detection3d/create_data_t4dataset.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py
index 62169bc7a..9550b2872 100644
--- a/tools/detection3d/create_data_t4dataset.py
+++ b/tools/detection3d/create_data_t4dataset.py
@@ -273,7 +273,8 @@ def main():
 
     if cfg.filter_attributes is None:
         print_log("No attribute filtering is applied!")
-
+    
+    remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False)
     # Get every pair of min-max distance filtering thresholds
     bev_distance_ranges = []
     if hasattr(cfg, "evaluator_metric_configs"):
@@ -310,6 +311,9 @@ def main():
                 dataset_scene_info = scene_id.split("/")
                 if len(dataset_scene_info) == 5:
                     t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info
+                    if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false":
+                        print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current")
+                        continue
                 elif len(dataset_scene_info) == 2:
                     t4_dataset_id, t4_dataset_version_id = dataset_scene_info
                     city = vehicle_type = None

From 9ba440a640eb8f9742cc47425e3e4d6977f58705 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 10:05:29 +0900
Subject: [PATCH 062/183] Update configs

---
 .../t4dataset/j6gen2_base_traffic_cone.py     | 211 ++++++++++++++++++
 .../t4dataset/jpntaxi_base_traffic_cone.py    | 202 +++++++++++++++++
 2 files changed, 413 insertions(+)
 create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
 create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
new file mode 100644
index 000000000..8c57cf4fa
--- /dev/null
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
@@ -0,0 +1,211 @@
+custom_imports = dict(
+    imports=[
+        "autoware_ml.detection3d.datasets.t4dataset",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
+    ]
+)
+
+# dataset type setting
+dataset_type = "T4Dataset"
+info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl"
+info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl"
+info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl"
+
+info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet"
+info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet"
+info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet"
+
+# dataset scene setting
+dataset_version_list = [
+    "db_j6gen2_v1",
+    "db_j6gen2_v2",
+    "db_j6gen2_v3",
+    "db_j6gen2_v4",
+    "db_j6gen2_v5",
+    "db_j6gen2_v6",
+    "db_j6gen2_v7",
+    "db_j6gen2_v8",
+    "db_j6gen2_v9",
+    "db_largebus_v1",
+    "db_largebus_v2",
+    "db_largebus_v3",
+]
+
+dataset_test_groups = {
+    "largebus": ("t4dataset_largebus_traffic_cone_infos_test.pkl", False),
+    "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False),
+    "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True),
+}
+
+# dataset format setting
+data_prefix = dict(
+    pts="",
+    CAM_FRONT="",
+    CAM_FRONT_LEFT="",
+    CAM_FRONT_RIGHT="",
+    CAM_BACK="",
+    CAM_BACK_RIGHT="",
+    CAM_BACK_LEFT="",
+    sweeps="",
+)
+
+camera_types = {
+    "CAM_FRONT",
+    "CAM_FRONT_RIGHT",
+    "CAM_FRONT_LEFT",
+    "CAM_BACK",
+    "CAM_BACK_LEFT",
+    "CAM_BACK_RIGHT",
+}
+
+# class setting
+name_mapping = {
+    # DBv1.0
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.emergency (ambulance & police)": "car",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus (bendy & rigid)": "bus",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "movable_object.barrier": "barrier",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.trafficcone": "traffic_cone",
+    "movable_object.traffic_cone": "traffic_cone",
+    "animal": "animal",
+    "static_object.bicycle_rack": "bicycle_rack",
+    # DBv1.1 and UCv2.0
+    "car": "car",
+    "truck": "truck",
+    "bus": "bus",
+    "trailer": "trailer",
+    "motorcycle": "bicycle",
+    "bicycle": "bicycle",
+    "police_car": "car",
+    "pedestrian": "pedestrian",
+    "police_officer": "pedestrian",
+    "forklift": "car",
+    "construction_worker": "pedestrian",
+    "stroller": "pedestrian",
+    # DBv2.0 and DBv3.0
+    "animal": "animal",
+    "movable_object.barrier": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.traffic_cone": "traffic_cone",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "static_object.bicycle rack": "bicycle rack",
+    "static_object.bollard": "bollard",
+    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus": "bus",
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.fire": "truck",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.police": "car",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    # DBv1.3
+    "ambulance": "car",
+    "kart": "car",
+    "wheelchair": "pedestrian",
+    "personal_mobility": "pedestrian",
+    "fire_truck": "truck",
+    "semi_trailer": "trailer",
+    "tractor_unit": "truck",
+    "construction_vehicle": "truck",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
+}
+
+
+class_names = [
+    "car",
+    "truck",
+    "bus",
+    "bicycle",
+    "pedestrian",
+    "traffic_cone",
+    "barrier",
+]
+num_class = len(class_names)
+metainfo = dict(classes=class_names)
+
+merge_objects = [
+    ("truck", ["truck", "trailer"]),
+]
+merge_type = "extend_longer"  # One of ["extend_longer","union", None]
+
+# visualization
+class_colors = {
+    "car": (30, 144, 255),
+    "truck": (140, 0, 255),
+    "construction_vehicle": (255, 255, 0),
+    "bus": (111, 255, 111),
+    "trailer": (0, 255, 255),
+    "barrier": (0, 0, 0),
+    "motorcycle": (100, 0, 30),
+    "bicycle": (255, 0, 30),
+    "pedestrian": (255, 200, 200),
+    "traffic_cone": (120, 120, 120),
+}
+camera_panels = [
+    "data/CAM_FRONT_LEFT",
+    "data/CAM_FRONT",
+    "data/CAM_FRONT_RIGHT",
+    "data/CAM_BACK_LEFT",
+    "data/CAM_BACK",
+    "data/CAM_BACK_RIGHT",
+]
+
+filter_attributes = [
+    ("vehicle.bicycle", "vehicle_state.parked"),
+    ("vehicle.bicycle", "cycle_state.without_rider"),
+    ("vehicle.bicycle", "motorcycle_state.without_rider"),
+    ("vehicle.motorcycle", "vehicle_state.parked"),
+    ("vehicle.motorcycle", "cycle_state.without_rider"),
+    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
+    ("bicycle", "vehicle_state.parked"),
+    ("bicycle", "cycle_state.without_rider"),
+    ("bicycle", "motorcycle_state.without_rider"),
+    ("motorcycle", "vehicle_state.parked"),
+    ("motorcycle", "cycle_state.without_rider"),
+    ("motorcycle", "motorcycle_state.without_rider"),
+]
+
+evaluator_metric_configs = dict(
+    evaluation_task="detection",
+    target_labels=class_names,
+    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
+    # plane_distance_thresholds is required for the pass fail evaluation
+    plane_distance_thresholds=[2.0, 4.0],
+    iou_2d_thresholds=None,
+    iou_3d_thresholds=None,
+    label_prefix="autoware",
+    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
+    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
+    min_distance=[0.0, 50.0, 90.0, 0.0],
+    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
+    max_distance=[50.0, 90.0, 121.0, 121.0],
+    min_point_numbers=0,
+    matching_class_agnostic_fps=False,
+)
+
+remove_non_traffic_cone_barrier = True
\ No newline at end of file
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
new file mode 100644
index 000000000..3643b4475
--- /dev/null
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
@@ -0,0 +1,202 @@
+custom_imports = dict(
+    imports=[
+        "autoware_ml.detection3d.datasets.t4dataset",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
+    ]
+)
+
+# dataset type setting
+dataset_type = "T4Dataset"
+info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl"
+info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl"
+info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl"
+
+info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet"
+info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet"
+info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet"
+
+# dataset scene setting
+dataset_test_groups = {
+    "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True),
+}
+
+dataset_version_list = [
+    "db_jpntaxigen2_v1",
+    "db_jpntaxigen2_v2",
+    "db_jpntaxi_v1",
+    "db_jpntaxi_v2",
+    "db_jpntaxi_v4",
+]
+
+# dataset format setting
+data_prefix = dict(
+    pts="",
+    CAM_FRONT="",
+    CAM_FRONT_LEFT="",
+    CAM_FRONT_RIGHT="",
+    CAM_BACK="",
+    CAM_BACK_RIGHT="",
+    CAM_BACK_LEFT="",
+    sweeps="",
+)
+camera_types = {
+    "CAM_FRONT",
+    "CAM_FRONT_RIGHT",
+    "CAM_FRONT_LEFT",
+    "CAM_BACK",
+    "CAM_BACK_LEFT",
+    "CAM_BACK_RIGHT",
+}
+
+# class setting
+name_mapping = {
+    # DBv1.0
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.emergency (ambulance & police)": "car",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus (bendy & rigid)": "bus",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "movable_object.barrier": "barrier",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.trafficcone": "traffic_cone",
+    "movable_object.traffic_cone": "traffic_cone",
+    "animal": "animal",
+    "static_object.bicycle_rack": "bicycle_rack",
+    # DBv1.1 and UCv2.0
+    "car": "car",
+    "truck": "truck",
+    "bus": "bus",
+    "trailer": "trailer",
+    "motorcycle": "bicycle",
+    "bicycle": "bicycle",
+    "police_car": "car",
+    "pedestrian": "pedestrian",
+    "police_officer": "pedestrian",
+    "forklift": "car",
+    "construction_worker": "pedestrian",
+    "stroller": "pedestrian",
+    # DBv2.0 and DBv3.0
+    "animal": "animal",
+    "movable_object.barrier": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.traffic_cone": "traffic_cone",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "static_object.bicycle rack": "bicycle rack",
+    "static_object.bollard": "bollard",
+    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus": "bus",
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.fire": "truck",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.police": "car",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    # DBv1.3
+    "ambulance": "car",
+    "kart": "car",
+    "wheelchair": "pedestrian",
+    "personal_mobility": "pedestrian",
+    "fire_truck": "truck",
+    "semi_trailer": "trailer",
+    "tractor_unit": "truck",
+    "construction_vehicle": "truck",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
+}
+
+class_names = [
+    "car",
+    "truck",
+    "bus",
+    "bicycle",
+    "pedestrian",
+    "traffic_cone",
+    "barrier",
+]
+
+num_class = len(class_names)
+metainfo = dict(classes=class_names)
+
+merge_objects = [
+    ("truck", ["truck", "trailer"]),
+]
+merge_type = "extend_longer"  # One of ["extend_longer","union", None]
+
+# visualization
+class_colors = {
+    "car": (30, 144, 255),
+    "truck": (140, 0, 255),
+    "construction_vehicle": (255, 255, 0),
+    "bus": (111, 255, 111),
+    "trailer": (0, 255, 255),
+    "barrier": (0, 0, 0),
+    "motorcycle": (100, 0, 30),
+    "bicycle": (255, 0, 30),
+    "pedestrian": (255, 200, 200),
+    "traffic_cone": (120, 120, 120),
+}
+camera_panels = [
+    "data/CAM_FRONT_LEFT",
+    "data/CAM_FRONT",
+    "data/CAM_FRONT_RIGHT",
+    "data/CAM_BACK_LEFT",
+    "data/CAM_BACK",
+    "data/CAM_BACK_RIGHT",
+]
+
+# Add filter attributes
+filter_attributes = [
+    ("vehicle.bicycle", "vehicle_state.parked"),
+    ("vehicle.bicycle", "cycle_state.without_rider"),
+    ("vehicle.bicycle", "motorcycle_state.without_rider"),
+    ("vehicle.motorcycle", "vehicle_state.parked"),
+    ("vehicle.motorcycle", "cycle_state.without_rider"),
+    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
+    ("bicycle", "vehicle_state.parked"),
+    ("bicycle", "cycle_state.without_rider"),
+    ("bicycle", "motorcycle_state.without_rider"),
+    ("motorcycle", "vehicle_state.parked"),
+    ("motorcycle", "cycle_state.without_rider"),
+    ("motorcycle", "motorcycle_state.without_rider"),
+]
+
+evaluator_metric_configs = dict(
+    evaluation_task="detection",
+    target_labels=class_names,
+    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
+    # plane_distance_thresholds is required for the pass fail evaluation
+    plane_distance_thresholds=[2.0, 4.0],
+    iou_2d_thresholds=None,
+    iou_3d_thresholds=None,
+    label_prefix="autoware",
+    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
+    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
+    min_distance=[0.0, 50.0, 90.0, 0.0],
+    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
+    max_distance=[50.0, 90.0, 121.0, 121.0],
+    min_point_numbers=0,
+    matching_class_agnostic_fps=False,
+)
+
+remove_non_traffic_cone_barrier = True
\ No newline at end of file

From aac1e1d3fa8c342d628ccaf723fe7a2419cf0a8a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 10:14:25 +0900
Subject: [PATCH 063/183] Update configs

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index adbd64835..b0a42249a 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -692,7 +692,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
         if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
             heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
-            label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
+            label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
 
         return (
             labels[None],

From 09c06d79ee992ef0479876b374271b6276aa632c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 10:17:53 +0900
Subject: [PATCH 064/183] Update configs

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index b0a42249a..226237ff3 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -762,7 +762,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
                 preds_dense_heatmap,
                 heatmap.float(),
             )
-            loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach()
+            loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
             for cls_i, class_name in enumerate(self.class_names):
                 loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
 

From 5279b178195f4a8aa7290eb7cec905aeb354576c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 12:18:52 +0900
Subject: [PATCH 065/183] Broadcast neg_inds against partial_ignore_labels when zeroing label_weights

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 226237ff3..3f857e22d 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -692,7 +692,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
         if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
             heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
-            label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier
+            if len(neg_inds) > 0:
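+                # neg_inds has shape [N] and the ignored class columns have shape [K]: index with [N, 1] x [1, K] so they broadcast to an N x K block instead of being paired element-wise (which raises IndexError when N != K).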
+                _cols = torch.as_tensor(
+                    self.partial_ignore_labels, device=label_weights.device, dtype=torch.long
+                )
+                label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0
 
         return (
             labels[None],

From 0ce7a720a12904400e236281b8a4e3f4c17fd9a7 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 12:19:27 +0900
Subject: [PATCH 066/183] Add the script

---
 ...0m_sincos_timeexp_34_channels_32_points.py | 166 ++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py
new file mode 100644
index 000000000..f784b2386
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py
@@ -0,0 +1,166 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        max_num_points=32,
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelSinCosEncoder", 
+        in_channels=4,
+        time_lag_channel_index=3,
+        time_exp_factor=1.0,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        feat_channels=[16],
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
+        # min-max normalization for x, y, z, time_lag (four values, matching in_channels=4; no intensity entry), where the max of time lag is two sweeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=34,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)

From 9b4c2f292c10bc3561110541089c3c7bb0d0dcb9 Mon Sep 17 00:00:00 2001
From: KokSeang <kseangtan@google.com>
Date: Tue, 28 Apr 2026 12:48:58 +0900
Subject: [PATCH 067/183] Added

---
 .../detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
index 3643b4475..c7e631458 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
@@ -18,7 +18,7 @@
 
 # dataset scene setting
 dataset_test_groups = {
-    "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True),
+    "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True),
 }
 
 dataset_version_list = [

From 5c0ada87126561c3483253ebd0a684ece02b332c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 12:49:32 +0900
Subject: [PATCH 068/183] Always compute per-class weighted heatmap loss in BEVFusion head

---
 .../BEVFusion/bevfusion/bevfusion_head.py     | 71 ++++++++++++-------
 1 file changed, 45 insertions(+), 26 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 3f857e22d..a1819b309 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -753,34 +753,53 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
         # compute heatmap loss
         preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float())
         num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1)
-        if self.partial_ignore_labels is None:
-            loss_heatmap = self.loss_heatmap(
-                preds_dense_heatmap,
-                heatmap.float(),
-                avg_factor=num_pos_dense_heatmap,
-            )
-            loss_dict["loss_heatmap"] = loss_heatmap
-        else:
-            # When ignore labels is found, we compute the loss for each class
-            # heatmap focal loss
-            loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
-                preds_dense_heatmap,
-                heatmap.float(),
-            )
-            loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
-            for cls_i, class_name in enumerate(self.class_names):
-                loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
-
-            # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
-            loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
-            loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
-            # (Batch, num_classes)
-            for cls_i, class_name in enumerate(self.class_names):
-                loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
+        # if self.partial_ignore_labels is None:
+        #     loss_heatmap = self.loss_heatmap(
+        #         preds_dense_heatmap,
+        #         heatmap.float(),
+        #         avg_factor=num_pos_dense_heatmap,
+        #     )
+        #     loss_dict["loss_heatmap"] = loss_heatmap
+        # else:
+            # # When ignore labels is found, we compute the loss for each class
+            # # heatmap focal loss
+            # loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
+            #     preds_dense_heatmap,
+            #     heatmap.float(),
+            # )
+            # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
+            # # for cls_i, class_name in enumerate(self.class_names):
+            # #     loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
+
+            # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
+            # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
+            # loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
+            # # (Batch, num_classes)
+            # for cls_i, class_name in enumerate(self.class_names):
+            #     loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
             
-            # Prevent loss item to avoid computing gradients twice. This is for logging.
-            loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
+            # # Prevent loss item to avoid computing gradients twice. This is for logging.
+            # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
+
+         # # When ignore labels is found, we compute the loss for each class
+            # # heatmap focal loss
+        loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
+            preds_dense_heatmap,
+            heatmap.float(),
+        )
+        # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
+        # for cls_i, class_name in enumerate(self.class_names):
+        #     loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
+
+        # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
+        loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
+        loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
+        # (Batch, num_classes)
+        for cls_i, class_name in enumerate(self.class_names):
+            loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
         
+        # Prevent loss item to avoid computing gradients twice. This is for logging.
+        loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() 
         # compute loss for each layer
         for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1):
             if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False):

From ad6b07a14afd1bd08a3178342856660fe364185a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 12:49:46 +0900
Subject: [PATCH 069/183] Add the script

---
 ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py
new file mode 100644
index 000000000..38b1e8ea5
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py
@@ -0,0 +1,163 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_8/"
+
+experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+        partial_ignore_labels=None,
+        loss_heatmap=dict(
+            reduction="none",
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)

From a592868d6add0b0e589a1e7dd2ee47d55848be13 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 16:28:38 +0900
Subject: [PATCH 070/183] Add the script

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  70 +++-----
 ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 ++++++++++++++++++
 2 files changed, 188 insertions(+), 46 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index a1819b309..0b18803c4 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -753,53 +753,31 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
         # compute heatmap loss
         preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float())
         num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1)
-        # if self.partial_ignore_labels is None:
-        #     loss_heatmap = self.loss_heatmap(
-        #         preds_dense_heatmap,
-        #         heatmap.float(),
-        #         avg_factor=num_pos_dense_heatmap,
-        #     )
-        #     loss_dict["loss_heatmap"] = loss_heatmap
-        # else:
-            # # When ignore labels is found, we compute the loss for each class
-            # # heatmap focal loss
-            # loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
-            #     preds_dense_heatmap,
-            #     heatmap.float(),
-            # )
-            # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
-            # # for cls_i, class_name in enumerate(self.class_names):
-            # #     loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
-
-            # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
-            # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
-            # loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
-            # # (Batch, num_classes)
-            # for cls_i, class_name in enumerate(self.class_names):
-            #     loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
+        if self.partial_ignore_labels is None:
+            loss_heatmap = self.loss_heatmap(
+                preds_dense_heatmap,
+                heatmap.float(),
+                avg_factor=num_pos_dense_heatmap,
+            )
+            loss_dict["loss_heatmap"] = loss_heatmap
+        else:
+            # When ignore labels is found, we compute the loss for each class
+            # heatmap focal loss
+            loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
+                preds_dense_heatmap,
+                heatmap.float(),
+            )
+
+            # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
+            loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
+            loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
+            # (Batch, num_classes)
+            for cls_i, class_name in enumerate(self.class_names):
+                loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
             
-            # # Prevent loss item to avoid computing gradients twice. This is for logging.
-            # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
-
-         # # When ignore labels is found, we compute the loss for each class
-            # # heatmap focal loss
-        loss_heatmap_cls: torch.Tensor = self.loss_heatmap(
-            preds_dense_heatmap,
-            heatmap.float(),
-        )
-        # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach()
-        # for cls_i, class_name in enumerate(self.class_names):
-        #     loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i]
-
-        # (Batch, num_classes, height, width) * (Batch, num_classes, height, width)
-        loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float()
-        loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap
-        # (Batch, num_classes)
-        for cls_i, class_name in enumerate(self.class_names):
-            loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
-        
-        # Prevent loss item to avoid computing gradients twice. This is for logging.
-        loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() 
+            # Prevent loss item to avoid computing gradients twice. This is for logging.
+            loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
+
         # compute loss for each layer
         for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1):
             if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False):
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
new file mode 100644
index 000000000..1ca622714
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
@@ -0,0 +1,164 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_lidar_intensity_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/user_name/"
+
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=5,  # one entry per value in the min/max lists below
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+        partial_ignore_labels=None,  # head option (read as self.partial_ignore_labels): None keeps the single loss_heatmap path with no label masking
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
+
+load_from = None

From 2929ff67b324627b3adbbe50da221e235f004ec5 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 20:35:00 +0900
Subject: [PATCH 071/183] Update configs

---
 ...cond_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 4 ++--
 .../default/pipelines/default_lidar_intensity_120m.py         | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
index 1ca622714..57afc7e75 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full"
@@ -161,4 +161,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = None
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index a9032fcdc..19051a04f 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -13,6 +13,8 @@
     "bus": 120,
     "bicycle": 120,
     "pedestrian": 120,
+    "traffic_cone": 120,
+    "barrier": 120,
 }
 
 # LiDAR parameters

From 57d6ae6771fac9787e9ac4deeb3f6edca50e5bc3 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 28 Apr 2026 21:18:15 +0900
Subject: [PATCH 072/183] Update configs

---
 .../datasets/transforms/loading.py            |  41 +++++
 ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 167 ++++++++++++++++++
 .../default_30e_8xb8_adamw_cosine.py          |   2 +-
 3 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py

diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py
new file mode 100644
index 000000000..3e23218e4
--- /dev/null
+++ b/autoware_ml/detection3d/datasets/transforms/loading.py
@@ -0,0 +1,41 @@
+from mmcv.transforms import BaseTransform
+from mmdet3d.structures.ops import box_np_ops
+from mmengine.registry import TRANSFORMS
+
+
+@TRANSFORMS.register_module()
+class LoadPointsFromCurrentFileSweep(BaseTransform):
+	"""Load points from the current file and sweep. 
+	This is used to load the points from the current file and sweep for copy-paste augmentation.
+
+	Args:
+		coord_type (str): The type of coordinates of points cloud.
+		load_dim (int): The dimension of the loaded points.
+		use_dim (list[int] | int): Which dimensions of the points to use.
+		backend_args (dict, optional): Arguments to instantiate the
+			corresponding backend. Defaults to None.
+	"""
+
+	def __init__(self,
+                 coord_type: str,
+                 load_dim: int = 6,
+                 use_dim: Union[int, List[int]] = [0, 1, 2],
+                 shift_height: bool = False,
+                 use_color: bool = False,
+                 norm_intensity: bool = False,
+                 norm_elongation: bool = False,
+                 backend_args: Optional[dict] = None) -> None:
+        self.shift_height = shift_height
+        self.use_color = use_color
+        if isinstance(use_dim, int):
+            use_dim = list(range(use_dim))
+        assert max(use_dim) < load_dim, \
+            f'Expect all used dimensions < {load_dim}, got {use_dim}'
+        assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH']
+
+        self.coord_type = coord_type
+        self.load_dim = load_dim
+        self.use_dim = use_dim
+        self.norm_intensity = norm_intensity
+        self.norm_elongation = norm_elongation
+        self.backend_args = backend_args
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
new file mode 100644
index 000000000..80bd595dd
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
@@ -0,0 +1,167 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_lidar_intensity_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_8/"
+
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=5,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+		partial_ignore_labels=["traffic_cone", "barrier"],
+		loss_heatmap=dict(
+            reduction="none",
+        ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
+
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index a2cd2d2e9..388705848 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -3,7 +3,7 @@
 lr = 1.4141e-4
 t_max = 8
 max_epochs = 30
-val_interval = 5
+val_interval = 1
 
 train_gpu_size = 8
 test_batch_size = 2

From 2e03655ea03b7b9147c49e30d591ae33df1ee08a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 29 Apr 2026 01:31:43 +0900
Subject: [PATCH 073/183] Add the script

---
 .../datasets/transforms/__init__.py           |   3 +-
 .../datasets/transforms/loading.py            |  53 +--
 ..._base_120m_traffic_cone_full_copy_paste.py | 312 ++++++++++++++++++
 ...b8_j6gen2_base_120m_traffic_cone_ignore.py |  10 +-
 .../default/pipelines/default_lidar_120m.py   |   2 +
 .../pipelines/default_lidar_intensity_120m.py |   2 +
 ...default_lidar_intensity_120m_copy_paste.py | 180 ++++++++++
 7 files changed, 532 insertions(+), 30 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py

diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py
index 6bc932f1a..dc95d27f8 100644
--- a/autoware_ml/detection3d/datasets/transforms/__init__.py
+++ b/autoware_ml/detection3d/datasets/transforms/__init__.py
@@ -1,3 +1,4 @@
 from .object_min_points_filter import ObjectMinPointsFilter
+from .loading import LoadPointsFromCurrentFileSweep
 
-__all__ = ["ObjectMinPointsFilter"]
+__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"]
diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py
index 3e23218e4..d96a87b5a 100644
--- a/autoware_ml/detection3d/datasets/transforms/loading.py
+++ b/autoware_ml/detection3d/datasets/transforms/loading.py
@@ -1,22 +1,24 @@
 from mmcv.transforms import BaseTransform
 from mmdet3d.structures.ops import box_np_ops
+from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps
 from mmengine.registry import TRANSFORMS
+from typing import List, Optional, Union
 
 
 @TRANSFORMS.register_module()
 class LoadPointsFromCurrentFileSweep(BaseTransform):
-	"""Load points from the current file and sweep. 
-	This is used to load the points from the current file and sweep for copy-paste augmentation.
+    """Load points from the current file and sweep. 
+    This is used to load the points from the current file and sweep for copy-paste augmentation.
 
-	Args:
-		coord_type (str): The type of coordinates of points cloud.
-		load_dim (int): The dimension of the loaded points.
-		use_dim (list[int] | int): Which dimensions of the points to use.
-		backend_args (dict, optional): Arguments to instantiate the
-			corresponding backend. Defaults to None.
-	"""
+    Args:
+        coord_type (str): The type of coordinates of points cloud.
+        load_dim (int): The dimension of the loaded points.
+        use_dim (list[int] | int): Which dimensions of the points to use.
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding backend. Defaults to None.
+    """
 
-	def __init__(self,
+    def __init__(self,
                  coord_type: str,
                  load_dim: int = 6,
                  use_dim: Union[int, List[int]] = [0, 1, 2],
@@ -24,18 +26,21 @@ def __init__(self,
                  use_color: bool = False,
                  norm_intensity: bool = False,
                  norm_elongation: bool = False,
-                 backend_args: Optional[dict] = None) -> None:
-        self.shift_height = shift_height
-        self.use_color = use_color
-        if isinstance(use_dim, int):
-            use_dim = list(range(use_dim))
-        assert max(use_dim) < load_dim, \
-            f'Expect all used dimensions < {load_dim}, got {use_dim}'
-        assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH']
+                 backend_args: Optional[dict] = None,
+                 sweeps_num: int = 10,
+                 pad_empty_sweeps: bool = False,
+                 remove_close: bool = False,
+                 test_mode: bool = False
+                 ) -> None:
+        # Give both loaders the same load_dim/use_dim/backend_args so current-frame and sweep points share one layout.
+        self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, shift_height=shift_height, use_color=use_color, norm_intensity=norm_intensity, norm_elongation=norm_elongation, backend_args=backend_args)
+        if sweeps_num > 0:
+            self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args, pad_empty_sweeps=pad_empty_sweeps, remove_close=remove_close, test_mode=test_mode)
+        else:
+            self.points_from_multi_sweeps_loader = None
 
-        self.coord_type = coord_type
-        self.load_dim = load_dim
-        self.use_dim = use_dim
-        self.norm_intensity = norm_intensity
-        self.norm_elongation = norm_elongation
-        self.backend_args = backend_args
\ No newline at end of file
+    def transform(self, results: dict) -> dict:
+        points = self.points_loader(results)
+        if self.points_from_multi_sweeps_loader is not None:
+            points = self.points_from_multi_sweeps_loader(points)
+        return points
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
new file mode 100644
index 000000000..6c7fb78a8
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
@@ -0,0 +1,312 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_lidar_intensity_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_8/"
+
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=5,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two sweeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        )
+    ),
+)
+
+db_sampler = dict(
+    data_root=data_root,
+    info_path=info_directory_path + _base_.info_train_file_name,
+    rate=1.0,
+    prepare=dict(
+        filter_by_difficulty=[-1],
+        filter_by_min_points=dict(
+            car=5,
+            truck=5,
+            bus=5,
+            trailer=5,
+            traffic_cone=5,
+            barrier=5,
+            bicycle=5,
+            pedestrian=5)),
+    classes=_base_.class_names,
+    sample_groups=dict(
+        car=0,
+        truck=0,
+        bus=0,
+        barrier=2,
+        traffic_cone=4),
+    points_loader=dict(
+        type='LoadPointsFromCurrentFileSweep',
+        coord_type='LIDAR',
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
+        sweeps_num=_base_.sweeps_num,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        test_mode=False,
+    ))
+        
+train_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(type="ObjectSample", db_sampler=db_sampler),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+        ],
+    ),
+    dict(type="PointShuffle"),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_load_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=True,
+    ),
+    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
+
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
+
+custom_hooks = []
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
index 80bd595dd..68c736749 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -64,11 +64,11 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
-    ),
-		partial_ignore_labels=["traffic_cone", "barrier"],
-		loss_heatmap=dict(
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        loss_heatmap=dict(
             reduction="none",
         ),
+    ),
 )
 
 # Dataset parameters
@@ -164,4 +164,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 455c2761a..09b9f7b26 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -59,6 +59,8 @@
             "bus",
             "bicycle",
             "pedestrian",
+            "traffic_cone",
+            "barrier",
         ],
     ),
     dict(type="PointShuffle"),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 19051a04f..9c7e02977 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -59,6 +59,8 @@
             "bus",
             "bicycle",
             "pedestrian",
+            "traffic_cone",
+            "barrier",
         ],
     ),
     dict(type="PointShuffle"),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py
new file mode 100644
index 000000000..a7c7cddfe
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py
@@ -0,0 +1,180 @@
+# Dataset parameters
+backend_args = None
+num_workers = 32
+input_modality = dict(use_lidar=True, use_camera=False)
+
+# range setting
+point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
+voxel_size = [0.17, 0.17, 0.2]
+grid_size = [1440, 1440, 41]
+eval_class_range = {
+    "car": 120,
+    "truck": 120,
+    "bus": 120,
+    "bicycle": 120,
+    "pedestrian": 120,
+	  "traffic_cone": 120, 
+	  "barrier": 120,
+}
+
+# LiDAR parameters
+point_load_dim = 5  # x, y, z, intensity, ring_id
+point_use_dim = 5
+lidar_sweep_dims = [0, 1, 2, 3, 4]  # x, y, z, intensity, time_lag
+sweeps_num = 1
+
+db_sampler = dict(
+    data_root=data_root,
+    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
+    rate=1.0,
+    prepare=dict(
+        filter_by_difficulty=[-1],
+        filter_by_min_points=dict(
+            car=5,
+            truck=5,
+            bus=5,
+            trailer=5,
+            construction_vehicle=5,
+            traffic_cone=5,
+            barrier=5,
+            motorcycle=5,
+            bicycle=5,
+            pedestrian=5)),
+    classes=class_names,
+    sample_groups=dict(
+        car=2,
+        truck=3,
+        construction_vehicle=7,
+        bus=4,
+        trailer=6,
+        barrier=2,
+        motorcycle=6,
+        bicycle=6,
+        pedestrian=2,
+        traffic_cone=2),
+    points_loader=dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=[0, 1, 2, 3, 4],
+        backend_args=backend_args))
+        
+train_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+        ],
+    ),
+    dict(type="PointShuffle"),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=True,
+    ),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",  
+        ],
+    ),
+]
+
+# Filtering configuration
+# Note:
+# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering,
+#   e.g., dict(filter_frames_with_missing_image=True).
+# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
+#   image-based filtering does not apply and `filter_cfg` is intentionally None.
+filter_cfg = None

From e2a69c1851b6149f256e32f48803b968756f018d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 29 Apr 2026 01:35:46 +0900
Subject: [PATCH 074/183] Remove default copy-paste pipeline config and keep traffic_cone/barrier in ObjectNameFilter

---
 .../datasets/transforms/loading.py            |   2 +
 ..._base_120m_traffic_cone_full_copy_paste.py |   2 +
 ...default_lidar_intensity_120m_copy_paste.py | 180 ------------------
 3 files changed, 4 insertions(+), 180 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py

diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py
index d96a87b5a..535653d9b 100644
--- a/autoware_ml/detection3d/datasets/transforms/loading.py
+++ b/autoware_ml/detection3d/datasets/transforms/loading.py
@@ -1,3 +1,5 @@
+from typing import List, Optional, Union
+
 from mmcv.transforms import BaseTransform
 from mmdet3d.structures.ops import box_np_ops
 from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
index 6c7fb78a8..6e0d7445b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
@@ -138,6 +138,8 @@
             "bus",
             "bicycle",
             "pedestrian",
+            "traffic_cone",
+            "barrier",
         ],
     ),
     dict(type="PointShuffle"),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py
deleted file mode 100644
index a7c7cddfe..000000000
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# Dataset parameters
-backend_args = None
-num_workers = 32
-input_modality = dict(use_lidar=True, use_camera=False)
-
-# range setting
-point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.17, 0.17, 0.2]
-grid_size = [1440, 1440, 41]
-eval_class_range = {
-    "car": 120,
-    "truck": 120,
-    "bus": 120,
-    "bicycle": 120,
-    "pedestrian": 120,
-	  "traffic_cone": 120, 
-	  "barrier": 120,
-}
-
-# LiDAR parameters
-point_load_dim = 5  # x, y, z, intensity, ring_id
-point_use_dim = 5
-lidar_sweep_dims = [0, 1, 2, 3, 4]  # x, y, z, intensity, time_lag
-sweeps_num = 1
-
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
-    rate=1.0,
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(
-            car=5,
-            truck=5,
-            bus=5,
-            trailer=5,
-            construction_vehicle=5,
-            traffic_cone=5,
-            barrier=5,
-            motorcycle=5,
-            bicycle=5,
-            pedestrian=5)),
-    classes=class_names,
-    sample_groups=dict(
-        car=2,
-        truck=3,
-        construction_vehicle=7,
-        bus=4,
-        trailer=6,
-        barrier=2,
-        motorcycle=6,
-        bicycle=6,
-        pedestrian=2,
-        traffic_cone=2),
-    points_loader=dict(
-        type='LoadPointsFromFile',
-        coord_type='LIDAR',
-        load_dim=5,
-        use_dim=[0, 1, 2, 3, 4],
-        backend_args=backend_args))
-        
-train_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=False,
-    ),
-    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
-    dict(
-        type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
-    ),
-    dict(type="BEVFusionRandomFlip3D"),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
-    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
-    dict(
-        type="ObjectNameFilter",
-        classes=[
-            "car",
-            "truck",
-            "bus",
-            "bicycle",
-            "pedestrian",
-        ],
-    ),
-    dict(type="PointShuffle"),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "transformation_3d_flow",
-            "pcd_rotation",
-            "pcd_scale_factor",
-            "pcd_trans",
-            "img_aug_matrix",
-            "lidar_aug_matrix",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",
-        ],
-    ),
-]
-
-test_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=True,
-    ),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "num_pts_feats",
-            "num_views",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",  
-        ],
-    ),
-]
-
-# Filtering configuration
-# Note:
-# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering,
-#   e.g., dict(filter_frames_with_missing_image=True).
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
-#   image-based filtering does not apply and `filter_cfg` is intentionally None.
-filter_cfg = None

From ebc80340e07dac07833f2b2b5d7bd9df15fe3450 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 29 Apr 2026 01:37:23 +0900
Subject: [PATCH 075/183] Remove partial_ignore_labels=None from traffic_cone_full config

---
 ..._second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
index 57afc7e75..b9fafe7a9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
@@ -65,7 +65,6 @@
             voxel_size=_base_.voxel_size[0:2],
         ),
     ),
-		partial_ignore_labels=None,
 )
 
 # Dataset parameters

From bb35205a445beb1ad37bfe075a9400d0a8fb960a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 29 Apr 2026 10:33:08 +0900
Subject: [PATCH 076/183] Set val_interval to 5 in default 30e cosine scheduler

---
 .../default/schedulers/default_30e_8xb8_adamw_cosine.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
index 388705848..a2cd2d2e9 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
@@ -3,7 +3,7 @@
 lr = 1.4141e-4
 t_max = 8
 max_epochs = 30
-val_interval = 1
+val_interval = 5
 
 train_gpu_size = 8
 test_batch_size = 2

From f343fbeae0811127e1d54870e21f7eb5850af3b2 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 29 Apr 2026 10:42:46 +0900
Subject: [PATCH 077/183] Default missing difficulty to 0 in T4Dataset.parse_data_info

---
 autoware_ml/detection3d/datasets/t4dataset.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index ce1c78f31..526150755 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -191,5 +191,8 @@ def parse_data_info(self, info: dict) -> dict:
                     info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"])
                 else:
                     info["lidar2img"] = info["cam2img"] @ info["lidar2cam"]
-
+        
+        # Default difficulty to 0 if not present
+        if 'difficulty' not in info:
+            info['difficulty'] = 0
         return info

From 6f55027b662c778c264d5ebe967d8d2f34813676 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 30 Apr 2026 03:22:00 +0900
Subject: [PATCH 078/183] Rename 50e traffic_cone config and experiment to traffic_cone_ignore

---
 ...xel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} (98%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py
index 39c6ddf54..90136a748 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter

From d99abd97ed98a0bdfcc90c1b26383aded31eedcf Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 30 Apr 2026 12:25:17 +0900
Subject: [PATCH 079/183] Add partial_ignore_dense_heatmap option and traffic_cone ignore copy-paste config

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |   9 +-
 ...8xb8_j6gen2_base_120m_traffic_cone_full.py |   1 +
 ..._base_120m_traffic_cone_full_copy_paste.py |  21 +-
 ...b8_j6gen2_base_120m_traffic_cone_ignore.py |   1 +
 ...ase_120m_traffic_cone_ignore_copy_paste.py | 317 ++++++++++++++++++
 5 files changed, 336 insertions(+), 13 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 0b18803c4..4894ad2e7 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -70,7 +70,8 @@ def __init__(
         train_cfg=None,
         test_cfg=None,
         bbox_coder=None,
-        partial_ignore_labels=None
+        partial_ignore_labels=None,
+        partial_ignore_dense_heatmap=False
     ):
         super().__init__()
         self.class_names = class_names
@@ -194,7 +195,8 @@ def __init__(
         else:
             self.partial_ignore_labels = None
         
-        print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \
+        self.partial_ignore_dense_heatmap = partial_ignore_dense_heatmap
+        print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \
         {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
 
     def create_2D_grid(self, x_size, y_size):
@@ -691,7 +693,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         # Ignore labels for traffic cone and barrier
         traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
         if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
-            heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
+            if self.partial_ignore_dense_heatmap:
+                heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
             if len(neg_inds) > 0:
                 # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K.
                 _cols = torch.as_tensor(
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
index b9fafe7a9..88e3cbc54 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
@@ -64,6 +64,7 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
+        partial_ignore_dense_heatmap=False
     ),
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
index 6e0d7445b..903df577c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
@@ -63,7 +63,8 @@
         bbox_coder=dict(
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
-        )
+        ),
+        partial_ignore_dense_heatmap=False
     ),
 )
 
@@ -113,7 +114,7 @@
         type="LoadPointsFromMultiSweeps",
         sweeps_num=_base_.sweeps_num,
         load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
+        use_dim=_base_.lidar_sweep_dims,
         pad_empty_sweeps=True,
         remove_close=True,
         backend_args=_base_.backend_args,
@@ -176,21 +177,21 @@
     dict(
         type="LoadPointsFromFile",
         coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
     ),
     dict(
         type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
         pad_empty_sweeps=True,
         remove_close=True,
-        backend_args=backend_args,
+        backend_args=_base_.backend_args,
         test_mode=True,
     ),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
     dict(
         type="Pack3DDetInputs",
         keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
index 68c736749..bb10d484d 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
@@ -65,6 +65,7 @@
             voxel_size=_base_.voxel_size[0:2],
         ),
         partial_ignore_labels=["traffic_cone", "barrier"],
+        partial_ignore_dense_heatmap=True,
         loss_heatmap=dict(
             reduction="none",
         ),
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
new file mode 100644
index 000000000..61b9d35f3
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
@@ -0,0 +1,317 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_lidar_intensity_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_8/"
+
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=True,
+    ),
+    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_middle_encoder=dict(
+        in_channels=_base_.point_use_dim,
+        sparse_shape=_base_.grid_size,
+        num_aug_features=5,
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        aug_features_min_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        aug_features_max_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        partial_ignore_dense_heatmap=False,
+        loss_heatmap=dict(
+            reduction="none",
+        ),
+    ),
+)
+
+db_sampler = dict(
+    data_root=data_root,
+    info_path=info_directory_path + _base_.info_train_file_name,
+    rate=1.0,
+    prepare=dict(
+        filter_by_difficulty=[-1],
+        filter_by_min_points=dict(
+            car=5,
+            truck=5,
+            bus=5,
+            trailer=5,
+            traffic_cone=5,
+            barrier=5,
+            bicycle=5,
+            pedestrian=5)),
+    classes=_base_.class_names,
+    sample_groups=dict(
+        car=0,
+        truck=0,
+        bus=0,
+        barrier=2,
+        traffic_cone=4),
+    points_loader=dict(
+        type='LoadPointsFromCurrentFileSweep',
+        coord_type='LIDAR',
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
+        sweeps_num=_base_.sweeps_num,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        test_mode=False,
+    ))
+        
+train_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(type="ObjectSample", db_sampler=db_sampler),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+            "traffic_cone",
+            "barrier",
+        ],
+    ),
+    dict(type="PointShuffle"),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_use_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=True,
+    ),
+    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",  
+        ],
+    ),
+]
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
+
+load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"

From b1b247a148f1a060eec5f70f7d8f1d8911524ebe Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 30 Apr 2026 12:28:55 +0900
Subject: [PATCH 080/183] Add bicycle and pedestrian with zero samples to db_sampler sample_groups

---
 ...pn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py | 2 ++
 ..._30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
index 903df577c..7fef2db47 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
@@ -88,6 +88,8 @@
         car=0,
         truck=0,
         bus=0,
+        bicycle=0,
+        pedestrian=0,
         barrier=2,
         traffic_cone=4),
     points_loader=dict(
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
index 61b9d35f3..e5e9c9ff3 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
@@ -92,6 +92,8 @@
         car=0,
         truck=0,
         bus=0,
+        bicycle=0,
+        pedestrian=0,
         barrier=2,
         traffic_cone=4),
     points_loader=dict(

From 36e3811139144ea84899aadfa8b94f66e421dd9d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 30 Apr 2026 20:38:01 +0900
Subject: [PATCH 081/183] Add the script

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  3 +
 ...ase_120m_traffic_cone_ignore_copy_paste.py | 73 ++++---------------
 2 files changed, 19 insertions(+), 57 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 4894ad2e7..5b0e156d0 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -701,6 +701,9 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
                     self.partial_ignore_labels, device=label_weights.device, dtype=torch.long
                 )
                 label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0
+            
+            print("heatmap with traffic cone: ", heatmap[5].sum())
+            print("heatmap with barrier: ", heatmap[6].sum())
 
         return (
             labels[None],
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
index e5e9c9ff3..41629bb17 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
@@ -74,19 +74,20 @@
 
 db_sampler = dict(
     data_root=data_root,
-    info_path=info_directory_path + _base_.info_train_file_name,
+    info_path=data_root + info_directory_path + _base_.info_train_file_name,
     rate=1.0,
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(
-            car=5,
-            truck=5,
-            bus=5,
-            trailer=5,
-            traffic_cone=5,
-            barrier=5,
-            bicycle=5,
-            pedestrian=5)),
+    prepare=dict(),
+    # prepare=dict(
+    #     filter_by_difficulty=[-1],
+    #     filter_by_min_points=dict(
+    #         car=5,
+    #         truck=5,
+    #         bus=5,
+    #         trailer=5,
+    #         traffic_cone=5,
+    #         barrier=5,
+    #         bicycle=5,
+    #         pedestrian=5)),
     classes=_base_.class_names,
     sample_groups=dict(
         car=0,
@@ -179,50 +180,6 @@
     ),
 ]
 
-test_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=_base_.sweeps_num,
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=_base_.backend_args,
-        test_mode=True,
-    ),
-    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "num_pts_feats",
-            "num_views",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",  
-        ],
-    ),
-]
-
 # Dataset parameters
 train_dataloader = dict(
     batch_size=_base_.train_batch_size,
@@ -231,7 +188,7 @@
     sampler=dict(type="DefaultSampler", shuffle=True),
     dataset=dict(
         type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
+        pipeline=train_pipeline,
         modality=_base_.input_modality,
         backend_args=_base_.backend_args,
         data_root=data_root,
@@ -317,3 +274,5 @@
 log_processor = dict(window_size=50)
 
 load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
+
+custom_hooks = []

From e9052633e80ae8ba8b36fb6554ded9c04e7bf672 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 7 May 2026 15:29:37 +0900
Subject: [PATCH 082/183] Add traffic cone and barrier

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  13 +-
 ...second_secfpn_30e_8xb8_j6gen2_base_120m.py |   4 +
 ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 ---------
 ..._base_120m_traffic_cone_full_copy_paste.py | 317 ------------------
 ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 168 ----------
 ...ase_120m_traffic_cone_ignore_copy_paste.py | 278 ---------------
 ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py |   4 +
 ..._voxel_second_secfpn_50e_8xb8_base_120m.py |   4 +
 ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 ---------
 ..._50e_8xb8_base_120m_traffic_cone_ignore.py | 163 ---------
 10 files changed, 14 insertions(+), 1264 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 5b0e156d0..da056efcc 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -633,7 +633,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         ious = torch.clamp(ious, min=0.0, max=1.0)
         labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)
         label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long)
-        # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)
 
         if gt_labels_3d is not None:  # default label is -1
             labels += self.num_classes
@@ -693,17 +692,13 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         # Ignore labels for traffic cone and barrier
         traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
         if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
-            if self.partial_ignore_dense_heatmap:
-                heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals
+            heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids 
             if len(neg_inds) > 0:
-                # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K.
+                # neg_inds [N] and column indices [K] must broadcast to an [N, K] index grid (not pair element-wise).
                 _cols = torch.as_tensor(
                     self.partial_ignore_labels, device=label_weights.device, dtype=torch.long
                 )
                 label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0
-            
-            print("heatmap with traffic cone: ", heatmap[5].sum())
-            print("heatmap with barrier: ", heatmap[6].sum())
 
         return (
             labels[None],
@@ -795,10 +790,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
             ].reshape(-1)
-            # layer_label_weights = label_weights[
-            #     ...,
-            #     idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
-            # ].reshape(-1)
             layer_label_weights = label_weights[
                 ...,
                 idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 9da67036e..d32dc9c70 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -64,6 +64,10 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        loss_heatmap=dict(
+            reduction="none",
+        ),
     ),
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
deleted file mode 100644
index 88e3cbc54..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py
+++ /dev/null
@@ -1,164 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_lidar_intensity_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_dense_heatmap=False
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
-
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
deleted file mode 100644
index 7fef2db47..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py
+++ /dev/null
@@ -1,317 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_lidar_intensity_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_dense_heatmap=False
-    ),
-)
-
-db_sampler = dict(
-    data_root=data_root,
-    info_path=info_directory_path + _base_.info_train_file_name,
-    rate=1.0,
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(
-            car=5,
-            truck=5,
-            bus=5,
-            trailer=5,
-            traffic_cone=5,
-            barrier=5,
-            bicycle=5,
-            pedestrian=5)),
-    classes=_base_.class_names,
-    sample_groups=dict(
-        car=0,
-        truck=0,
-        bus=0,
-        bicycle=0,
-        pedestrian=0,
-        barrier=2,
-        traffic_cone=4),
-    points_loader=dict(
-        type='LoadPointsFromCurrentFileSweep',
-        coord_type='LIDAR',
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-        sweeps_num=_base_.sweeps_num,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        test_mode=False,
-    ))
-        
-train_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=_base_.sweeps_num,
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=_base_.backend_args,
-        test_mode=False,
-    ),
-    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
-    dict(type="ObjectSample", db_sampler=db_sampler),
-    dict(
-        type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
-    ),
-    dict(type="BEVFusionRandomFlip3D"),
-    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(
-        type="ObjectNameFilter",
-        classes=[
-            "car",
-            "truck",
-            "bus",
-            "bicycle",
-            "pedestrian",
-            "traffic_cone",
-            "barrier",
-        ],
-    ),
-    dict(type="PointShuffle"),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "transformation_3d_flow",
-            "pcd_rotation",
-            "pcd_scale_factor",
-            "pcd_trans",
-            "img_aug_matrix",
-            "lidar_aug_matrix",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",
-        ],
-    ),
-]
-
-test_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=_base_.sweeps_num,
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=_base_.backend_args,
-        test_mode=True,
-    ),
-    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "num_pts_feats",
-            "num_views",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",  
-        ],
-    ),
-]
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
-
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
-
-custom_hooks = []
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
deleted file mode 100644
index bb10d484d..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py
+++ /dev/null
@@ -1,168 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_lidar_intensity_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_labels=["traffic_cone", "barrier"],
-        partial_ignore_dense_heatmap=True,
-        loss_heatmap=dict(
-            reduction="none",
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
-
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
deleted file mode 100644
index 41629bb17..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py
+++ /dev/null
@@ -1,278 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_lidar_intensity_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_labels=["traffic_cone", "barrier"],
-        partial_ignore_dense_heatmap=False,
-        loss_heatmap=dict(
-            reduction="none",
-        ),
-    ),
-)
-
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + info_directory_path + _base_.info_train_file_name,
-    rate=1.0,
-    prepare=dict(),
-    # prepare=dict(
-    #     filter_by_difficulty=[-1],
-    #     filter_by_min_points=dict(
-    #         car=5,
-    #         truck=5,
-    #         bus=5,
-    #         trailer=5,
-    #         traffic_cone=5,
-    #         barrier=5,
-    #         bicycle=5,
-    #         pedestrian=5)),
-    classes=_base_.class_names,
-    sample_groups=dict(
-        car=0,
-        truck=0,
-        bus=0,
-        bicycle=0,
-        pedestrian=0,
-        barrier=2,
-        traffic_cone=4),
-    points_loader=dict(
-        type='LoadPointsFromCurrentFileSweep',
-        coord_type='LIDAR',
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-        sweeps_num=_base_.sweeps_num,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        test_mode=False,
-    ))
-        
-train_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_use_dim,
-        backend_args=_base_.backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=_base_.sweeps_num,
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=_base_.backend_args,
-        test_mode=False,
-    ),
-    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
-    dict(type="ObjectSample", db_sampler=db_sampler),
-    dict(
-        type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
-    ),
-    dict(type="BEVFusionRandomFlip3D"),
-    dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    dict(
-        type="ObjectNameFilter",
-        classes=[
-            "car",
-            "truck",
-            "bus",
-            "bicycle",
-            "pedestrian",
-            "traffic_cone",
-            "barrier",
-        ],
-    ),
-    dict(type="PointShuffle"),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "transformation_3d_flow",
-            "pcd_rotation",
-            "pcd_scale_factor",
-            "pcd_trans",
-            "img_aug_matrix",
-            "lidar_aug_matrix",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",
-        ],
-    ),
-]
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
-
-load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth"
-
-custom_hooks = []
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index c884c0aef..406e87655 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -64,6 +64,10 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        loss_heatmap=dict(
+            reduction="none",
+        ),
     ),
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
index 79337d976..e8068332a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
@@ -62,6 +62,10 @@
             pc_range=_base_.point_cloud_range[0:2],
             voxel_size=_base_.voxel_size[0:2],
         ),
+        partial_ignore_labels=["traffic_cone", "barrier"],
+        loss_heatmap=dict(
+            reduction="none",
+        ),
     ),
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py
deleted file mode 100644
index 38b1e8ea5..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py
+++ /dev/null
@@ -1,163 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_labels=None,
-        loss_heatmap=dict(
-            reduction="none",
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py
deleted file mode 100644
index 90136a748..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py
+++ /dev/null
@@ -1,163 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
-    ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
-    pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
-        sparse_shape=_base_.grid_size,
-        num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            0.2,
-        ],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_labels=["traffic_cone", "barrier"],
-        loss_heatmap=dict(
-            reduction="none",
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)

From dc2265e33d56726313862e0f42fbd87f8fd65fde Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 May 2026 06:32:44 +0000
Subject: [PATCH 083/183] ci(pre-commit): autofix

---
 .../detection3d/dataset/t4dataset/base.py     | 14 ++----
 .../t4dataset/j6gen2_base_traffic_cone.py     |  2 +-
 .../t4dataset/jpntaxi_base_traffic_cone.py    |  2 +-
 .../dataset/t4dataset/jpntaxi_gen2.py         |  6 +--
 .../detection3d/dataset/t4dataset/largebus.py |  4 +-
 autoware_ml/detection3d/datasets/t4dataset.py |  6 +--
 .../datasets/transforms/__init__.py           |  2 +-
 .../datasets/transforms/loading.py            | 47 +++++++++++--------
 .../BEVFusion/bevfusion/bevfusion_head.py     | 45 +++++++++++-------
 projects/BEVFusion/bevfusion/utils.py         |  4 +-
 .../default_lidar_second_secfpn_120m.py       |  2 +-
 .../pipelines/default_lidar_intensity_120m.py |  6 +--
 tools/detection3d/create_data_t4dataset.py    | 15 ++++--
 .../t4dataset_converters/t4converter.py       |  2 +-
 14 files changed, 86 insertions(+), 71 deletions(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index 4248c90e6..3be587072 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -143,20 +143,12 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-	"traffic_cone": "traffic_cone",
-	"trafficcone": "traffic_cone",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
     "barrier": "barrier",
 }
 
-class_names = [
-    "car",
-    "truck",
-    "bus",
-    "bicycle",
-    "pedestrian",
-	"traffic_cone",
-	"barrier"
-]
+class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"]
 num_class = len(class_names)
 metainfo = dict(classes=class_names)
 
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
index 8c57cf4fa..176763b54 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
@@ -208,4 +208,4 @@
     matching_class_agnostic_fps=False,
 )
 
-remove_non_traffic_cone_barrier = True
\ No newline at end of file
+remove_non_traffic_cone_barrier = True
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
index c7e631458..61e9e915c 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
@@ -199,4 +199,4 @@
     matching_class_agnostic_fps=False,
 )
 
-remove_non_traffic_cone_barrier = True
\ No newline at end of file
+remove_non_traffic_cone_barrier = True
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
index 6b7250673..dbd6e2813 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
@@ -117,9 +117,9 @@
     "semi_trailer": "trailer",
     "tractor_unit": "truck",
     "construction_vehicle": "truck",
-	"traffic_cone": "traffic_cone",
-	"trafficcone": "traffic_cone",
-	"barrier": "barrier",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
 }
 
 class_names = [
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
index 2b54629eb..2212b8e56 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
@@ -130,8 +130,8 @@
     "bus",
     "bicycle",
     "pedestrian",
-	"traffic_cone",
-	"barrier",
+    "traffic_cone",
+    "barrier",
 ]
 
 num_class = len(class_names)
diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index 526150755..d7fed6256 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -191,8 +191,8 @@ def parse_data_info(self, info: dict) -> dict:
                     info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"])
                 else:
                     info["lidar2img"] = info["cam2img"] @ info["lidar2cam"]
-        
+
         # Default difficulty to 0 if not present
-        if 'difficulty' not in info:
-            info['difficulty'] = 0
+        if "difficulty" not in info:
+            info["difficulty"] = 0
         return info
diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py
index dc95d27f8..b517bf1ea 100644
--- a/autoware_ml/detection3d/datasets/transforms/__init__.py
+++ b/autoware_ml/detection3d/datasets/transforms/__init__.py
@@ -1,4 +1,4 @@
-from .object_min_points_filter import ObjectMinPointsFilter
 from .loading import LoadPointsFromCurrentFileSweep
+from .object_min_points_filter import ObjectMinPointsFilter
 
 __all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"]
diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py
index 535653d9b..09beddc34 100644
--- a/autoware_ml/detection3d/datasets/transforms/loading.py
+++ b/autoware_ml/detection3d/datasets/transforms/loading.py
@@ -1,15 +1,14 @@
 from typing import List, Optional, Union
 
 from mmcv.transforms import BaseTransform
-from mmdet3d.structures.ops import box_np_ops
 from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps
+from mmdet3d.structures.ops import box_np_ops
 from mmengine.registry import TRANSFORMS
 
 
-
 @TRANSFORMS.register_module()
 class LoadPointsFromCurrentFileSweep(BaseTransform):
-    """Load points from the current file and sweep. 
+    """Load points from the current file and sweep.
     This is used to load the points from the current file and sweep for copy-paste augmentation.
 
     Args:
@@ -20,24 +19,32 @@ class LoadPointsFromCurrentFileSweep(BaseTransform):
             corresponding backend. Defaults to None.
     """
 
-    def __init__(self,
-                 coord_type: str,
-                 load_dim: int = 6,
-                 use_dim: Union[int, List[int]] = [0, 1, 2],
-                 shift_height: bool = False,
-                 use_color: bool = False,
-                 norm_intensity: bool = False,
-                 norm_elongation: bool = False,
-                 backend_args: Optional[dict] = None, 
-                 sweeps_num: int = 10,
-                 pad_empty_sweeps: bool = False,
-                 remove_close: bool = False,
-                 test_mode: bool = False
-                 ) -> None:
-        
-        self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args)
+    def __init__(
+        self,
+        coord_type: str,
+        load_dim: int = 6,
+        use_dim: Union[int, List[int]] = [0, 1, 2],
+        shift_height: bool = False,
+        use_color: bool = False,
+        norm_intensity: bool = False,
+        norm_elongation: bool = False,
+        backend_args: Optional[dict] = None,
+        sweeps_num: int = 10,
+        pad_empty_sweeps: bool = False,
+        remove_close: bool = False,
+        test_mode: bool = False,
+    ) -> None:
+
+        self.points_loader = LoadPointsFromFile(
+            coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args
+        )
         if sweeps_num > 0:
-            self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, pad_empty_sweeps=pad_empty_sweeps, remove_close=remove_close, test_mode=test_mode)
+            self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(
+                sweeps_num=sweeps_num,
+                pad_empty_sweeps=pad_empty_sweeps,
+                remove_close=remove_close,
+                test_mode=test_mode,
+            )
         else:
             self.points_from_multi_sweeps_loader = None
 
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index da056efcc..b62113f65 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -13,9 +13,9 @@
 from mmdet3d.structures import xywhr2xyxyr
 from mmdet.models.task_modules import AssignResult, PseudoSampler, build_assigner, build_bbox_coder, build_sampler
 from mmdet.models.utils import multi_apply
+from mmengine.logging import print_log
 from mmengine.structures import InstanceData
 from torch import nn
-from mmengine.logging import print_log
 
 
 def clip_sigmoid(x, eps=1e-4):
@@ -71,7 +71,7 @@ def __init__(
         test_cfg=None,
         bbox_coder=None,
         partial_ignore_labels=None,
-        partial_ignore_dense_heatmap=False
+        partial_ignore_dense_heatmap=False,
     ):
         super().__init__()
         self.class_names = class_names
@@ -187,17 +187,24 @@ def __init__(
             cluster["class_indices"] = sorted(
                 [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]]
             )
-        
+
         # If true, only compute loss for traffic cone and barrier when it's available in the frame
         if partial_ignore_labels is not None:
-            assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier"
-            self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels]
+            assert (
+                loss_heatmap["reduction"] == "none"
+            ), "Loss reduction must be 'none' for partial traffic cone and barrier"
+            self.partial_ignore_labels = [
+                self.class_name_to_indices[class_name] for class_name in partial_ignore_labels
+            ]
         else:
             self.partial_ignore_labels = None
-        
+
         self.partial_ignore_dense_heatmap = partial_ignore_dense_heatmap
-        print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \
-        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
+        print_log(
+            f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \
+        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}",
+            logger="current",
+        )
 
     def create_2D_grid(self, x_size, y_size):
         meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]
@@ -469,7 +476,9 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
 
         return rets[0]
 
-    def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]):
+    def get_targets(
+        self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]
+    ):
         """Generate training targets.
         Args:
             batch_gt_instances_3d (List[InstanceData]):
@@ -579,7 +588,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
             num_layer = self.num_decoder_layers
         else:
             num_layer = 1
-        
+
         assign_result_list = []
         for idx_layer in range(num_layer):
             bboxes_tensor_layer = bboxes_tensor[
@@ -653,10 +662,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
                 label_weights[pos_inds] = 1.0
             else:
                 label_weights[pos_inds] = self.train_cfg.pos_weight
-            
+
         if len(neg_inds) > 0:
             label_weights[neg_inds] = 1.0
-        
+
         # # compute dense heatmap targets
         device = labels.device
         gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device)
@@ -692,12 +701,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata):
         # Ignore labels for traffic cone and barrier
         traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True)
         if self.partial_ignore_labels is not None and not traffic_cone_barrier_status:
-            heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids 
+            heatmap_weights[self.partial_ignore_labels] = 0.0  # Set to 0 to ignore these grids
             if len(neg_inds) > 0:
                 # neg_inds [N] and column indices [K] must broadcast (not pair);
-                _cols = torch.as_tensor(
-                    self.partial_ignore_labels, device=label_weights.device, dtype=torch.long
-                )
+                _cols = torch.as_tensor(self.partial_ignore_labels, device=label_weights.device, dtype=torch.long)
                 label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0
 
         return (
@@ -732,7 +739,9 @@ def loss(self, batch_feats, batch_data_samples):
 
         return loss
 
-    def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas):
+    def loss_by_feat(
+        self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas
+    ):
         (
             labels,
             label_weights,
@@ -775,7 +784,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li
             # (Batch, num_classes)
             for cls_i, class_name in enumerate(self.class_names):
                 loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
-            
+
             # Prevent loss item to avoid computing gradients twice. This is for logging.
             loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
 
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index b6bd2be41..8fd83a0c5 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -259,10 +259,10 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label
         reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)
         iou = self.iou_calculator(bboxes, gt_bboxes)
         iou_cost = self.iou_cost(iou)
-     
+
         # weighted sum of above three costs
         cost = cls_cost + reg_cost + iou_cost
-        
+
         # if ignore_labels is not None:
         #     preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
         #     print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 023c6774d..809179b20 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -114,6 +114,6 @@
         ),
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
-        partial_ignore_labels=None
+        partial_ignore_labels=None,
     ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 9c7e02977..e2de195e9 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -13,8 +13,8 @@
     "bus": 120,
     "bicycle": 120,
     "pedestrian": 120,
-	  "traffic_cone": 120, 
-	  "barrier": 120,
+    "traffic_cone": 120,
+    "barrier": 120,
 }
 
 # LiDAR parameters
@@ -132,7 +132,7 @@
             "timestamp",
             "vehicle_type",
             "city",
-            "traffic_cone_barrier_status",  
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py
index 9550b2872..3b02017e0 100644
--- a/tools/detection3d/create_data_t4dataset.py
+++ b/tools/detection3d/create_data_t4dataset.py
@@ -273,7 +273,7 @@ def main():
 
     if cfg.filter_attributes is None:
         print_log("No attribute filtering is applied!")
-    
+
     remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False)
     # Get every pair of min-max distance filtering thresholds
     bev_distance_ranges = []
@@ -310,9 +310,14 @@ def main():
                 )
                 dataset_scene_info = scene_id.split("/")
                 if len(dataset_scene_info) == 5:
-                    t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info
+                    t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = (
+                        dataset_scene_info
+                    )
                     if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false":
-                        print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current")
+                        print_log(
+                            f"Skipping scene: {scene_id} because it does not have traffic cone or barrier",
+                            logger="current",
+                        )
                         continue
                 elif len(dataset_scene_info) == 2:
                     t4_dataset_id, t4_dataset_version_id = dataset_scene_info
@@ -336,7 +341,9 @@ def main():
                 infos = []
                 for i in range(0, len(t4.sample), sample_steps):
                     sample = t4.sample[i]
-                    info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type)
+                    info = get_info(
+                        cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type
+                    )
                     if info is None:
                         continue
                     # info["version"] = dataset_version             # used for visualizations during debugging.
diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py
index 5dfd1dc1f..ccc88b2d1 100644
--- a/tools/detection3d/t4dataset_converters/t4converter.py
+++ b/tools/detection3d/t4dataset_converters/t4converter.py
@@ -626,7 +626,7 @@ def get_lidarseg_annotations(
 ) -> dict:
     if not hasattr(t4, "lidarseg") or not t4.lidarseg:
         return dict()
-    
+
     if sd_record.info_filename is None:
         print(f"sample {lidar_token} doesn't have lidar info_filename")
         return dict()

From 15bbf0ef0920f41f6e267b312447d2900b5bd8fd Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 7 May 2026 15:35:46 +0900
Subject: [PATCH 084/183] remove unnecessary changes

---
 .../t4dataset/j6gen2_base_traffic_cone.py     | 211 ------------------
 .../t4dataset/jpntaxi_base_traffic_cone.py    | 202 -----------------
 .../datasets/transforms/__init__.py           |   3 +-
 .../datasets/transforms/loading.py            |  55 -----
 .../BEVFusion/bevfusion/bevfusion_head.py     |  14 +-
 projects/BEVFusion/bevfusion/utils.py         |   9 +-
 6 files changed, 6 insertions(+), 488 deletions(-)
 delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
 delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
 delete mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
deleted file mode 100644
index 176763b54..000000000
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py
+++ /dev/null
@@ -1,211 +0,0 @@
-custom_imports = dict(
-    imports=[
-        "autoware_ml.detection3d.datasets.t4dataset",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
-    ]
-)
-
-# dataset type setting
-dataset_type = "T4Dataset"
-info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl"
-info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl"
-info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl"
-
-info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet"
-info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet"
-info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet"
-
-# dataset scene setting
-dataset_version_list = [
-    "db_j6gen2_v1",
-    "db_j6gen2_v2",
-    "db_j6gen2_v3",
-    "db_j6gen2_v4",
-    "db_j6gen2_v5",
-    "db_j6gen2_v6",
-    "db_j6gen2_v7",
-    "db_j6gen2_v8",
-    "db_j6gen2_v9",
-    "db_largebus_v1",
-    "db_largebus_v2",
-    "db_largebus_v3",
-]
-
-dataset_test_groups = {
-    "largebus": ("t4dataset_largebus_traffic_cone_infos_test.pkl", False),
-    "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False),
-    "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True),
-}
-
-# dataset format setting
-data_prefix = dict(
-    pts="",
-    CAM_FRONT="",
-    CAM_FRONT_LEFT="",
-    CAM_FRONT_RIGHT="",
-    CAM_BACK="",
-    CAM_BACK_RIGHT="",
-    CAM_BACK_LEFT="",
-    sweeps="",
-)
-
-camera_types = {
-    "CAM_FRONT",
-    "CAM_FRONT_RIGHT",
-    "CAM_FRONT_LEFT",
-    "CAM_BACK",
-    "CAM_BACK_LEFT",
-    "CAM_BACK_RIGHT",
-}
-
-# class setting
-name_mapping = {
-    # DBv1.0
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.emergency (ambulance & police)": "car",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus (bendy & rigid)": "bus",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "movable_object.barrier": "barrier",
-    "movable_object.debris": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.trafficcone": "traffic_cone",
-    "movable_object.traffic_cone": "traffic_cone",
-    "animal": "animal",
-    "static_object.bicycle_rack": "bicycle_rack",
-    # DBv1.1 and UCv2.0
-    "car": "car",
-    "truck": "truck",
-    "bus": "bus",
-    "trailer": "trailer",
-    "motorcycle": "bicycle",
-    "bicycle": "bicycle",
-    "police_car": "car",
-    "pedestrian": "pedestrian",
-    "police_officer": "pedestrian",
-    "forklift": "car",
-    "construction_worker": "pedestrian",
-    "stroller": "pedestrian",
-    # DBv2.0 and DBv3.0
-    "animal": "animal",
-    "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.traffic_cone": "traffic_cone",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
-    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus": "bus",
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.fire": "truck",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.police": "car",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    # DBv1.3
-    "ambulance": "car",
-    "kart": "car",
-    "wheelchair": "pedestrian",
-    "personal_mobility": "pedestrian",
-    "fire_truck": "truck",
-    "semi_trailer": "trailer",
-    "tractor_unit": "truck",
-    "construction_vehicle": "truck",
-    "traffic_cone": "traffic_cone",
-    "trafficcone": "traffic_cone",
-    "barrier": "barrier",
-}
-
-
-class_names = [
-    "car",
-    "truck",
-    "bus",
-    "bicycle",
-    "pedestrian",
-    "traffic_cone",
-    "barrier",
-]
-num_class = len(class_names)
-metainfo = dict(classes=class_names)
-
-merge_objects = [
-    ("truck", ["truck", "trailer"]),
-]
-merge_type = "extend_longer"  # One of ["extend_longer","union", None]
-
-# visualization
-class_colors = {
-    "car": (30, 144, 255),
-    "truck": (140, 0, 255),
-    "construction_vehicle": (255, 255, 0),
-    "bus": (111, 255, 111),
-    "trailer": (0, 255, 255),
-    "barrier": (0, 0, 0),
-    "motorcycle": (100, 0, 30),
-    "bicycle": (255, 0, 30),
-    "pedestrian": (255, 200, 200),
-    "traffic_cone": (120, 120, 120),
-}
-camera_panels = [
-    "data/CAM_FRONT_LEFT",
-    "data/CAM_FRONT",
-    "data/CAM_FRONT_RIGHT",
-    "data/CAM_BACK_LEFT",
-    "data/CAM_BACK",
-    "data/CAM_BACK_RIGHT",
-]
-
-filter_attributes = [
-    ("vehicle.bicycle", "vehicle_state.parked"),
-    ("vehicle.bicycle", "cycle_state.without_rider"),
-    ("vehicle.bicycle", "motorcycle_state.without_rider"),
-    ("vehicle.motorcycle", "vehicle_state.parked"),
-    ("vehicle.motorcycle", "cycle_state.without_rider"),
-    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
-    ("bicycle", "vehicle_state.parked"),
-    ("bicycle", "cycle_state.without_rider"),
-    ("bicycle", "motorcycle_state.without_rider"),
-    ("motorcycle", "vehicle_state.parked"),
-    ("motorcycle", "cycle_state.without_rider"),
-    ("motorcycle", "motorcycle_state.without_rider"),
-]
-
-evaluator_metric_configs = dict(
-    evaluation_task="detection",
-    target_labels=class_names,
-    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
-    # plane_distance_thresholds is required for the pass fail evaluation
-    plane_distance_thresholds=[2.0, 4.0],
-    iou_2d_thresholds=None,
-    iou_3d_thresholds=None,
-    label_prefix="autoware",
-    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
-    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
-    min_distance=[0.0, 50.0, 90.0, 0.0],
-    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
-    max_distance=[50.0, 90.0, 121.0, 121.0],
-    min_point_numbers=0,
-    matching_class_agnostic_fps=False,
-)
-
-remove_non_traffic_cone_barrier = True
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
deleted file mode 100644
index 61e9e915c..000000000
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py
+++ /dev/null
@@ -1,202 +0,0 @@
-custom_imports = dict(
-    imports=[
-        "autoware_ml.detection3d.datasets.t4dataset",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
-    ]
-)
-
-# dataset type setting
-dataset_type = "T4Dataset"
-info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl"
-info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl"
-info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl"
-
-info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet"
-info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet"
-info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet"
-
-# dataset scene setting
-dataset_test_groups = {
-    "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True),
-}
-
-dataset_version_list = [
-    "db_jpntaxigen2_v1",
-    "db_jpntaxigen2_v2",
-    "db_jpntaxi_v1",
-    "db_jpntaxi_v2",
-    "db_jpntaxi_v4",
-]
-
-# dataset format setting
-data_prefix = dict(
-    pts="",
-    CAM_FRONT="",
-    CAM_FRONT_LEFT="",
-    CAM_FRONT_RIGHT="",
-    CAM_BACK="",
-    CAM_BACK_RIGHT="",
-    CAM_BACK_LEFT="",
-    sweeps="",
-)
-camera_types = {
-    "CAM_FRONT",
-    "CAM_FRONT_RIGHT",
-    "CAM_FRONT_LEFT",
-    "CAM_BACK",
-    "CAM_BACK_LEFT",
-    "CAM_BACK_RIGHT",
-}
-
-# class setting
-name_mapping = {
-    # DBv1.0
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.emergency (ambulance & police)": "car",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus (bendy & rigid)": "bus",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "movable_object.barrier": "barrier",
-    "movable_object.debris": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.trafficcone": "traffic_cone",
-    "movable_object.traffic_cone": "traffic_cone",
-    "animal": "animal",
-    "static_object.bicycle_rack": "bicycle_rack",
-    # DBv1.1 and UCv2.0
-    "car": "car",
-    "truck": "truck",
-    "bus": "bus",
-    "trailer": "trailer",
-    "motorcycle": "bicycle",
-    "bicycle": "bicycle",
-    "police_car": "car",
-    "pedestrian": "pedestrian",
-    "police_officer": "pedestrian",
-    "forklift": "car",
-    "construction_worker": "pedestrian",
-    "stroller": "pedestrian",
-    # DBv2.0 and DBv3.0
-    "animal": "animal",
-    "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.traffic_cone": "traffic_cone",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
-    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus": "bus",
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.fire": "truck",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.police": "car",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    # DBv1.3
-    "ambulance": "car",
-    "kart": "car",
-    "wheelchair": "pedestrian",
-    "personal_mobility": "pedestrian",
-    "fire_truck": "truck",
-    "semi_trailer": "trailer",
-    "tractor_unit": "truck",
-    "construction_vehicle": "truck",
-    "traffic_cone": "traffic_cone",
-    "trafficcone": "traffic_cone",
-    "barrier": "barrier",
-}
-
-class_names = [
-    "car",
-    "truck",
-    "bus",
-    "bicycle",
-    "pedestrian",
-    "traffic_cone",
-    "barrier",
-]
-
-num_class = len(class_names)
-metainfo = dict(classes=class_names)
-
-merge_objects = [
-    ("truck", ["truck", "trailer"]),
-]
-merge_type = "extend_longer"  # One of ["extend_longer","union", None]
-
-# visualization
-class_colors = {
-    "car": (30, 144, 255),
-    "truck": (140, 0, 255),
-    "construction_vehicle": (255, 255, 0),
-    "bus": (111, 255, 111),
-    "trailer": (0, 255, 255),
-    "barrier": (0, 0, 0),
-    "motorcycle": (100, 0, 30),
-    "bicycle": (255, 0, 30),
-    "pedestrian": (255, 200, 200),
-    "traffic_cone": (120, 120, 120),
-}
-camera_panels = [
-    "data/CAM_FRONT_LEFT",
-    "data/CAM_FRONT",
-    "data/CAM_FRONT_RIGHT",
-    "data/CAM_BACK_LEFT",
-    "data/CAM_BACK",
-    "data/CAM_BACK_RIGHT",
-]
-
-# Add filter attributes
-filter_attributes = [
-    ("vehicle.bicycle", "vehicle_state.parked"),
-    ("vehicle.bicycle", "cycle_state.without_rider"),
-    ("vehicle.bicycle", "motorcycle_state.without_rider"),
-    ("vehicle.motorcycle", "vehicle_state.parked"),
-    ("vehicle.motorcycle", "cycle_state.without_rider"),
-    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
-    ("bicycle", "vehicle_state.parked"),
-    ("bicycle", "cycle_state.without_rider"),
-    ("bicycle", "motorcycle_state.without_rider"),
-    ("motorcycle", "vehicle_state.parked"),
-    ("motorcycle", "cycle_state.without_rider"),
-    ("motorcycle", "motorcycle_state.without_rider"),
-]
-
-evaluator_metric_configs = dict(
-    evaluation_task="detection",
-    target_labels=class_names,
-    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
-    # plane_distance_thresholds is required for the pass fail evaluation
-    plane_distance_thresholds=[2.0, 4.0],
-    iou_2d_thresholds=None,
-    iou_3d_thresholds=None,
-    label_prefix="autoware",
-    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
-    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
-    min_distance=[0.0, 50.0, 90.0, 0.0],
-    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
-    max_distance=[50.0, 90.0, 121.0, 121.0],
-    min_point_numbers=0,
-    matching_class_agnostic_fps=False,
-)
-
-remove_non_traffic_cone_barrier = True
diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py
index b517bf1ea..6bc932f1a 100644
--- a/autoware_ml/detection3d/datasets/transforms/__init__.py
+++ b/autoware_ml/detection3d/datasets/transforms/__init__.py
@@ -1,4 +1,3 @@
-from .loading import LoadPointsFromCurrentFileSweep
 from .object_min_points_filter import ObjectMinPointsFilter
 
-__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"]
+__all__ = ["ObjectMinPointsFilter"]
diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py
deleted file mode 100644
index 09beddc34..000000000
--- a/autoware_ml/detection3d/datasets/transforms/loading.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from typing import List, Optional, Union
-
-from mmcv.transforms import BaseTransform
-from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps
-from mmdet3d.structures.ops import box_np_ops
-from mmengine.registry import TRANSFORMS
-
-
-@TRANSFORMS.register_module()
-class LoadPointsFromCurrentFileSweep(BaseTransform):
-    """Load points from the current file and sweep.
-    This is used to load the points from the current file and sweep for copy-paste augmentation.
-
-    Args:
-        coord_type (str): The type of coordinates of points cloud.
-        load_dim (int): The dimension of the loaded points.
-        use_dim (list[int] | int): Which dimensions of the points to use.
-        backend_args (dict, optional): Arguments to instantiate the
-            corresponding backend. Defaults to None.
-    """
-
-    def __init__(
-        self,
-        coord_type: str,
-        load_dim: int = 6,
-        use_dim: Union[int, List[int]] = [0, 1, 2],
-        shift_height: bool = False,
-        use_color: bool = False,
-        norm_intensity: bool = False,
-        norm_elongation: bool = False,
-        backend_args: Optional[dict] = None,
-        sweeps_num: int = 10,
-        pad_empty_sweeps: bool = False,
-        remove_close: bool = False,
-        test_mode: bool = False,
-    ) -> None:
-
-        self.points_loader = LoadPointsFromFile(
-            coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args
-        )
-        if sweeps_num > 0:
-            self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(
-                sweeps_num=sweeps_num,
-                pad_empty_sweeps=pad_empty_sweeps,
-                remove_close=remove_close,
-                test_mode=test_mode,
-            )
-        else:
-            self.points_from_multi_sweeps_loader = None
-
-    def transform(self, results: dict) -> dict:
-        points = self.points_loader(results)
-        if self.points_from_multi_sweeps_loader is not None:
-            points = self.points_from_multi_sweeps_loader(points)
-        return points
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index b62113f65..dd566eab1 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -70,9 +70,7 @@ def __init__(
         train_cfg=None,
         test_cfg=None,
         bbox_coder=None,
-        partial_ignore_labels=None,
-        partial_ignore_dense_heatmap=False,
-    ):
+        partial_ignore_labels=None):
         super().__init__()
         self.class_names = class_names
         self.num_classes = len(self.class_names)
@@ -198,13 +196,9 @@ def __init__(
             ]
         else:
             self.partial_ignore_labels = None
-
-        self.partial_ignore_dense_heatmap = partial_ignore_dense_heatmap
-        print_log(
-            f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \
-        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}",
-            logger="current",
-        )
+        
+        print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \
+        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
 
     def create_2D_grid(self, x_size, y_size):
         meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index 8fd83a0c5..c47604dbd 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -238,7 +238,7 @@ def __init__(
         self.iou_cost = TASK_UTILS.build(iou_cost)
         self.iou_calculator = TASK_UTILS.build(iou_calculator)
 
-    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None):
+    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
         num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
 
         # 1. assign -1 by default
@@ -263,13 +263,6 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label
         # weighted sum of above three costs
         cost = cls_cost + reg_cost + iou_cost
 
-        # if ignore_labels is not None:
-        #     preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False)
-        #     print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape)
-        #     ignore_preds_masks = preds_labels.isin(ignore_labels)
-        #     cost[ignore_preds_masks] = 10000
-        #     print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape)
-
         # 3. do Hungarian matching on CPU using linear_sum_assignment
         cost = cost.detach().cpu()
         if linear_sum_assignment is None:

From 1323d4ed662678fc225ca43ef7baaf5a8b144cc1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 May 2026 06:36:38 +0000
Subject: [PATCH 085/183] ci(pre-commit): autofix

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index dd566eab1..c37c5a538 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -70,7 +70,8 @@ def __init__(
         train_cfg=None,
         test_cfg=None,
         bbox_coder=None,
-        partial_ignore_labels=None):
+        partial_ignore_labels=None,
+    ):
         super().__init__()
         self.class_names = class_names
         self.num_classes = len(self.class_names)
@@ -196,7 +197,7 @@ def __init__(
             ]
         else:
             self.partial_ignore_labels = None
-        
+
         print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \
         {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
 

From 585a0b2b068d5c721d657a93e15bbbe6f904cf45 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 7 May 2026 20:15:05 +0900
Subject: [PATCH 086/183] remove unnecessary changes

---
 Dockerfile                                    |   6 +-
 .../dataset/t4dataset/j6gen2_v2.py            | 194 ++++++++++++++++++
 ...second_secfpn_30e_8xb8_j6gen2_base_120m.py |   2 +-
 ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py |   4 +-
 ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py |   8 +-
 ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py |   4 +-
 6 files changed, 207 insertions(+), 11 deletions(-)
 create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py

diff --git a/Dockerfile b/Dockerfile
index 3e9caecb9..2fbcaa620 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -61,13 +61,15 @@ RUN python3 -m pip --no-cache-dir install \
 RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1
 
 # Install autoware-perception-evaluation
-RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@9d8c9773d35177bb0b7f2606f429f58a5fb708ca
+RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@3c9577dc23fd76a049559b42656ca46c1c32fa66
 
 # Need to dowgrade setuptools to 60.2.0 to fix setup
 RUN python3 -m pip --no-cache-dir install \
     setuptools==60.2.0 \
     transformers==4.51.3 \
-    polars==1.37.1
+    polars==1.37.1 \
+		onnx_graphsurgeon==0.5.8 \
+		spconv-cu126==2.3.8
 
 # NOTE(knzo25): this patch is needed to use numpy versions over 1.23.5 (version used in mmdet3d 1.4.0)
 # It can be safely deleted when mmdet3d updates the numpy version
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py
new file mode 100644
index 000000000..e4375d576
--- /dev/null
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py
@@ -0,0 +1,194 @@
+custom_imports = dict(
+    imports=[
+        "autoware_ml.detection3d.datasets.t4dataset",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
+        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
+    ]
+)
+
+# dataset type setting
+dataset_type = "T4Dataset"
+info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl"
+info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl"
+info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl"
+
+info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet"
+info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet"
+info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet"
+
+# dataset scene setting
+dataset_version_list = [
+    "db_j6gen2_v2",
+]
+
+dataset_test_groups = {
+    "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True),
+}
+
+# dataset format setting
+data_prefix = dict(
+    pts="",
+    CAM_FRONT="",
+    CAM_FRONT_LEFT="",
+    CAM_FRONT_RIGHT="",
+    CAM_BACK="",
+    CAM_BACK_RIGHT="",
+    CAM_BACK_LEFT="",
+    sweeps="",
+)
+camera_types = {
+    "CAM_FRONT",
+    "CAM_FRONT_RIGHT",
+    "CAM_FRONT_LEFT",
+    "CAM_BACK",
+    "CAM_BACK_LEFT",
+    "CAM_BACK_RIGHT",
+}
+
+# class setting
+name_mapping = {
+    # DBv1.0
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.emergency (ambulance & police)": "car",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus (bendy & rigid)": "bus",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "movable_object.barrier": "barrier",
+    "movable_object.debris": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.trafficcone": "traffic_cone",
+    "movable_object.traffic_cone": "traffic_cone",
+    "animal": "animal",
+    "static_object.bicycle_rack": "bicycle_rack",
+    # DBv1.1 and UCv2.0
+    "car": "car",
+    "truck": "truck",
+    "bus": "bus",
+    "trailer": "trailer",
+    "motorcycle": "bicycle",
+    "bicycle": "bicycle",
+    "police_car": "car",
+    "pedestrian": "pedestrian",
+    "police_officer": "pedestrian",
+    "forklift": "car",
+    "construction_worker": "pedestrian",
+    "stroller": "pedestrian",
+    # DBv2.0 and DBv3.0
+    "animal": "animal",
+    "movable_object.barrier": "barrier",
+    "movable_object.pushable_pullable": "barrier",
+    "movable_object.traffic_cone": "traffic_cone",
+    "pedestrian.adult": "pedestrian",
+    "pedestrian.child": "pedestrian",
+    "pedestrian.construction_worker": "pedestrian",
+    "pedestrian.personal_mobility": "pedestrian",
+    "pedestrian.police_officer": "pedestrian",
+    "pedestrian.stroller": "pedestrian",
+    "pedestrian.wheelchair": "pedestrian",
+    "static_object.bicycle rack": "bicycle rack",
+    "static_object.bollard": "bollard",
+    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
+    "vehicle.bicycle": "bicycle",
+    "vehicle.bus": "bus",
+    "vehicle.car": "car",
+    "vehicle.construction": "truck",
+    "vehicle.fire": "truck",
+    "vehicle.motorcycle": "bicycle",
+    "vehicle.police": "car",
+    "vehicle.trailer": "trailer",
+    "vehicle.truck": "truck",
+    # DBv1.3
+    "ambulance": "car",
+    "kart": "car",
+    "wheelchair": "pedestrian",
+    "personal_mobility": "pedestrian",
+    "fire_truck": "truck",
+    "semi_trailer": "trailer",
+    "tractor_unit": "truck",
+    "construction_vehicle": "truck",
+    "traffic_cone": "traffic_cone",
+    "trafficcone": "traffic_cone",
+    "barrier": "barrier",
+}
+
+class_names = [
+    "car",
+    "truck",
+    "bus",
+    "bicycle",
+    "pedestrian",
+    "traffic_cone",
+    "barrier",
+]
+num_class = len(class_names)
+metainfo = dict(classes=class_names)
+
+merge_objects = [
+    ("truck", ["truck", "trailer"]),
+]
+merge_type = "extend_longer"  # One of ["extend_longer","union", None]
+
+# visualization
+class_colors = {
+    "car": (30, 144, 255),
+    "truck": (140, 0, 255),
+    "construction_vehicle": (255, 255, 0),
+    "bus": (111, 255, 111),
+    "trailer": (0, 255, 255),
+    "barrier": (0, 0, 0),
+    "motorcycle": (100, 0, 30),
+    "bicycle": (255, 0, 30),
+    "pedestrian": (255, 200, 200),
+    "traffic_cone": (120, 120, 120),
+}
+camera_panels = [
+    "data/CAM_FRONT_LEFT",
+    "data/CAM_FRONT",
+    "data/CAM_FRONT_RIGHT",
+    "data/CAM_BACK_LEFT",
+    "data/CAM_BACK",
+    "data/CAM_BACK_RIGHT",
+]
+
+filter_attributes = [
+    ("vehicle.bicycle", "vehicle_state.parked"),
+    ("vehicle.bicycle", "cycle_state.without_rider"),
+    ("vehicle.bicycle", "motorcycle_state.without_rider"),
+    ("vehicle.motorcycle", "vehicle_state.parked"),
+    ("vehicle.motorcycle", "cycle_state.without_rider"),
+    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
+    ("bicycle", "vehicle_state.parked"),
+    ("bicycle", "cycle_state.without_rider"),
+    ("bicycle", "motorcycle_state.without_rider"),
+    ("motorcycle", "vehicle_state.parked"),
+    ("motorcycle", "cycle_state.without_rider"),
+    ("motorcycle", "motorcycle_state.without_rider"),
+]
+
+evaluator_metric_configs = dict(
+    evaluation_task="detection",
+    target_labels=class_names,
+    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
+    # plane_distance_thresholds is required for the pass fail evaluation
+    plane_distance_thresholds=[2.0, 4.0],
+    iou_2d_thresholds=None,
+    iou_3d_thresholds=None,
+    label_prefix="autoware",
+    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
+    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
+    min_distance=[0.0, 50.0, 90.0, 0.0],
+    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
+    max_distance=[50.0, 90.0, 121.0, 121.0],
+    min_point_numbers=0,
+    matching_class_agnostic_fps=False,
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index d32dc9c70..380a4ba81 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -13,7 +13,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
index 39462b1f6..e3f7d5146 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -18,7 +18,7 @@
 frame_pass_fail_config = dict(
     target_labels=_base_.class_names,
     # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
-    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
     confidence_threshold_list=None,
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index 406e87655..eec87a585 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/user_name/"
+info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar/jpntaxi_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m"
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -164,4 +164,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = None
+load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index b50b093f7..5190182cc 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -18,7 +18,7 @@
 frame_pass_fail_config = dict(
     target_labels=_base_.class_names,
     # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
-    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+	matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
     confidence_threshold_list=None,
 )
 

From a47646ea6476603518857c0a60cf18b30d5720a8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 May 2026 11:15:34 +0000
Subject: [PATCH 087/183] ci(pre-commit): autofix

---
 ...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
index 5190182cc..213f0041b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
@@ -18,7 +18,7 @@
 frame_pass_fail_config = dict(
     target_labels=_base_.class_names,
     # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
-	matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
     confidence_threshold_list=None,
 )
 

From 16eb517a21911d55513ad85863b47c0a6576a200 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 12:04:10 +0900
Subject: [PATCH 088/183] Add the script

---
 .../bevfusion/bevfusion_voxel_encoder.py      |  66 ++++---
 ..._base_120m_sincos_48_channels_32_points.py | 164 ++++++++++++++++++
 2 files changed, 207 insertions(+), 23 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 5037113aa..2cde57cc5 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -57,18 +57,19 @@ def __init__(self,
         super(BEVFusionVoxelEncoder, self).__init__()
         assert len(feat_channels) > 0
         self.legacy = legacy
+        pfn_in_channels = 0
         if with_cluster_center:
-            in_channels += 3
+            pfn_in_channels += 3
         if with_voxel_center:
-            in_channels += 3
+            pfn_in_channels += 3
         if with_distance:
-            in_channels += 1
+            pfn_in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
         self.in_channels = in_channels
-        feat_channels = [in_channels] + list(feat_channels)
+        feat_channels = [pfn_in_channels] + list(feat_channels)
         pfn_layers = []
         for i in range(len(feat_channels) - 1):
             in_filters = feat_channels[i]
@@ -97,7 +98,8 @@ def __init__(self,
 
         self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
         self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
-        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
+        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
+        # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
 
     def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                 *args, **kwargs) -> Tensor:
@@ -112,12 +114,26 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         Returns:
             torch.Tensor: Features of pillars in shape (M, C).
         """
-        if self.min_norm_values is not None and self.max_norm_values is not None:
-            features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values)
-        else:
-            features_norm = features
+        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+        
+        # Mean in the voxel
+        # (N, M, 3) -> (N, 3)
+        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
+                    -1, 1)).contiguous()
 
-        features_ls = [features_norm]
+        # min-max normalization, (N, 3) -> (N, 3)
+        voxel_features_norm = (voxel_features - \
+         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
+        
+        # SinCos encoding
+        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
+        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
+        # (N*3, 3) -> (N, 3*3)
+        y = y.reshape(num_voxels, -1)
+        # (N, 3*3) -> (N, 3*3*2)
+        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+        
+        features_ls = []
         # Find distance of x, y, and z from cluster center, mapped to [-1,   1] if available
         if self._with_cluster_center:
             points_mean = features[:, :, :3].sum(
@@ -125,9 +141,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                     -1, 1, 1)
             f_cluster = features[:, :, :3] - points_mean
             # Map to [0, 1] if available
-            if self.min_norm_values is not None and self.max_norm_values is not None:
-                voxel_size = features.new_tensor([self.vx, self.vy, self.vz])
-                f_cluster = f_cluster / voxel_size
+            # if self.min_norm_values is not None and self.max_norm_values is not None:
+            #     voxel_size = features.new_tensor([self.vx, self.vy, self.vz])
+            #     f_cluster = f_cluster / voxel_size
             features_ls.append(f_cluster)
 
         # Find distance of x, y, and z from pillar center
@@ -156,8 +172,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                     coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
                     self.z_offset)
             
-            if self.min_norm_values is not None and self.max_norm_values is not None:
-                f_center = f_center / (voxel_size * 0.5)
+            # if self.min_norm_values is not None and self.max_norm_values is not None:
+            #     f_center = f_center / (voxel_size * 0.5)
             features_ls.append(f_center)
 
         if self._with_distance:
@@ -165,19 +181,23 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
             features_ls.append(points_dist)
 
         # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
+        voxel_feature_offsets = torch.cat(features_ls, dim=-1)
+
         # The feature decorations were calculated without regard to whether
         # pillar was empty. Need to ensure that
         # empty pillars remain set to zeros.
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-
+        mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
+        mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
+        voxel_feature_offsets *= mask
+        
+        # PFN
         for pfn in self.pfn_layers:
-            features = pfn(features, num_points)
+            voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
+        
+        # Concat 
+        features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
 
-        return features.squeeze(1)
+        return features
 
 
 @MODELS.register_module()
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py
new file mode 100644
index 000000000..073249a3e
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py
@@ -0,0 +1,164 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_lidar_120m.py",
+    "../default/models/default_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
+experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    voxelize_cfg=dict(
+        max_num_points=32,
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        voxelize_reduce=False,
+    ),
+    pts_voxel_encoder=dict(
+        _delete_=True,
+        type="BEVFusionVoxelEncoder", 
+        in_channels=4,
+        with_distance=False,
+        with_cluster_center=True,
+        with_voxel_center=True,
+        feat_channels=[16],
+        point_cloud_range=_base_.point_cloud_range,
+        voxel_size=_base_.voxel_size,
+        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    pts_middle_encoder=dict(
+        in_channels=48,
+        sparse_shape=_base_.grid_size,
+        # num_aug_features=4,
+        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
+        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+    ),
+    bbox_head=dict(
+        class_names=_base_.class_names,  # Use class names to identify the correct class indices
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)

From 731f6d9fea6db8f90a4e4c36c9dedb541292da4a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 16:58:35 +0900
Subject: [PATCH 089/183] Resolve conflict

---
 projects/BEVFusion/bevfusion/bevfusion.py     |  19 +-
 .../bevfusion/bevfusion_voxel_encoder.py      |   6 +-
 .../BEVFusion/bevfusion/sparse_encoder.py     |  20 ---
 ...second_secfpn_30e_8xb8_j6gen2_base_120m.py |  28 +--
 ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py |  32 ++--
 ...oxel_second_secfpn_50e_8xb16_base_120m.py} |  19 +-
 ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 160 -----------------
 ...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 -----------------
 ...b8_base_120m_sincos_timeexp_34_channels.py | 165 -----------------
 ...0m_sincos_timeexp_34_channels_32_points.py | 166 ------------------
 ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py |   2 +-
 .../default_lidar_second_secfpn_120m.py       |  15 +-
 ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +-----------
 .../default_camera_lidar_intensity_120m.py    |  15 +-
 .../pipelines/default_lidar_intensity_120m.py |   2 +-
 ...e.py => default_30e_8xb16_adamw_cosine.py} |   9 +-
 ...e.py => default_50e_8xb16_adamw_cosine.py} |   9 +-
 17 files changed, 59 insertions(+), 888 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py} (85%)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb8_adamw_cosine.py => default_30e_8xb16_adamw_cosine.py} (95%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb8_adamw_cosine.py => default_50e_8xb16_adamw_cosine.py} (95%)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index bc3f1b094..b113bb566 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -56,12 +56,10 @@ def __init__(
         super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg)
 
         if voxelize_cfg is not None:
-            self.voxelize_reduce = voxelize_cfg.pop("voxelize_reduce")
             self.pts_voxel_layer = Voxelization(**voxelize_cfg)
             self.pts_voxel_encoder = MODELS.build(pts_voxel_encoder)
             self.pts_middle_encoder = MODELS.build(pts_middle_encoder)
         else:
-            self.voxelize_reduce = False
             self.pts_voxel_layer = None
             self.pts_voxel_encoder = None
             self.pts_middle_encoder = None
@@ -207,10 +205,6 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor:
                 points = [point.float() for point in points]
                 feats, coords, sizes = self.voxelize(points)
                 batch_size = coords[-1, 0] + 1
-                
-                if self.pts_voxel_encoder is not None:
-                    assert not self.voxelize_reduce
-                    feats = self.pts_voxel_encoder(feats, sizes, coords)
         else:
             # NOTE(knzo25): onnx inference. Voxelization happens outside the graph
             with torch.cuda.amp.autocast(enabled=False):
@@ -224,12 +218,7 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor:
                 # batch_size = coords[-1, 0] + 1
                 batch_size = 1
                 print("Run onnx point_eSpConvst")
-                if self.pts_voxel_encoder is not None:
-                    feats = self.pts_voxel_encoder(feats, sizes, coords)
-                else:
-                    assert self.voxelize_reduce
-                    if self.voxelize_reduce:
-                        feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
+        feats = self.pts_voxel_encoder(feats, sizes, coords)
         x = self.pts_middle_encoder(feats, coords, batch_size)
         return x
 
@@ -255,9 +244,9 @@ def voxelize(self, points):
         assert len(sizes) > 0, "No points in the voxel"
         sizes = torch.cat(sizes, dim=0)
         
-        if self.voxelize_reduce:
-            feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
-            feats = feats.contiguous()
+        # if self.voxelize_reduce:
+        #     feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
+        #     feats = feats.contiguous()
         
         return feats, coords, sizes
 
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 2cde57cc5..6c41234c5 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -376,11 +376,7 @@ class BEVFusionVoxelMeanSinCosEncoder(nn.Module):
     def __init__(self, 
                  min_norm_values: Tuple[float],
                  max_norm_values: Tuple[float],
-                 in_channels: Optional[int] = 4,
-                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
-                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
-                                                              40, 1),
-                 mode: Optional[str] = 'max'):
+                 in_channels: Optional[int] = 4):
         super(BEVFusionVoxelMeanSinCosEncoder, self).__init__()
 
         # Create PillarFeatureNet layers
diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index 019cb630c..ce45d4536 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -47,9 +47,6 @@ class BEVFusionSparseEncoder(SparseEncoder):
     def __init__(
         self,
         in_channels,
-        aug_features_min_values,
-        aug_features_max_values,
-        num_aug_features,
         sparse_shape,
         order=("conv", "norm", "act"),
         norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
@@ -64,9 +61,6 @@ def __init__(
         assert block_type in ["conv_module", "basicblock"]
         self.sparse_shape = sparse_shape
         self.in_channels = in_channels
-        self.register_buffer("aug_features_min_values", torch.tensor(aug_features_min_values))
-        self.register_buffer("aug_features_max_values", torch.tensor(aug_features_max_values))
-        self.num_aug_features = num_aug_features
         self.order = order
         self.base_channels = base_channels
         self.output_channels = output_channels
@@ -77,10 +71,6 @@ def __init__(
         self.return_middle_feats = return_middle_feats
         # Spconv init all weight on its own
 
-        if num_aug_features:
-            self.in_channels = in_channels * num_aug_features * 2
-            self.register_buffer("exponents", (2 ** torch.arange(0, num_aug_features).float()))
-
         assert isinstance(order, tuple) and len(order) == 3
         assert set(order) == {"conv", "norm", "act"}
 
@@ -140,16 +130,6 @@ def forward(self, voxel_features, coors, batch_size):
                 output features. When self.return_middle_feats is True, the
                 module returns middle features.
         """
-
-        if self.num_aug_features:
-            num_points = voxel_features.shape[0]
-            x = (voxel_features - self.aug_features_min_values.view(1, -1)) / (
-                self.aug_features_max_values - self.aug_features_min_values
-            ).view(1, -1)
-            y = x.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
-            y = y.reshape(num_points, -1)
-            voxel_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
-
         coors = coors.int()
         input_sp_tensor = SparseConvTensor(voxel_features, coors, self.sparse_shape, batch_size)
         x = self.conv_input(input_sp_tensor)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
index 380a4ba81..4cf51faa5 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -25,28 +25,16 @@
     voxelize_cfg=dict(
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
     ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_voxel_encoder=dict(
+        in_channels=len(_base_.lidar_sweep_dims),
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag is technically two sweeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
+    ),
     pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
+        in_channels=50,
         sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
index eec87a585..3b7c23b18 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py",
     "../default/pipelines/default_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb16_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -15,8 +15,8 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore"
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -25,28 +25,16 @@
     voxelize_cfg=dict(
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
-        voxelize_reduce=True,
     ),
-    pts_voxel_encoder=dict(num_features=_base_.point_use_dim),
+    pts_voxel_encoder=dict(
+        in_channels=len(_base_.lidar_sweep_dims),
+        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag is technically two sweeps (200 ms) here
+        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
+        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
+    ),
     pts_middle_encoder=dict(
-        in_channels=_base_.point_use_dim,
+        in_channels=50,
         sparse_shape=_base_.grid_size,
-        num_aug_features=5,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        aug_features_min_values=[
-            _base_.point_cloud_range[0],
-            _base_.point_cloud_range[1],
-            _base_.point_cloud_range[2],
-            0.0,
-            0.0,
-        ],
-        aug_features_max_values=[
-            _base_.point_cloud_range[3],
-            _base_.point_cloud_range[4],
-            _base_.point_cloud_range[5],
-            255.0,
-            0.2,
-        ],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
similarity index 85%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index d856b1d4b..6d3a1f93b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
     "../default/pipelines/default_lidar_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/schedulers/default_50e_8xb16_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,10 +13,10 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos"
+experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -25,23 +25,16 @@
     voxelize_cfg=dict(
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
     ),
     pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelMeanSinCosEncoder", 
-        in_channels=4,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
+        in_channels=len(_base_.lidar_sweep_dims),
+        # min-max normalization for x, y, z, time_lag, where the max of time lag is technically two sweeps (200 ms) here
         min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
         max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     pts_middle_encoder=dict(
         in_channels=32,
         sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
deleted file mode 100644
index 1f52662a4..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py
+++ /dev/null
@@ -1,160 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m_iou_loss.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
-    ),
-    pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelMeanSinCosEncoder", 
-        in_channels=4,
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    pts_middle_encoder=dict(
-        in_channels=32,
-        sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-        partial_ignore_labels=["traffic_cone", "barrier"],
-        loss_heatmap=dict(
-            reduction="none",
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
deleted file mode 100644
index 54af6be5f..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py
+++ /dev/null
@@ -1,163 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
-    ),
-    pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelSinCosEncoder", 
-        in_channels=4,
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        feat_channels=[16],
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    pts_middle_encoder=dict(
-        in_channels=34,
-        sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
deleted file mode 100644
index d7e61102b..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py
+++ /dev/null
@@ -1,165 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
-    ),
-    pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelSinCosEncoder", 
-        in_channels=4,
-        time_lag_channel_index=3,
-        time_exp_factor=1.0,
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        feat_channels=[16],
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    pts_middle_encoder=dict(
-        in_channels=34,
-        sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py
deleted file mode 100644
index f784b2386..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py
+++ /dev/null
@@ -1,166 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        max_num_points=32,
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
-    ),
-    pts_voxel_encoder=dict(
-        _delete_=True,
-        type="BEVFusionVoxelSinCosEncoder", 
-        in_channels=4,
-        time_lag_channel_index=3,
-        time_exp_factor=1.0,
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        feat_channels=[16],
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    pts_middle_encoder=dict(
-        in_channels=34,
-        sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
index efcd091f5..98a65a3f9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
@@ -18,7 +18,7 @@
 frame_pass_fail_config = dict(
     target_labels=_base_.class_names,
     # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation)
-    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0],
+    matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
     confidence_threshold_list=None,
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 11a1b42b7..c097d10bf 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -1,5 +1,5 @@
 num_proposals = 500
-max_num_points = 10
+max_num_points = 32
 max_voxels = [120000, 160000]
 
 model = dict(
@@ -7,19 +7,18 @@
     voxelize_cfg=dict(
         max_num_points=max_num_points,
         max_voxels=max_voxels,
-        voxelize_reduce=True,
     ),
     data_preprocessor=dict(
         type="Det3DDataPreprocessor",
         pad_size_divisor=32,
     ),
-    pts_voxel_encoder=dict(type="HardSimpleVFE"),
+    pts_voxel_encoder=dict(
+        type="BEVFusionVoxelMeanSinCosEncoder", 
+        in_channels=4,
+    ),
     pts_middle_encoder=dict(
         type="BEVFusionSparseEncoder",
         in_channels=5,
-        aug_features_min_values=[],
-        aug_features_max_values=[],
-        num_aug_features=0,
         order=("conv", "norm", "act"),
         norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01),
         encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
@@ -112,9 +111,7 @@
             reduction="mean",
             loss_weight=1.0,
         ),
-				# loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0),
-        loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
+        loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
-        partial_ignore_labels=None,
     ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py
index 792392c09..e90687fe3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py
@@ -1,119 +1,10 @@
-num_proposals = 500
-max_num_points = 10
-max_voxels = [120000, 160000]
+_base_ = [
+    "./default_lidar_second_secfpn_120m.py",
+]
 
 model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        max_num_points=max_num_points,
-        max_voxels=max_voxels,
-        voxelize_reduce=True,
-    ),
-    data_preprocessor=dict(
-        type="Det3DDataPreprocessor",
-        pad_size_divisor=32,
-    ),
-    pts_voxel_encoder=dict(type="HardSimpleVFE"),
-    pts_middle_encoder=dict(
-        type="BEVFusionSparseEncoder",
-        in_channels=5,
-        aug_features_min_values=[],
-        aug_features_max_values=[],
-        num_aug_features=0,
-        order=("conv", "norm", "act"),
-        norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01),
-        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
-        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)),
-        block_type="basicblock",
-    ),
-    pts_backbone=dict(
-        type="SECOND",
-        in_channels=256,
-        out_channels=[128, 256],
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        norm_cfg=dict(type="BN", eps=0.001, momentum=0.01),
-        conv_cfg=dict(type="Conv2d", bias=False),
-    ),
-    pts_neck=dict(
-        type="SECONDFPN",
-        in_channels=[128, 256],
-        out_channels=[256, 256],
-        upsample_strides=[1, 2],
-        norm_cfg=dict(type="BN", eps=0.001, momentum=0.01),
-        upsample_cfg=dict(type="deconv", bias=False),
-        use_conv_for_no_stride=True,
-    ),
     bbox_head=dict(
-        type="BEVFusionHead",
-        num_proposals=num_proposals,
-        auxiliary=True,
-        in_channels=512,
-        hidden_channel=128,
-        nms_kernel_size=3,
-        bn_momentum=0.1,
-        num_decoder_layers=1,
-        decoder_layer=dict(
-            type="TransformerDecoderLayer",
-            self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1),
-            cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1),
-            ffn_cfg=dict(
-                embed_dims=128,
-                feedforward_channels=256,
-                num_fcs=2,
-                ffn_drop=0.1,
-                act_cfg=dict(type="ReLU", inplace=True),
-            ),
-            norm_cfg=dict(type="LN"),
-            pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128),
-        ),
-        train_cfg=dict(
-            dataset="t4datasets",
-            out_size_factor=8,
-            gaussian_overlap=0.1,
-            min_radius=2,
-            pos_weight=-1,
-            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-            assigner=dict(
-                type="HungarianAssigner3D",
-                iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"),
-                cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15),
-                reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25),
-                iou_cost=dict(type="IoU3DCost", weight=0.25),
-            ),
-        ),
-        test_cfg=dict(
-            dataset="t4datasets",
-            out_size_factor=8,
-            nms_type=None,  # Set to "circle" for circle_nms
-            # Set NMS for different clusters
-            nms_clusters=[
-                dict(class_names=["car", "truck", "bus"], nms_threshold=0.5),  # It's radius if using circle_nms
-                dict(class_names=["bicycle"], nms_threshold=0.5),
-                dict(class_names=["pedestrian"], nms_threshold=0.175),
-                dict(class_names=["barrier"], nms_threshold=0.5),
-                dict(class_names=["traffic_cone"], nms_threshold=0.175),
-            ],
-        ),
-        dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"],  # Use class indices for pooling
         common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]),
-        bbox_coder=dict(
-            type="TransFusionBBoxCoder",
-            post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
-            score_threshold=0.0,
-            out_size_factor=8,
-            code_size=10,
-        ),
-        loss_cls=dict(
-            type="mmdet.FocalLoss",
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            reduction="mean",
-            loss_weight=1.0,
-        ),
-		loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0),
-        loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0),
-        loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
+        loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0),
     ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 963a218e1..0b0f44c08 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 32
+num_workers = 16
 input_modality = dict(use_lidar=True, use_camera=True)
 
 # range setting
@@ -13,6 +13,8 @@
     "bus": 120,
     "bicycle": 120,
     "pedestrian": 120,
+    "traffic_cone": 120,
+    "barrier": 120,
 }
 
 # LiDAR parameters
@@ -74,14 +76,11 @@
         classes=[
             "car",
             "truck",
-            "construction_vehicle",
             "bus",
-            "trailer",
-            "barrier",
-            "motorcycle",
             "bicycle",
             "pedestrian",
             "traffic_cone",
+            "barrier",
         ],
     ),
     dict(type="PointShuffle"),
@@ -107,6 +106,9 @@
             "img_aug_matrix",
             "lidar_aug_matrix",
             "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
@@ -164,6 +166,9 @@
             "num_pts_feats",
             "num_views",
             "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index e2de195e9..1ce2aa2be 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 32
+num_workers = 16
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
similarity index 95%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
index a2cd2d2e9..1e1ce37ea 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
@@ -1,13 +1,12 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
-t_max = 8
+lr = 2.0e-4
+t_max = 3
 max_epochs = 30
 val_interval = 5
 
 train_gpu_size = 8
-test_batch_size = 2
-train_batch_size = 8
+test_batch_size = 4
+train_batch_size = 16
 
 param_scheduler = [
     # learning rate scheduler
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
similarity index 95%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
index 87571d0b3..5be98b3d9 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
@@ -1,13 +1,12 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1.4141e-4
-t_max = 15
+lr = 2.0e-4
+t_max = 5
 max_epochs = 50
 val_interval = 5
 
 train_gpu_size = 8
-test_batch_size = 2
-train_batch_size = 8
+test_batch_size = 4
+train_batch_size = 16
 
 param_scheduler = [
     # learning rate scheduler

From ad4f746d8e4150a826e82009a80beedab991c7ad Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 17:00:43 +0900
Subject: [PATCH 090/183] Resolve conflict

---
 .../default/schedulers/default_30e_8xb16_adamw_cosine.py    | 6 +++---
 .../default/schedulers/default_50e_8xb16_adamw_cosine.py    | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
index 1e1ce37ea..e3975f6eb 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
@@ -10,7 +10,7 @@
 
 param_scheduler = [
     # learning rate scheduler
-    # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10
+    # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10
     # during the next epochs, learning rate decreases from lr * 10 to
     # lr * 1e-4
     dict(
@@ -23,7 +23,7 @@
         convert_to_iter_based=True,
     ),
     dict(
-        type="CosineAnnealingLR",
+        type="CosineAnnealingLR",
         T_max=(max_epochs - t_max),
         eta_min=lr * 1e-4,
         begin=t_max,
@@ -32,7 +32,7 @@
         convert_to_iter_based=True,
     ),
     # momentum scheduler
-    # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95
+    # During the first (max_epochs * 0.10) epochs, momentum increases from 0 to 0.85 / 0.95
     # during the next epochs, momentum increases from 0.85 / 0.95 to 1
     dict(
         type="CosineAnnealingMomentum",
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
index 5be98b3d9..d209d0c1b 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py
@@ -10,7 +10,7 @@
 
 param_scheduler = [
     # learning rate scheduler
-    # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10
+    # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10
     # during the next epochs, learning rate decreases from lr * 10 to
     # lr * 1e-4
     dict(
@@ -32,7 +32,7 @@
         convert_to_iter_based=True,
     ),
     # momentum scheduler
-    # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95
+    # During the first (0.10 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95
     # during the next epochs, momentum increases from 0.85 / 0.95 to 1
     dict(
         type="CosineAnnealingMomentum",

From be69b11cb6732c96d5ae185db7b5c6521a65708d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 17:03:29 +0900
Subject: [PATCH 091/183] Resolve conflict

---
 ...ion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} | 0
 ...xel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} | 0
 ...on_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} | 0
 ...el_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} | 0
 ..._secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} | 2 +-
 ...idar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} | 0
 6 files changed, 1 insertion(+), 1 deletion(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} (100%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} (100%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} (100%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} (100%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} (98%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} (100%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py
index 073249a3e..44acb083c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
     "../default/pipelines/default_lidar_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_cosine.py",
+    "../default/schedulers/default_50e_8xb16_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py

From 258e64c8ce89af6e174d9eccb17a45737e63c0cb Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 18:13:14 +0900
Subject: [PATCH 092/183] Resolve conflict

---
 projects/BEVFusion/bevfusion/bevfusion_head.py               | 5 +++--
 .../default/models/default_lidar_second_secfpn_120m.py       | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 0b510eae7..1de3af05f 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -782,8 +782,9 @@ def loss_by_feat(
             for cls_i, class_name in enumerate(self.class_names):
                 loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i]
 
-            # Prevent loss item to avoid computing gradients twice. This is for logging.
-            loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum()
+            # Logging-only aggregate. Detach so it does not retain the autograd graph;
+            # the per-class `loss_heatmap_{class_name}` entries are what drive gradients.
+            loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum().detach()
 
         # compute loss for each layer
         for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1):
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index c097d10bf..d56e6d1a3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -111,7 +111,10 @@
             reduction="mean",
             loss_weight=1.0,
         ),
+        loss_iou=None,
         loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0),
         loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25),
+        # Class names handed to the head as partially ignored labels.
+        partial_ignore_labels=["traffic_cone", "barrier"],
     ),
 )

From 75a46d3cfe983dc76fc4e3e478cc82b0ffe02e86 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 19:07:24 +0900
Subject: [PATCH 093/183] Updated

---
 .../bevfusion/bevfusion_voxel_encoder.py          |  2 +-
 ...ond_secfpn_50e_8xb16_base_120m_48_channels.py} | 15 +++++----------
 2 files changed, 6 insertions(+), 11 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py} (88%)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 6c41234c5..06ca2e434 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -107,7 +107,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
         Args:
             features (torch.Tensor): Point features or raw points in shape
-                (N, M, C).
+                (N, M, C), with channels (x, y, z, intensity, time_lag) if C is 5, or (x, y, z, time_lag) if C is 4.
             num_points (torch.Tensor): Number of points in each pillar in shape (M).
             coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
similarity index 88%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 44acb083c..02f9642f2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -13,20 +13,18 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type
-experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points"
+experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
 model = dict(
     type="BEVFusion",
     voxelize_cfg=dict(
-        max_num_points=32,
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
-        voxelize_reduce=False,
     ),
     pts_voxel_encoder=dict(
         _delete_=True,
@@ -35,21 +33,18 @@
         with_distance=False,
         with_cluster_center=True,
         with_voxel_center=True,
-        feat_channels=[16],
+        feat_channels=[16, 16],
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
         norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
         # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
         min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
         max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
+        legacy=False
     ),
     pts_middle_encoder=dict(
         in_channels=48,
         sparse_shape=_base_.grid_size,
-        # num_aug_features=4,
-        # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices

From 16fe09be0b4acd4bce50017351c816327f11d3fc Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 22:26:29 +0900
Subject: [PATCH 094/183] Make BEVFusionVoxelEncoder extend HardSimpleVoxelSinCosEncoder

---
 projects/BEVFusion/bevfusion/__init__.py      |   2 +-
 .../bevfusion/bevfusion_voxel_encoder.py      | 489 +++++++++---------
 2 files changed, 239 insertions(+), 252 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 2e9822d76..ce9b31aa5 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -7,7 +7,7 @@
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
-from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder
+from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder
 
 __all__ = [
     "BEVFusion",
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 06ca2e434..843624b56 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -11,32 +11,80 @@
 
 
 @MODELS.register_module()
-class BEVFusionVoxelEncoder(nn.Module):
+class HardSimpleVoxelSinCosEncoder(nn.Module):
+    def __init__(self, 
+                 min_norm_values: Tuple[float],
+                 max_norm_values: Tuple[float],
+                 in_channels: Optional[int] = 4) -> None:
+        """
+        Simple voxel encoder that only performs mean pooling on the normalize features, and then 
+        performs sin-cos (fourier encoding) on each voxel channels.
+
+        The output shape of each voxel is (N, feature_channels*2).
+        Args:
+            min_norm_values (Tuple[float]): Minimum values for the features.
+            max_norm_values (Tuple[float]): Maximum values for the features.
+            in_channels (int): Number of input channels.
+        """
+        super(BEVFusionVoxelMeanSinCosEncoder, self).__init__()
+      
+        # Create PillarFeatureNet layers
+        self.in_channels = in_channels
+
+        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
+        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
+        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
+
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+                *args, **kwargs) -> Tensor:
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Point features or raw points in shape
+                (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4.
+            num_points (torch.Tensor): Number of points in each pillar in shape (M).
+            coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
+
+        Returns:
+            torch.Tensor: Features of pillars in shape (M, C*C*2).
+
+        """
+        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+        
+        # Mean in the voxel
+        # (N, M, 3) -> (N, 3)
+        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
+                    -1, 1)).contiguous()
+
+        # min-max normalization, (N, 3) -> (N, 3)
+        voxel_features_norm = (voxel_features - \
+         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
+        
+        # SinCos encoding
+        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
+        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
+        # (N*3, 3) -> (N, 3*3)
+        y = y.reshape(num_voxels, -1)
+        # (N, 3*3) -> (N, 3*3*2)
+        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+        
+        return voxel_fourier_features
+
+
+@MODELS.register_module()
+class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder):
     """BEVFusion Voxel Encoder Feature Net.
     
-    The network is same as pillar featuer net.
-    The network prepares the pillar features and performs forward pass
-    through PFNLayers.
+    The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the 
+    offset features, for example, distances. After that, it concatenates the fourier features and the offset features 
+    along the channel dimension for each voxel.
 
     Args:
-        in_channels (int, optional): Number of input features,
-            either x, y, z or x, y, z, r. Defaults to 4.
+        min_norm_values (Tuple[float]): Minimum values for the features.
+        max_norm_values (Tuple[float]): Maximum values for the features.
+        in_channels (int): Number of input channels.
         feat_channels (tuple, optional): Number of features in each of the
             N PFNLayers. Defaults to (64, ).
-        with_distance (bool, optional): Whether to include Euclidean distance
-            to points. Defaults to False.
-        with_cluster_center (bool, optional): [description]. Defaults to True.
-        with_voxel_center (bool, optional): [description]. Defaults to True.
-        voxel_size (tuple[float], optional): Size of voxels, only utilize x
-            and y size. Defaults to (0.2, 0.2, 4).
-        point_cloud_range (tuple[float], optional): Point cloud range, only
-            utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1).
-        norm_cfg ([type], optional): [description].
-            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
-        mode (str, optional): The mode to gather point features. Options are
-            'max' or 'avg'. Defaults to 'max'.
-        legacy (bool, optional): Whether to use the new behavior or
-            the original behavior. Defaults to True.
     """
 
     def __init__(self,
@@ -54,7 +102,11 @@ def __init__(self,
                      type='BN1d', eps=1e-3, momentum=0.01),
                  mode: Optional[str] = 'max',
                  legacy: Optional[bool] = True):
-        super(BEVFusionVoxelEncoder, self).__init__()
+        
+        super(BEVFusionVoxelEncoder, self).__init__(
+            min_norm_values=min_norm_values, 
+            max_norm_values=max_norm_values, in_channels=in_channels
+        )
         assert len(feat_channels) > 0
         self.legacy = legacy
         pfn_in_channels = 0
@@ -64,11 +116,13 @@ def __init__(self,
             pfn_in_channels += 3
         if with_distance:
             pfn_in_channels += 1
+        
+        assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0"
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        self.in_channels = in_channels
+        
+        # Create VoxelFeatureNet layers
         feat_channels = [pfn_in_channels] + list(feat_channels)
         pfn_layers = []
         for i in range(len(feat_channels) - 1):
@@ -96,11 +150,6 @@ def __init__(self,
         self.z_offset = self.vz / 2 + point_cloud_range[2]
         self.point_cloud_range = point_cloud_range
 
-        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
-        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
-        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
-        # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
-
     def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                 *args, **kwargs) -> Tensor:
         """Forward function.
@@ -112,26 +161,13 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
             coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
 
         Returns:
-            torch.Tensor: Features of pillars in shape (M, C).
+            torch.Tensor: Features of pillars in shape (M, C*C*2 + feat_channels[-1]).
         """
-        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
-        
-        # Mean in the voxel
-        # (N, M, 3) -> (N, 3)
-        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
-                    -1, 1)).contiguous()
-
-        # min-max normalization, (N, 3) -> (N, 3)
-        voxel_features_norm = (voxel_features - \
-         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
+        # (M, C*C*2)
+        voxel_fourier_features = super().forward(features, num_points, coors)
         
-        # SinCos encoding
-        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
-        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
-        # (N*3, 3) -> (N, 3*3)
-        y = y.reshape(num_voxels, -1)
-        # (N, 3*3) -> (N, 3*3*2)
-        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+        # Offset features
+        max_points_per_voxel = features.shape[1] 
         
         features_ls = []
         # Find distance of x, y, and z from cluster center, mapped to [-1,   1] if available
@@ -200,222 +236,173 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         return features
 
 
-@MODELS.register_module()
-class BEVFusionVoxelSinCosEncoder(nn.Module):
-    def __init__(self, 
-                 min_norm_values: Tuple[float],
-                 max_norm_values: Tuple[float],
-                 time_lag_channel_index: int = 3,
-                 time_exp_factor: Optional[float] = None,
-                 feat_channels: Optional[tuple] = (16, ),
-                 in_channels: Optional[int] = 4,
-                 with_distance: Optional[bool] = False,
-                 with_cluster_center: Optional[bool] = True,
-                 with_voxel_center: Optional[bool] = True,
-                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
-                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
-                                                              40, 1),
-                 norm_cfg: Optional[dict] = dict(
-                     type='BN1d', eps=1e-3, momentum=0.01),
-                 mode: Optional[str] = 'max'):
-        super(BEVFusionVoxelSinCosEncoder, self).__init__()
-
-        self._with_distance = with_distance
-        self._with_cluster_center = with_cluster_center
-        self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        self.in_channels = in_channels
-
-        # Need pillar (voxel) size and x/y offset in order to calculate offset
-        self.vx = voxel_size[0]
-        self.vy = voxel_size[1]
-        self.vz = voxel_size[2]
-        self.x_offset = self.vx / 2 + point_cloud_range[0]
-        self.y_offset = self.vy / 2 + point_cloud_range[1]
-        self.z_offset = self.vz / 2 + point_cloud_range[2]
-        self.point_cloud_range = point_cloud_range
+# @MODELS.register_module()
+# class BEVFusionVoxelSinCosEncoder(nn.Module):
+#     def __init__(self, 
+#                  min_norm_values: Tuple[float],
+#                  max_norm_values: Tuple[float],
+#                  time_lag_channel_index: int = 3,
+#                  time_exp_factor: Optional[float] = None,
+#                  feat_channels: Optional[tuple] = (16, ),
+#                  in_channels: Optional[int] = 4,
+#                  with_distance: Optional[bool] = False,
+#                  with_cluster_center: Optional[bool] = True,
+#                  with_voxel_center: Optional[bool] = True,
+#                  voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
+#                  point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
+#                                                               40, 1),
+#                  norm_cfg: Optional[dict] = dict(
+#                      type='BN1d', eps=1e-3, momentum=0.01),
+#                  mode: Optional[str] = 'max'):
+#         super(BEVFusionVoxelSinCosEncoder, self).__init__()
+
+#         self._with_distance = with_distance
+#         self._with_cluster_center = with_cluster_center
+#         self._with_voxel_center = with_voxel_center
+#         # Create PillarFeatureNet layers
+#         self.in_channels = in_channels
+
+#         # Need pillar (voxel) size and x/y offset in order to calculate offset
+#         self.vx = voxel_size[0]
+#         self.vy = voxel_size[1]
+#         self.vz = voxel_size[2]
+#         self.x_offset = self.vx / 2 + point_cloud_range[0]
+#         self.y_offset = self.vy / 2 + point_cloud_range[1]
+#         self.z_offset = self.vz / 2 + point_cloud_range[2]
+#         self.point_cloud_range = point_cloud_range
         
-        self.xyz_channels = 3
-        feat_offset_channels = in_channels - self.xyz_channels
-        if with_cluster_center:
-            feat_offset_channels += 3
-        if with_voxel_center:
-            feat_offset_channels += 3
-        if with_distance:
-            feat_offset_channels += 1
-
-        feat_channels = [feat_offset_channels] + list(feat_channels)
-        assert len(feat_channels) > 0, "feat_channels must be greater than 0"
-        pfn_layers = []
-        for i in range(len(feat_channels) - 1):
-            in_filters = feat_channels[i]
-            out_filters = feat_channels[i + 1]
-            if i < len(feat_channels) - 2:
-                last_layer = False
-            else:
-                last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    norm_cfg=norm_cfg,
-                    last_layer=last_layer,
-                    mode=mode))
-        self.pfn_layers = nn.ModuleList(pfn_layers)
-
-        self.time_lag_channel_index = time_lag_channel_index
-        self.time_exp_factor = time_exp_factor
+#         self.xyz_channels = 3
+#         feat_offset_channels = in_channels - self.xyz_channels
+#         if with_cluster_center:
+#             feat_offset_channels += 3
+#         if with_voxel_center:
+#             feat_offset_channels += 3
+#         if with_distance:
+#             feat_offset_channels += 1
+
+#         feat_channels = [feat_offset_channels] + list(feat_channels)
+#         assert len(feat_channels) > 0, "feat_channels must be greater than 0"
+#         pfn_layers = []
+#         for i in range(len(feat_channels) - 1):
+#             in_filters = feat_channels[i]
+#             out_filters = feat_channels[i + 1]
+#             if i < len(feat_channels) - 2:
+#                 last_layer = False
+#             else:
+#                 last_layer = True
+#             pfn_layers.append(
+#                 PFNLayer(
+#                     in_filters,
+#                     out_filters,
+#                     norm_cfg=norm_cfg,
+#                     last_layer=last_layer,
+#                     mode=mode))
+#         self.pfn_layers = nn.ModuleList(pfn_layers)
+
+#         self.time_lag_channel_index = time_lag_channel_index
+#         self.time_exp_factor = time_exp_factor
         
-        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
-        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
-        self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
-        self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float())
-
-    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
-                *args, **kwargs) -> Tensor:
-        """Forward function.
-
-        Args:
-            features (torch.Tensor): Point features or raw points in shape
-                (N, M, C).
-            num_points (torch.Tensor): Number of points in each pillar in shape (M).
-            coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
-
-        Returns:
-            torch.Tensor: Features of pillars in shape (M, C).
-        """ 
-        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
+#         self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
+#         self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
+#         self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
+#         self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float())
+
+#     def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+#                 *args, **kwargs) -> Tensor:
+#         """Forward function.
+
+#         Args:
+#             features (torch.Tensor): Point features or raw points in shape
+#                 (N, M, C).
+#             num_points (torch.Tensor): Number of points in each pillar in shape (M).
+#             coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
+
+#         Returns:
+#             torch.Tensor: Features of pillars in shape (M, C).
+#         """ 
+#         num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
         
-        # Mean in the voxel
-        # (N, M, 3) -> (N, 3)
-        voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view(
-                    -1, 1)).contiguous()
-
-        # min-max normalization, (N, 3) -> (N, 3)
-        voxel_features_norm = (voxel_features - \
-         self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1))
+#         # Mean in the voxel
+#         # (N, M, 3) -> (N, 3)
+#         voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view(
+#                     -1, 1)).contiguous()
+
+#         # min-max normalization, (N, 3) -> (N, 3)
+#         voxel_features_norm = (voxel_features - \
+#          self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1))
         
-        # SinCos encoding
-        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
-        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
-        # (N*3, 3) -> (N, 3*3)
-        y = y.reshape(num_voxels, -1)
-        # (N, 3*3) -> (N, 3*3*2)
-        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
-
-        # PFN 
-        # Other features, for example, intensity or time_lag 
-        other_features = features[:, :, self.xyz_channels:]
+#         # SinCos encoding
+#         # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
+#         y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
+#         # (N*3, 3) -> (N, 3*3)
+#         y = y.reshape(num_voxels, -1)
+#         # (N, 3*3) -> (N, 3*3*2)
+#         voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
+
+#         # PFN 
+#         # Other features, for example, intensity or time_lag 
+#         other_features = features[:, :, self.xyz_channels:]
         
-        # Normalization 
-        other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:])    
-
-        time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels
-        # exponentiate time_lag features, it's higher when the normlized time lag is lower 
-        # (1.0 when time_lag_features is 0.0)
-        if self.time_exp_factor is not None:
-            other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor)
-        else:
-            # Inverse the time_lag feature 
-            other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index]
+#         # Normalization 
+#         other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:])    
+
+#         time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels
+#         # exponentiate time_lag features, it's higher when the normlized time lag is lower 
+#         # (1.0 when time_lag_features is 0.0)
+#         if self.time_exp_factor is not None:
+#             other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor)
+#         else:
+#             # Inverse the time_lag feature 
+#             other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index]
             
-        # Offsets
-        voxel_feature_offsets = [other_features_norm]
-        # Find distance of x, y, and z from cluster center
-        if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
+#         # Offsets
+#         voxel_feature_offsets = [other_features_norm]
+#         # Find distance of x, y, and z from cluster center
+#         if self._with_cluster_center:
+#             points_mean = features[:, :, :3].sum(
+#                 dim=1, keepdim=True) / num_points.type_as(features).view(
+#                     -1, 1, 1)
             
-            # f_cluster = (features[:, :, :3] - points_mean)
-            f_cluster = features[:, :, :3] - points_mean
-            voxel_feature_offsets.append(f_cluster)
-
-        # Find distance of x, y, and z from pillar center
-        dtype = features.dtype
-        if self._with_voxel_center:
-            f_center = torch.zeros_like(features[:, :, :3])
-            f_center[:, :, 0] = features[:, :, 0] - (
-                coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
-                self.x_offset)
-            f_center[:, :, 1] = features[:, :, 1] - (
-                coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
-                self.y_offset)
-            f_center[:, :, 2] = features[:, :, 2] - (
-                coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
-                self.z_offset)
+#             # f_cluster = (features[:, :, :3] - points_mean)
+#             f_cluster = features[:, :, :3] - points_mean
+#             voxel_feature_offsets.append(f_cluster)
+
+#         # Find distance of x, y, and z from pillar center
+#         dtype = features.dtype
+#         if self._with_voxel_center:
+#             f_center = torch.zeros_like(features[:, :, :3])
+#             f_center[:, :, 0] = features[:, :, 0] - (
+#                 coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
+#                 self.x_offset)
+#             f_center[:, :, 1] = features[:, :, 1] - (
+#                 coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
+#                 self.y_offset)
+#             f_center[:, :, 2] = features[:, :, 2] - (
+#                 coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
+#                 self.z_offset)
             
-            # Map to [-1, 1]
-            # f_center = f_center / (self.voxel_size * 0.5)
-            voxel_feature_offsets.append(f_center)
+#             # Map to [-1, 1]
+#             # f_center = f_center / (self.voxel_size * 0.5)
+#             voxel_feature_offsets.append(f_center)
 
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            voxel_feature_offsets.append(points_dist)
+#         if self._with_distance:
+#             points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
+#             voxel_feature_offsets.append(points_dist)
         
-        voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1)
-        # The feature decorations were calculated without regard to whether
-        # pillar was empty. Need to ensure that
-        # empty pillars remain set to zeros.
-        mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
-        voxel_feature_offsets *= mask
+#         voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1)
+#         # The feature decorations were calculated without regard to whether
+#         # pillar was empty. Need to ensure that
+#         # empty pillars remain set to zeros.
+#         mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
+#         mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
+#         voxel_feature_offsets *= mask
         
-        # PFN
-        for pfn in self.pfn_layers:
-            voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
+#         # PFN
+#         for pfn in self.pfn_layers:
+#             voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
         
-        # Concat 
-        features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
-        return features
-
-
-
-@MODELS.register_module()
-class BEVFusionVoxelMeanSinCosEncoder(nn.Module):
-    def __init__(self, 
-                 min_norm_values: Tuple[float],
-                 max_norm_values: Tuple[float],
-                 in_channels: Optional[int] = 4):
-        super(BEVFusionVoxelMeanSinCosEncoder, self).__init__()
-
-        # Create PillarFeatureNet layers
-        self.in_channels = in_channels
+#         # Concat 
+#         features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
+#         return features
 
-        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
-        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
-        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
 
-    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
-                *args, **kwargs) -> Tensor:
-        """Forward function.
-
-        Args:
-            features (torch.Tensor): Point features or raw points in shape
-                (N, M, C).
-            num_points (torch.Tensor): Number of points in each pillar in shape (M).
-            coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx).
-
-        Returns:
-            torch.Tensor: Features of pillars in shape (M, C).
-        """ 
-        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
-        
-        # Mean in the voxel
-        # (N, M, 3) -> (N, 3)
-        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
-                    -1, 1)).contiguous()
-
-        # min-max normalization, (N, 3) -> (N, 3)
-        voxel_features_norm = (voxel_features - \
-         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
-        
-        # SinCos encoding
-        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
-        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
-        # (N*3, 3) -> (N, 3*3)
-        y = y.reshape(num_voxels, -1)
-        # (N, 3*3) -> (N, 3*3*2)
-        voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
-        
-        return voxel_fourier_features

From db8e7f8b2193e883ac806923a6f335dc127c8c8b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 22:41:59 +0900
Subject: [PATCH 095/183] Fix super() call in HardSimpleVoxelSinCosEncoder and drop _delete_ from config

---
 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py     | 6 +++---
 ...r_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 843624b56..6c1955505 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -26,7 +26,7 @@ def __init__(self,
             max_norm_values (Tuple[float]): Maximum values for the features.
             in_channels (int): Number of input channels.
         """
-        super(BEVFusionVoxelMeanSinCosEncoder, self).__init__()
+        super(HardSimpleVoxelSinCosEncoder, self).__init__()
       
         # Create PillarFeatureNet layers
         self.in_channels = in_channels
@@ -75,8 +75,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder):
     """BEVFusion Voxel Encoder Feature Net.
     
-    The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the 
-    offset features, for example, distances. After that, it concatenates the fourier features and the offset features 
+    The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the 
+    offset features, for example, distances. After that, it concatenates the fourier features and the PFN features 
     along the channel dimension for each voxel.
 
     Args:
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 02f9642f2..72e73c036 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -27,7 +27,6 @@
         voxel_size=_base_.voxel_size,
     ),
     pts_voxel_encoder=dict(
-        _delete_=True,
         type="BEVFusionVoxelEncoder", 
         in_channels=4,
         with_distance=False,

From 4c907aa1af5f986eaa21ac91a6664b68b7c7de07 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 22:43:23 +0900
Subject: [PATCH 096/183] Rename BEVFusionVoxelEncoder to BEVFusionVoxelFeatureNet

---
 projects/BEVFusion/bevfusion/__init__.py                   | 7 +++----
 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py    | 2 +-
 ..._voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index ce9b31aa5..fa23d120c 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -7,7 +7,7 @@
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
-from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder
+from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet
 
 __all__ = [
     "BEVFusion",
@@ -28,7 +28,6 @@
     "BEVFusionRandomFlip3D",
     "BEVFusionGlobalRotScaleTrans",
     "TransFusionBBoxCoder",
-    "BEVFusionVoxelEncoder",
-    "BEVFusionVoxelSinCosEncoder",
-    "BEVFusionVoxelMeanSinCosEncoder",
+    "HardSimpleVoxelSinCosEncoder",
+    "BEVFusionVoxelFeatureNet",
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 6c1955505..f7a5c481c 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -72,7 +72,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
 
 @MODELS.register_module()
-class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder):
+class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder):
     """BEVFusion Voxel Encoder Feature Net.
     
     The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 72e73c036..b6ad6cac2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -27,7 +27,7 @@
         voxel_size=_base_.voxel_size,
     ),
     pts_voxel_encoder=dict(
-        type="BEVFusionVoxelEncoder", 
+        type="BEVFusionVoxelFeatureNet", 
         in_channels=4,
         with_distance=False,
         with_cluster_center=True,

From 8cb422d5ab8a3c35943ef089169fb8fb89046b3e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 22:45:03 +0900
Subject: [PATCH 097/183] Follow up voxel encoder renames in super() call and configs

---
 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py         | 2 +-
 ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +-
 .../default/models/default_lidar_second_secfpn_120m.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index f7a5c481c..efed0ce5d 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -103,7 +103,7 @@ def __init__(self,
                  mode: Optional[str] = 'max',
                  legacy: Optional[bool] = True):
         
-        super(BEVFusionVoxelEncoder, self).__init__(
+        super(BEVFusionVoxelFeatureNet, self).__init__(
             min_norm_values=min_norm_values, 
             max_norm_values=max_norm_values, in_channels=in_channels
         )
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index b6ad6cac2..83a607386 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -28,7 +28,7 @@
     ),
     pts_voxel_encoder=dict(
         type="BEVFusionVoxelFeatureNet", 
-        in_channels=4,
+        in_channels=len(_base_.lidar_sweep_dims),
         with_distance=False,
         with_cluster_center=True,
         with_voxel_center=True,
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index d56e6d1a3..5a880d975 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -13,7 +13,7 @@
         pad_size_divisor=32,
     ),
     pts_voxel_encoder=dict(
-        type="BEVFusionVoxelMeanSinCosEncoder", 
+        type="HardSimpleVoxelSinCosEncoder", 
         in_channels=4,
     ),
     pts_middle_encoder=dict(

From 5635a003821391a44d67eed601be99c88a58a84d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 9 May 2026 22:48:59 +0900
Subject: [PATCH 098/183] Reduce voxel encoder feat_channels to [16] in 48-channel config

---
 ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 83a607386..36c39dd5a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -32,7 +32,7 @@
         with_distance=False,
         with_cluster_center=True,
         with_voxel_center=True,
-        feat_channels=[16, 16],
+        feat_channels=[16],
         point_cloud_range=_base_.point_cloud_range,
         voxel_size=_base_.voxel_size,
         norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),

From fb27f498e2312bf60dee8ecb0c1e5c4b489bba39 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 12 May 2026 05:56:51 +0900
Subject: [PATCH 099/183] Resolve conflict: add normalized point features to PFN input

---
 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index efed0ce5d..83cd70482 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -109,7 +109,7 @@ def __init__(self,
         )
         assert len(feat_channels) > 0
         self.legacy = legacy
-        pfn_in_channels = 0
+        pfn_in_channels = in_channels
         if with_cluster_center:
             pfn_in_channels += 3
         if with_voxel_center:
@@ -165,11 +165,14 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         """
         # (M, C*C*2)
         voxel_fourier_features = super().forward(features, num_points, coors)
+
+        # Normalize the features
+        norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
         
         # Offset features
         max_points_per_voxel = features.shape[1] 
         
-        features_ls = []
+        features_ls = [norm_features]
         # Find distance of x, y, and z from cluster center, mapped to [-1,   1] if available
         if self._with_cluster_center:
             points_mean = features[:, :, :3].sum(

From bb5d7579e5d9906bf89e3ec9a88f54802992bd49 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 12 May 2026 22:24:10 +0900
Subject: [PATCH 100/183] Resolve conflict: use sigmoid instead of clamp on predicted IoU

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 1de3af05f..69417347b 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -387,7 +387,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
             batch_size = preds_dict[0]["heatmap"].shape[0]
             batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid()
             if self.loss_iou is not None:
-               batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501
+               batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501
             one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1)
             batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot
 

From 3f64c2c5323efc1ee5d0283f44f1255bd53dc3e5 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 13 May 2026 19:31:59 +0900
Subject: [PATCH 101/183] Resolve conflict: fix unterminated scheduler type, use 8xb16

---
 ..._lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 6 +++---
 .../default/schedulers/default_30e_8xb16_adamw_cosine.py    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 4cf51faa5..71c1829d4 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_lidar_intensity_120m.py",
     "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_cosine.py",
+    "../default/schedulers/default_30e_8xb16_adamw_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m"
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -152,4 +152,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = None
+load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth"
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
index e3975f6eb..d28468f71 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
@@ -23,7 +23,7 @@
         convert_to_iter_based=True,
     ),
     dict(
-        type="CosineAnnealingLR
+        type="CosineAnnealingLR",
         T_max=(max_epochs - t_max),
         eta_min=lr * 1e-4,
         begin=t_max,

From ac62b49630918abc492ce81f3274441dc9de1528 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 13 May 2026 20:16:32 +0900
Subject: [PATCH 102/183] Resolve conflict: rename camera configs and add 50m variants

---
 ...et50_fpn_lss_30e_8xb8_j6gen2_base_120m.py} |   8 +-
 ...snet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ++++++++++++++++++
 ...ra_resnet50_fpn_lss_50e_8xb8_base_120m.py} |   6 +-
 ...mera_resnet50_fpn_lss_50e_8xb8_base_50m.py | 137 ++++++++++++++++++
 ...swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py | 137 ++++++++++++++++++
 ..._swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py | 137 ++++++++++++++++++
 ..._camera_swin_fpn_lss_50e_8xb8_base_120m.py |   2 +-
 ...second_secfpn_20e_8xb8_j6gen2_base_120m.py |   4 +-
 ...econd_secfpn_20e_8xb8_jpntaxi_base_120m.py |   4 +-
 ...ault_camera_resnet50_fpn_depthlss_120m.py} |  39 ++---
 ...0_fpn_depthlss_lidar_second_secfpn_120m.py |  53 +++++++
 .../default_camera_resnet50_fpn_lss_50m.py    |  23 +++
 ..._fpn_depthlss_lidar_second_secfpn_120m.py} |   0
 .../models/default_camera_swin_fpn_lss_50m.py |  49 +------
 .../pipelines/default_camera_base_120m.py     | 133 -----------------
 ... default_20e_8xb16_adamw_linear_cosine.py} |   5 +-
 ... default_30e_8xb16_adamw_linear_cosine.py} |   5 +-
 ... default_50e_8xb16_adamw_linear_cosine.py} |   5 +-
 18 files changed, 658 insertions(+), 226 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py => bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py} (95%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py => bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py} (96%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lss_120m.py => default_camera_resnet50_fpn_depthlss_120m.py} (57%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lidar_second_secfpn_120m.py => default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py} (100%)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_20e_8xb8_adamw_linear_cosine.py => default_20e_8xb16_adamw_linear_cosine.py} (96%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb8_adamw_linear_cosine.py => default_30e_8xb16_adamw_linear_cosine.py} (96%)
 rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb8_adamw_linear_cosine.py => default_50e_8xb16_adamw_linear_cosine.py} (96%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
similarity index 95%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
index d31630dd0..4c809264e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
@@ -2,7 +2,7 @@
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_depthlss_120m.py",
+    "../default/models/default_camera_swin_fpn_lss_120m.py",
     "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
@@ -12,11 +12,11 @@
 custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
 
 # user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
 
 experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m"
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
new file mode 100644
index 000000000..927310e7d
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/default_camera_base_50m.py",
+    "../default/models/default_camera_swin_fpn_lss_50m.py",
+    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py
similarity index 96%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py
index 4ac46afea..42f93d1b1 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py
@@ -2,7 +2,7 @@
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_depthlss_120m.py",
+    "../default/models/default_camera_swin_fpn_lss_120m.py",
     "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_6_2/"
 
 experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m"
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
@@ -135,5 +135,3 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
-
-resume = True
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py
new file mode 100644
index 000000000..7c5a5f91f
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
+    "../default/pipelines/default_camera_base_50m.py",
+    "../default/models/default_camera_swin_fpn_lss_50m.py",
+    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# user setting
+data_root = "data/t4dataset/"
+info_directory_path = "info/kokseang_2_6_2/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
new file mode 100644
index 000000000..4c809264e
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",  # NOTE(review): file name says jpntaxi but this is the j6gen2 dataset base - confirm
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
+    "../default/models/default_camera_swin_fpn_lss_120m.py",  # NOTE(review): this file is renamed to default_camera_resnet50_fpn_depthlss_120m.py in the same change - confirm the intended model base
+    "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py",  # scheduler file was renamed from ..._8xb8_... to ..._8xb16_...
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# User settings: dataset root, info-file directory and the names that make up work_dir
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"  # NOTE(review): says j6gen2 although this file is the jpntaxi config - confirm
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Model overrides: class list, image size and BEV geometry are read from the inherited base configs
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,  # presumably the view_transform out_channels of the model base - confirm
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],  # test_cfg receives only the first two entries
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
new file mode 100644
index 000000000..927310e7d
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
@@ -0,0 +1,137 @@
+_base_ = [
+    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",  # NOTE(review): file name says jpntaxi but this is the j6gen2 dataset base - confirm
+    "../default/pipelines/default_camera_base_50m.py",
+    "../default/models/default_camera_swin_fpn_lss_50m.py",
+    "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py",  # scheduler file was renamed from ..._8xb8_... to ..._8xb16_...
+    "../default/default_misc.py",
+]
+
+custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
+custom_imports["imports"] += _base_.custom_imports["imports"]
+custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
+
+# User settings: dataset root, info-file directory and the names that make up work_dir
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_6_1/"
+
+experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"  # NOTE(review): says j6gen2 although this file is the jpntaxi config - confirm
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# Model overrides: class list, image size and BEV geometry are read from the inherited base configs
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,  # presumably the view_transform out_channels of the model base - confirm
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],  # test_cfg receives only the first two entries
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
+
+# Dataset parameters
+train_dataloader = dict(
+    batch_size=_base_.train_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    dataset=dict(
+        type=_base_.dataset_type,
+        pipeline=_base_.train_pipeline,
+        modality=_base_.input_modality,
+        backend_args=_base_.backend_args,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_train_file_name,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        test_mode=False,
+        data_prefix=_base_.data_prefix,
+        box_type_3d="LiDAR",
+        filter_cfg=_base_.filter_cfg,
+    ),
+)
+
+val_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_val_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+test_dataloader = dict(
+    batch_size=_base_.test_batch_size,
+    num_workers=_base_.num_workers,
+    persistent_workers=True,
+    sampler=dict(type="DefaultSampler", shuffle=False),
+    dataset=dict(
+        type=_base_.dataset_type,
+        data_root=data_root,
+        ann_file=info_directory_path + _base_.info_test_file_name,
+        pipeline=_base_.test_pipeline,
+        metainfo=_base_.metainfo,
+        class_names=_base_.class_names,
+        modality=_base_.input_modality,
+        data_prefix=_base_.data_prefix,
+        test_mode=True,
+        box_type_3d="LiDAR",
+        backend_args=_base_.backend_args,
+    ),
+)
+
+val_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+)
+
+test_evaluator = dict(
+    type="T4Metric",
+    data_root=data_root,
+    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
+    metric="bbox",
+    backend_args=_base_.backend_args,
+    class_names=_base_.class_names,
+    name_mapping=_base_.name_mapping,
+    eval_class_range=_base_.eval_class_range,
+    filter_attributes=_base_.filter_attributes,
+    save_csv=True,
+)
+
+default_hooks = dict(
+    logger=dict(type="LoggerHook", interval=50),
+    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
+)
+log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
index 1c30d708a..42f93d1b1 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
@@ -1,7 +1,7 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_base_120m.py",
+    "../default/pipelines/default_camera_lidar_intensity_120m.py",
     "../default/models/default_camera_swin_fpn_lss_120m.py",
     "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
     "../default/default_misc.py",
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
index 4f81af760..a93b1d435 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py
@@ -2,8 +2,8 @@
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py",
+    "../default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_20e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
index 20c85b1d8..b8408956b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py
@@ -2,8 +2,8 @@
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py",
     "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py",
+    "../default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py",
+    "../default/schedulers/default_20e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
similarity index 57%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
index 2f1d1f3be..0edff4398 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
@@ -18,44 +18,37 @@
         bgr_to_rgb=False,
         rgb_to_bgr=False,
     ),
-    img_backbone=dict(
-        type="mmdet.SwinTransformer",
-        pretrain_img_size=(256, 704),
-        embed_dims=96,
-        depths=[2, 2, 6, 2],
-        num_heads=[3, 6, 12, 24],
-        window_size=7,
-        mlp_ratio=4,
-        qkv_bias=True,
-        qk_scale=None,
-        drop_rate=0.0,
-        attn_drop_rate=0.0,
-        drop_path_rate=0.2,
-        patch_norm=True,
-        out_indices=[1, 2, 3],
+    img_backbone=dict(
+        pretrained="torchvision://resnet50",  # NOTE(review): mmdet's ResNet asserts that pretrained and init_cfg are not both set - confirm which one is intended
+        type="ResNet",
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        norm_eval=False,
         with_cp=False,
-        convert_weights=True,
-        init_cfg=dict(
+        style="pytorch",
+        init_cfg=dict(
             type="Pretrained",
-            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
-            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
+            checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth",  # noqa: E251
         ),
     ),
     img_neck=dict(
         type="GeneralizedLSSFPN",
-        in_channels=[192, 384, 768],
+        in_channels=[512, 1024, 2048],
         out_channels=256,
         start_level=0,
-        num_outs=3,
+        num_outs=2,
         norm_cfg=dict(type="BN2d", requires_grad=True),
         act_cfg=dict(type="ReLU", inplace=True),
         upsample_cfg=dict(mode="bilinear", align_corners=False),
     ),
     view_transform=dict(
-        type="LSSTransform",
+        type="DepthLSSTransform",
         in_channels=256,
         out_channels=80,
-        feature_size=[48, 96],
+        feature_size=[24, 48],
         xbound=[-122.40, 122.40, 0.68],
         ybound=[-122.40, 122.40, 0.68],
         zbound=[-10.0, 10.0, 20.0],
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
new file mode 100644
index 000000000..21f746da8
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
@@ -0,0 +1,53 @@
+_base_ = [
+    "./default_lidar_second_secfpn_120m.py",
+]
+
+# Image network
+model = dict(
+    data_preprocessor=dict(
+        type="Det3DDataPreprocessor",
+        pad_size_divisor=32,
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=False,
+        rgb_to_bgr=False,
+    ),
+    img_backbone=dict(
+        pretrained="torchvision://resnet50",  # NOTE(review): mmdet's ResNet asserts that pretrained and init_cfg are not both set - confirm which one is intended
+        type="ResNet",
+        depth=50,
+        num_stages=4,
+        out_indices=(1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        norm_eval=False,
+        with_cp=False,
+        style="pytorch",
+        init_cfg=dict(
+            type="Pretrained",
+            checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth",  # noqa: E251
+        ),
+    ),
+    img_neck=dict(
+        type="GeneralizedLSSFPN",
+        in_channels=[512, 1024, 2048],
+        out_channels=256,
+        start_level=0,
+        num_outs=2,
+        norm_cfg=dict(type="BN2d", requires_grad=True),
+        act_cfg=dict(type="ReLU", inplace=True),
+        upsample_cfg=dict(mode="bilinear", align_corners=False),
+    ),
+    view_transform=dict(
+        type="DepthLSSTransform",
+        in_channels=256,
+        out_channels=80,
+        feature_size=[48, 96],
+        xbound=[-122.40, 122.40, 0.68],
+        ybound=[-122.40, 122.40, 0.68],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 130, 1.0],
+        downsample=2,
+    ),
+    fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, stride=2, padding=2),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py
new file mode 100644
index 000000000..5577723bf
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py
@@ -0,0 +1,23 @@
+_base_ = [
+    "./default_camera_resnet50_fpn_depthlss_120m.py",
+]
+
+# 50 m camera variant: overrides the view transform and the box post_center_range of the inherited 120 m base
+model = dict(
+    view_transform=dict(
+        type="LSSTransform",
+        in_channels=256,
+        out_channels=80,
+        feature_size=[48, 96],  # NOTE(review): base default_camera_resnet50_fpn_depthlss_120m.py uses [24, 48] - confirm this override
+        xbound=[-54.0, 54.0, 0.3],
+        ybound=[-54.0, 54.0, 0.3],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 60, 0.5],
+        downsample=2,
+    ),
+    bbox_head=dict(
+        bbox_coder=dict(
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+        ),
+    )
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
similarity index 100%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
index a35e3a79a..39a4a637f 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
@@ -1,56 +1,9 @@
 _base_ = [
-    "./default_lidar_second_secfpn_120m.py",
+    "./default_camera_swin_fpn_depthlss_120m.py",
 ]
 
 # Image network
 model = dict(
-    # Remove all lidar related configs
-    voxelize_cfg=None,
-    pts_voxel_encoder=None,
-    pts_middle_encoder=None,
-    pts_neck=None,
-    pts_backbone=None,
-    data_preprocessor=dict(
-        type="Det3DDataPreprocessor",
-        pad_size_divisor=32,
-        mean=[123.675, 116.28, 103.53],
-        std=[58.395, 57.12, 57.375],
-        bgr_to_rgb=False,
-        rgb_to_bgr=False,
-    ),
-    img_backbone=dict(
-        type="mmdet.SwinTransformer",
-        pretrain_img_size=(256, 704),
-        embed_dims=96,
-        depths=[2, 2, 6, 2],
-        num_heads=[3, 6, 12, 24],
-        window_size=7,
-        mlp_ratio=4,
-        qkv_bias=True,
-        qk_scale=None,
-        drop_rate=0.0,
-        attn_drop_rate=0.0,
-        drop_path_rate=0.2,
-        patch_norm=True,
-        out_indices=[1, 2, 3],
-        with_cp=False,
-        convert_weights=True,
-        init_cfg=dict(
-            type="Pretrained",
-            # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth
-            checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth",  # noqa: E251
-        ),
-    ),
-    img_neck=dict(
-        type="GeneralizedLSSFPN",
-        in_channels=[192, 384, 768],
-        out_channels=256,
-        start_level=0,
-        num_outs=3,
-        norm_cfg=dict(type="BN2d", requires_grad=True),
-        act_cfg=dict(type="ReLU", inplace=True),
-        upsample_cfg=dict(mode="bilinear", align_corners=False),
-    ),
     view_transform=dict(
         type="LSSTransform",
         in_channels=256,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
deleted file mode 100644
index de8d48263..000000000
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py
+++ /dev/null
@@ -1,133 +0,0 @@
-## This config is for the camera_base only model, without lidar points
-
-_base_ = [
-    "./default_lidar_120m.py",
-]
-input_modality = dict(use_lidar=True, use_camera=True)
-
-# Image parameters
-image_size = [384, 768]  # Height, Width
-camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"]
-
-train_pipeline = [
-    dict(
-        type="BEVLoadMultiViewImageFromFiles",
-        to_float32=True,
-        color_type="color",
-        backend_args=_base_.backend_args,
-        camera_order=camera_order,
-    ),
-    # We keep loading LiDAR points to make downstream BEV augmentation easier 
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=_base_.point_load_dim,
-        use_dim=_base_.point_load_dim,
-        backend_args=_base_.backend_args,
-    ),
-    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
-    dict(
-        type="ImageAug3D",
-        final_dim=image_size,
-        resize_lim=[0.28, 0.40],
-        bot_pct_lim=[0.0, 0.0],
-        rot_lim=[0.0, 0.0],
-        rand_flip=True,
-        is_train=True,
-    ),
-    dict(
-        type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
-    ),
-    dict(type="BEVFusionRandomFlip3D"),
-    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
-    # Remove LiDAR points from the data
-    dict(type="BEVFusionRemoveLiDARPoints"),
-    dict(
-        type="ObjectNameFilter",
-        classes=[
-            "car",
-            "truck",
-            "construction_vehicle",
-            "bus",
-            "trailer",
-            "barrier",
-            "motorcycle",
-            "bicycle",
-            "pedestrian",
-            "traffic_cone",
-        ],
-    ),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "transformation_3d_flow",
-            "pcd_rotation",
-            "pcd_scale_factor",
-            "pcd_trans",
-            "img_aug_matrix",
-            "lidar_aug_matrix",
-            "timestamp",
-            "vehicle_type",
-            "city",
-        ],
-    ),
-]
-
-test_pipeline = [
-    dict(
-        type="BEVLoadMultiViewImageFromFiles",
-        to_float32=True,
-        color_type="color",
-        backend_args=_base_.backend_args,
-        camera_order=camera_order,
-    ),
-    dict(
-        type="ImageAug3D",
-        final_dim=image_size,
-        # resize_lim=[0.34, 0.34],
-        resize_lim=0.02,
-        bot_pct_lim=[0.0, 0.0],
-        rot_lim=[0.0, 0.0],
-        rand_flip=False,
-        is_train=False,
-    ),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "num_pts_feats",
-            "num_views",
-            "timestamp",
-            "vehicle_type",
-            "city",
-        ],
-    ),
-]
-
-filter_cfg = dict(filter_frames_with_camera_order=camera_order)
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py
similarity index 96%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py
index db5515b46..05740e442 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py
@@ -1,7 +1,6 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1e-4
-t_max = 6
+lr = 2e-4
+t_max = 2
 max_epochs = 20
 val_interval = 1
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
similarity index 96%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 2181cbebb..261246886 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,7 +1,6 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1e-4
-t_max = 2
+lr = 2e-4
+t_max = 3
 max_epochs = 30
 val_interval = 1
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py
similarity index 96%
rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py
index d569900d6..43715fed7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py
@@ -1,7 +1,6 @@
 # learning rate
-# 1e-4 * sqrt(2) = 0.0001414
-lr = 1e-4
-t_max = 3
+lr = 2e-4
+t_max = 5
 max_epochs = 50
 val_interval = 5
 

From 45cc9cf3378006f5c50858abe5f547e311763562 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 13 May 2026 20:22:50 +0900
Subject: [PATCH 103/183] Resolve conflict

---
 .../models/default_camera_resnet50_fpn_depthlss_120m.py       | 4 ++--
 ...t_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
index 0edff4398..6203da514 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
@@ -23,7 +23,7 @@
         type="ResNet",
         depth=50,
         num_stages=4,
-        out_indices=(1, 2, 3),
+        out_indices=(2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type="BN2d", requires_grad=True),
         norm_eval=False,
@@ -36,7 +36,7 @@
     ),
     img_neck=dict(
         type="GeneralizedLSSFPN",
-        in_channels=[512, 1024, 2048],
+        in_channels=[1024, 2048],
         out_channels=256,
         start_level=0,
         num_outs=2,
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
index 21f746da8..43e8dd9ac 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
@@ -17,7 +17,7 @@
         type="ResNet",
         depth=50,
         num_stages=4,
-        out_indices=(1, 2, 3),
+        out_indices=(2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type="BN2d", requires_grad=True),
         norm_eval=False,
@@ -30,7 +30,7 @@
     ),
     img_neck=dict(
         type="GeneralizedLSSFPN",
-        in_channels=[512, 1024, 2048],
+        in_channels=[1024, 2048],
         out_channels=256,
         start_level=0,
         num_outs=2,

From 1a81b03587668e4009a34d80fe050878f895a757 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 14 May 2026 16:23:50 +0900
Subject: [PATCH 104/183] Add local 3d box expand

---
 .../datasets/transforms/__init__.py           |   3 +-
 .../datasets/transforms/local_3d_bbox.py      |  57 +++++++
 .../pipelines/default_lidar_120m_width.py     | 150 ++++++++++++++++++
 3 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py

diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py
index 6bc932f1a..b20961db6 100644
--- a/autoware_ml/detection3d/datasets/transforms/__init__.py
+++ b/autoware_ml/detection3d/datasets/transforms/__init__.py
@@ -1,3 +1,4 @@
 from .object_min_points_filter import ObjectMinPointsFilter
+from .local_3d_bbox import Local3DBBoxExpand
 
-__all__ = ["ObjectMinPointsFilter"]
+__all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"]
diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
new file mode 100644
index 000000000..e417c4bfb
--- /dev/null
+++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
@@ -0,0 +1,57 @@
+import numpy as np 
+
+from mmcv.transforms import BaseTransform
+from mmdet3d.structures.ops import box_np_ops
+from mmengine.registry import TRANSFORMS
+
+
+@TRANSFORMS.register_module()
+class Local3DBBoxExpand(BaseTransform):
+    """Locally widen 3D bounding boxes by a random additive offset; the points are left untouched.
+
+    Args:
+        expand_widths: (List[float]): Uniformly sampled expand width.
+        width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D 
+				  bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the
+					4th dimension.
+		    label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded.
+		"""
+
+    def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None:
+        assert isinstance(expand_widths, list)
+        assert len(expand_widths) == 2
+        assert expand_widths[0] < expand_widths[1]
+        self.expand_widths = expand_widths
+				self.width_dim = width_dim
+				self.label_ids = label_ids
+    
+		def transform(self, input_dict: dict) -> dict:
+        """Call function to locally augment the 3D bounding boxes by scaling the width.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \
+                key is updated in the result dict.
+        """
+				# Label mask 
+				if self.label_ids is not None:
+					  label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] 
+	      else:
+					  label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool)
+
+				for i in range(len(input_dict["gt_bboxes_3d"])):
+				    if not label_masks[i]:
+				        continue 
+		      
+					expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1])
+					input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width
+        
+				return input_dict
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        repr_str += f"(expand_widths={self.expand_widths}, width_dim={self.width_dim}, label_ids={self.label_ids})"
+        return repr_str
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py
new file mode 100644
index 000000000..0b32cc86a
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py
@@ -0,0 +1,150 @@
+# Dataset parameters
+backend_args = None
+num_workers = 16
+input_modality = dict(use_lidar=True, use_camera=False)
+
+# range setting
+point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
+voxel_size = [0.17, 0.17, 0.2]
+grid_size = [1440, 1440, 41]
+eval_class_range = {
+    "car": 120,
+    "truck": 120,
+    "bus": 120,
+    "bicycle": 120,
+    "pedestrian": 120,
+    "traffic_cone": 120,
+    "barrier": 120,
+}
+
+# LiDAR parameters
+point_load_dim = 5  # x, y, z, intensity, ring_id
+point_use_dim = 4
+lidar_sweep_dims = [0, 1, 2, 4]  # x, y, z, time_lag
+sweeps_num = 1
+
+train_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+		# For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirros
+		dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]),
+		# For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70
+		dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+            "traffic_cone",
+            "barrier",
+        ],
+    ),
+    dict(type="PointShuffle"),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=point_load_dim,
+        use_dim=point_load_dim,
+        backend_args=backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=sweeps_num,
+        load_dim=point_load_dim,
+        use_dim=lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=backend_args,
+        test_mode=True,
+    ),
+    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+            "traffic_cone_barrier_status",
+        ],
+    ),
+]
+
+# Filtering configuration
+# Note:
+# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering,
+#   e.g., dict(filter_frames_with_missing_image=True).
+# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
+#   image-based filtering does not apply and `filter_cfg` is intentionally None.
+filter_cfg = None

From 5257c01a90aebb4f6aea4343717073c660089885 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 15 May 2026 00:26:00 +0900
Subject: [PATCH 105/183] Resolve conflict

---
 .../datasets/transforms/local_3d_bbox.py      | 42 ++++++++++---------
 ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py |  2 +-
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
index e417c4bfb..96772cf44 100644
--- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
+++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
@@ -1,3 +1,5 @@
+from typing import List
+
 import numpy as np 
 
 from mmcv.transforms import BaseTransform
@@ -12,20 +14,20 @@ class Local3DBBoxExpand(BaseTransform):
     Args:
         expand_widths: (List[float]): Uniformly sampled expand width.
         width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D 
-				  bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the
-					4th dimension.
-		    label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded.
-		"""
+                  bounding box. Since a 3D bbox is laid out as [x, y, z, dx, dy, dz, heading], the width (dy) is at
+                    index 4 (0-based).
+            label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded.
+        """
 
     def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None:
         assert isinstance(expand_widths, list)
         assert len(expand_widths) == 2
         assert expand_widths[0] < expand_widths[1]
         self.expand_widths = expand_widths
-				self.width_dim = width_dim
-				self.label_ids = label_ids
+        self.width_dim = width_dim
+        self.label_ids = label_ids
     
-		def transform(self, input_dict: dict) -> dict:
+    def transform(self, input_dict: dict) -> dict:
         """Call function to locally augment the 3D bounding boxes by scaling the width.
 
         Args:
@@ -35,20 +37,20 @@ def transform(self, input_dict: dict) -> dict:
             dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \
                 key is updated in the result dict.
         """
-				# Label mask 
-				if self.label_ids is not None:
-					  label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] 
-	      else:
-					  label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool)
-
-				for i in range(len(input_dict["gt_bboxes_3d"])):
-				    if not label_masks[i]:
-				        continue 
-		      
-					expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1])
-					input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width
+        # Label mask 
+        if self.label_ids is not None:
+            label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] 
+        else:
+            label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool)
+
+        for i in range(len(input_dict["gt_bboxes_3d"])):
+            if not label_masks[i]:
+                continue 
+              
+            expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1])
+            input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width
         
-				return input_dict
+        return input_dict
 
     def __repr__(self) -> str:
         """str: Return a string that describes the module."""
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
index 3b7c23b18..02ed7542a 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
@@ -152,4 +152,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth"
+load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth"

From 14a20e107d5f0f054be24ad1246d0d287098a0c9 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 15 May 2026 22:38:53 +0900
Subject: [PATCH 106/183] Updated

---
 ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 36c39dd5a..3208a592c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels"
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter

From 6dde84dd661ce4f48b23c7c9286d17f1f18be82e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 16 May 2026 18:29:09 +0900
Subject: [PATCH 107/183] Add local 3d box expand

---
 ..._second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index e3f7d5146..3bdda213e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -1,10 +1,10 @@
 _base_ = [
-    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py",
+    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py",
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2"
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # Add evaluator configs

From 5213d864533cb7f879895d9552b7b04a7423b7ab Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 17 May 2026 16:46:20 +0900
Subject: [PATCH 108/183] Add local 3d box expand

---
 .../detection3d/dataset/t4dataset/base.py     |   3 +
 .../detection3d/dataset/t4dataset/j6gen2.py   |   3 +
 .../dataset/t4dataset/j6gen2_base.py          |   3 +
 .../dataset/t4dataset/j6gen2_v2.py            | 194 ------------------
 ...30e_8xb16_jpntaxi_base_120m_t4metric_v2.py |   6 +-
 5 files changed, 12 insertions(+), 197 deletions(-)
 delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index 3be587072..8e49f2396 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -29,6 +29,9 @@
     "db_j6gen2_v7",
     "db_j6gen2_v8",
     "db_j6gen2_v9",
+    "db_j6gen2_v10",
+    "db_j6gen2_v11",
+    "db_j6gen2_v12",
     "db_largebus_v1",
     "db_largebus_v2",
     "db_largebus_v3",
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
index 0324e7207..a93bf56af 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
@@ -27,6 +27,9 @@
     "db_j6gen2_v7",
     "db_j6gen2_v8",
     "db_j6gen2_v9",
+    "db_j6gen2_v10",
+    "db_j6gen2_v11",
+    "db_j6gen2_v12",
 ]
 
 dataset_test_groups = {
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
index b9ec03f27..170086752 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
@@ -27,6 +27,9 @@
     "db_j6gen2_v7",
     "db_j6gen2_v8",
     "db_j6gen2_v9",
+    "db_j6gen2_v10",
+    "db_j6gen2_v11",
+    "db_j6gen2_v12",
     "db_largebus_v1",
     "db_largebus_v2",
     "db_largebus_v3",
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py
deleted file mode 100644
index e4375d576..000000000
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py
+++ /dev/null
@@ -1,194 +0,0 @@
-custom_imports = dict(
-    imports=[
-        "autoware_ml.detection3d.datasets.t4dataset",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric",
-        "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2",
-    ]
-)
-
-# dataset type setting
-dataset_type = "T4Dataset"
-info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl"
-info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl"
-info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl"
-
-info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet"
-info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet"
-info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet"
-
-# dataset scene setting
-dataset_version_list = [
-    "db_j6gen2_v2",
-]
-
-dataset_test_groups = {
-    "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True),
-}
-
-# dataset format setting
-data_prefix = dict(
-    pts="",
-    CAM_FRONT="",
-    CAM_FRONT_LEFT="",
-    CAM_FRONT_RIGHT="",
-    CAM_BACK="",
-    CAM_BACK_RIGHT="",
-    CAM_BACK_LEFT="",
-    sweeps="",
-)
-camera_types = {
-    "CAM_FRONT",
-    "CAM_FRONT_RIGHT",
-    "CAM_FRONT_LEFT",
-    "CAM_BACK",
-    "CAM_BACK_LEFT",
-    "CAM_BACK_RIGHT",
-}
-
-# class setting
-name_mapping = {
-    # DBv1.0
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.emergency (ambulance & police)": "car",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus (bendy & rigid)": "bus",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "movable_object.barrier": "barrier",
-    "movable_object.debris": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.trafficcone": "traffic_cone",
-    "movable_object.traffic_cone": "traffic_cone",
-    "animal": "animal",
-    "static_object.bicycle_rack": "bicycle_rack",
-    # DBv1.1 and UCv2.0
-    "car": "car",
-    "truck": "truck",
-    "bus": "bus",
-    "trailer": "trailer",
-    "motorcycle": "bicycle",
-    "bicycle": "bicycle",
-    "police_car": "car",
-    "pedestrian": "pedestrian",
-    "police_officer": "pedestrian",
-    "forklift": "car",
-    "construction_worker": "pedestrian",
-    "stroller": "pedestrian",
-    # DBv2.0 and DBv3.0
-    "animal": "animal",
-    "movable_object.barrier": "barrier",
-    "movable_object.pushable_pullable": "barrier",
-    "movable_object.traffic_cone": "traffic_cone",
-    "pedestrian.adult": "pedestrian",
-    "pedestrian.child": "pedestrian",
-    "pedestrian.construction_worker": "pedestrian",
-    "pedestrian.personal_mobility": "pedestrian",
-    "pedestrian.police_officer": "pedestrian",
-    "pedestrian.stroller": "pedestrian",
-    "pedestrian.wheelchair": "pedestrian",
-    "static_object.bicycle rack": "bicycle rack",
-    "static_object.bollard": "bollard",
-    "vehicle.ambulance": "car",  # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car
-    "vehicle.bicycle": "bicycle",
-    "vehicle.bus": "bus",
-    "vehicle.car": "car",
-    "vehicle.construction": "truck",
-    "vehicle.fire": "truck",
-    "vehicle.motorcycle": "bicycle",
-    "vehicle.police": "car",
-    "vehicle.trailer": "trailer",
-    "vehicle.truck": "truck",
-    # DBv1.3
-    "ambulance": "car",
-    "kart": "car",
-    "wheelchair": "pedestrian",
-    "personal_mobility": "pedestrian",
-    "fire_truck": "truck",
-    "semi_trailer": "trailer",
-    "tractor_unit": "truck",
-    "construction_vehicle": "truck",
-    "traffic_cone": "traffic_cone",
-    "trafficcone": "traffic_cone",
-    "barrier": "barrier",
-}
-
-class_names = [
-    "car",
-    "truck",
-    "bus",
-    "bicycle",
-    "pedestrian",
-    "traffic_cone",
-    "barrier",
-]
-num_class = len(class_names)
-metainfo = dict(classes=class_names)
-
-merge_objects = [
-    ("truck", ["truck", "trailer"]),
-]
-merge_type = "extend_longer"  # One of ["extend_longer","union", None]
-
-# visualization
-class_colors = {
-    "car": (30, 144, 255),
-    "truck": (140, 0, 255),
-    "construction_vehicle": (255, 255, 0),
-    "bus": (111, 255, 111),
-    "trailer": (0, 255, 255),
-    "barrier": (0, 0, 0),
-    "motorcycle": (100, 0, 30),
-    "bicycle": (255, 0, 30),
-    "pedestrian": (255, 200, 200),
-    "traffic_cone": (120, 120, 120),
-}
-camera_panels = [
-    "data/CAM_FRONT_LEFT",
-    "data/CAM_FRONT",
-    "data/CAM_FRONT_RIGHT",
-    "data/CAM_BACK_LEFT",
-    "data/CAM_BACK",
-    "data/CAM_BACK_RIGHT",
-]
-
-filter_attributes = [
-    ("vehicle.bicycle", "vehicle_state.parked"),
-    ("vehicle.bicycle", "cycle_state.without_rider"),
-    ("vehicle.bicycle", "motorcycle_state.without_rider"),
-    ("vehicle.motorcycle", "vehicle_state.parked"),
-    ("vehicle.motorcycle", "cycle_state.without_rider"),
-    ("vehicle.motorcycle", "motorcycle_state.without_rider"),
-    ("bicycle", "vehicle_state.parked"),
-    ("bicycle", "cycle_state.without_rider"),
-    ("bicycle", "motorcycle_state.without_rider"),
-    ("motorcycle", "vehicle_state.parked"),
-    ("motorcycle", "cycle_state.without_rider"),
-    ("motorcycle", "motorcycle_state.without_rider"),
-]
-
-evaluator_metric_configs = dict(
-    evaluation_task="detection",
-    target_labels=class_names,
-    center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0],
-    # plane_distance_thresholds is required for the pass fail evaluation
-    plane_distance_thresholds=[2.0, 4.0],
-    iou_2d_thresholds=None,
-    iou_3d_thresholds=None,
-    label_prefix="autoware",
-    # bev minimum distance ranges for each range bucket, must be the same length as max_distance,
-    # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering
-    min_distance=[0.0, 50.0, 90.0, 0.0],
-    # bev maximum distance ranges for each range bucket, must be the same length as min_distance
-    max_distance=[50.0, 90.0, 121.0, 121.0],
-    min_point_numbers=0,
-    matching_class_agnostic_fps=False,
-)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py
index 213f0041b..64d494655 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py
@@ -1,10 +1,10 @@
 _base_ = [
-    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py",
+    "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py",
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2"
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type
+experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # Add evaluator configs

From e616c02adf9a2451a9e9e26088a2c7469b531435 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 18 May 2026 23:35:01 +0900
Subject: [PATCH 109/183] Add local 3d box expand

---
 projects/BEVFusion/bevfusion/utils.py         |  28 ++--
 .../default_lidar_second_secfpn_120m.py       |  16 +-
 .../pipelines/default_lidar_120m_width.py     | 150 ------------------
 3 files changed, 25 insertions(+), 169 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py

diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index c47604dbd..84797cc51 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -85,26 +85,31 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
             final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1)
 
         predictions_dicts = []
-        for i in range(heatmap.shape[0]):
-            boxes3d = final_box_preds[i]
-            scores = final_scores[i]
-            labels = final_preds[i]
-            predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels}
-            predictions_dicts.append(predictions_dict)
-
-        if filter is False:
+        if not filter:
+            for i in range(heatmap.shape[0]):
+                boxes3d = final_box_preds[i]
+                scores = final_scores[i]
+                labels = final_preds[i]
+                predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels}
+                predictions_dicts.append(predictions_dict)
             return predictions_dicts
-
+        
         # use score threshold
         if self.score_threshold is not None:
-            thresh_mask = final_scores > self.score_threshold
+            if isinstance(self.score_threshold, (int, float)):  # one scalar threshold for all classes (ints too, e.g. 0)
+                thresh_mask = final_scores > self.score_threshold
+            elif isinstance(self.score_threshold, (list, tuple)):  # per-class thresholds, looked up by predicted label
+                score_threshold = final_scores.new_tensor(self.score_threshold)
+                thresh_mask = final_scores > score_threshold[final_preds]
+            else:
+                raise ValueError("score_threshold must be a number or a list/tuple of per-class thresholds")
 
+        predictions_dicts = []
         if self.post_center_range is not None:
             self.post_center_range = torch.tensor(self.post_center_range, device=heatmap.device)
             mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(2)
             mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(2)
 
-            predictions_dicts = []
             for i in range(heatmap.shape[0]):
                 cmask = mask[i, :]
                 if self.score_threshold:
@@ -114,7 +119,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
                 scores = final_scores[i, cmask]
                 labels = final_preds[i, cmask]
                 predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels}
-
                 predictions_dicts.append(predictions_dict)
         else:
             raise NotImplementedError(
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 5a880d975..f1fa5a90d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -84,14 +84,14 @@
         test_cfg=dict(
             dataset="t4datasets",
             out_size_factor=8,
-            nms_type=None,  # Set to "circle" for circle_nms
+            nms_type="circle",  # Set to "circle" for circle_nms
             # Set NMS for different clusters
             nms_clusters=[
-                dict(class_names=["car", "truck", "bus"], nms_threshold=0.5),  # It's radius if using circle_nms
-                dict(class_names=["bicycle"], nms_threshold=0.5),
-                dict(class_names=["pedestrian"], nms_threshold=0.175),
-                dict(class_names=["barrier"], nms_threshold=0.5),
-                dict(class_names=["traffic_cone"], nms_threshold=0.175),
+                dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5),  # It's radius if using circle_nms
+                dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0),
+                dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0),
+                dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0),
+                dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0),
             ],
         ),
         dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"],  # Use class indices for pooling
@@ -99,7 +99,9 @@
         bbox_coder=dict(
             type="TransFusionBBoxCoder",
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
-            score_threshold=0.0,
+            # score_threshold=0.03,
+            # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
+            score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015],
             out_size_factor=8,
             code_size=10,
         ),
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py
deleted file mode 100644
index 0b32cc86a..000000000
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Dataset parameters
-backend_args = None
-num_workers = 16
-input_modality = dict(use_lidar=True, use_camera=False)
-
-# range setting
-point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.17, 0.17, 0.2]
-grid_size = [1440, 1440, 41]
-eval_class_range = {
-    "car": 120,
-    "truck": 120,
-    "bus": 120,
-    "bicycle": 120,
-    "pedestrian": 120,
-    "traffic_cone": 120,
-    "barrier": 120,
-}
-
-# LiDAR parameters
-point_load_dim = 5  # x, y, z, intensity, ring_id
-point_use_dim = 4
-lidar_sweep_dims = [0, 1, 2, 4]  # x, y, z, time_lag
-sweeps_num = 1
-
-train_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=False,
-    ),
-    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
-		# For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirros
-		dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]),
-		# For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70
-		dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]),
-    dict(
-        type="BEVFusionGlobalRotScaleTrans",
-        scale_ratio_range=[0.95, 1.05],
-        rot_range=[-0.78539816, 0.78539816],
-        translation_std=[0.5, 0.5, 0.2],
-    ),
-    dict(type="BEVFusionRandomFlip3D"),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
-    dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range),
-    dict(
-        type="ObjectNameFilter",
-        classes=[
-            "car",
-            "truck",
-            "bus",
-            "bicycle",
-            "pedestrian",
-            "traffic_cone",
-            "barrier",
-        ],
-    ),
-    dict(type="PointShuffle"),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "transformation_3d_flow",
-            "pcd_rotation",
-            "pcd_scale_factor",
-            "pcd_trans",
-            "img_aug_matrix",
-            "lidar_aug_matrix",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",
-        ],
-    ),
-]
-
-test_pipeline = [
-    dict(
-        type="LoadPointsFromFile",
-        coord_type="LIDAR",
-        load_dim=point_load_dim,
-        use_dim=point_load_dim,
-        backend_args=backend_args,
-    ),
-    dict(
-        type="LoadPointsFromMultiSweeps",
-        sweeps_num=sweeps_num,
-        load_dim=point_load_dim,
-        use_dim=lidar_sweep_dims,
-        pad_empty_sweeps=True,
-        remove_close=True,
-        backend_args=backend_args,
-        test_mode=True,
-    ),
-    dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range),
-    dict(
-        type="Pack3DDetInputs",
-        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
-        meta_keys=[
-            "cam2img",
-            "ori_cam2img",
-            "lidar2cam",
-            "lidar2img",
-            "cam2lidar",
-            "ori_lidar2img",
-            "img_aug_matrix",
-            "box_type_3d",
-            "sample_idx",
-            "lidar_path",
-            "img_path",
-            "num_pts_feats",
-            "num_views",
-            "timestamp",
-            "vehicle_type",
-            "city",
-            "traffic_cone_barrier_status",
-        ],
-    ),
-]
-
-# Filtering configuration
-# Note:
-# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering,
-#   e.g., dict(filter_frames_with_missing_image=True).
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so
-#   image-based filtering does not apply and `filter_cfg` is intentionally None.
-filter_cfg = None

From b5036550910474284c7346d746060eff066d4b95 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 15 May 2026 22:38:53 +0900
Subject: [PATCH 110/183] Append _normalized to 48-channel lidar experiment name

---
 ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
index 36c39dd5a..3208a592c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
@@ -16,7 +16,7 @@
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels"
+experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter

From 6a9a47578d58018db28cab42b1e5364666bd4302 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 18 May 2026 23:41:31 +0900
Subject: [PATCH 111/183] Remove BEVFusion-L 48-channel lidar config

---
 ..._secfpn_50e_8xb16_base_120m_48_channels.py | 158 ------------------
 1 file changed, 158 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
deleted file mode 100644
index 3208a592c..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py
+++ /dev/null
@@ -1,158 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_lidar_120m.py",
-    "../default/models/default_lidar_second_secfpn_120m.py",
-    "../default/schedulers/default_50e_8xb16_adamw_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
-
-experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    voxelize_cfg=dict(
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-    ),
-    pts_voxel_encoder=dict(
-        type="BEVFusionVoxelFeatureNet", 
-        in_channels=len(_base_.lidar_sweep_dims),
-        with_distance=False,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        feat_channels=[16],
-        point_cloud_range=_base_.point_cloud_range,
-        voxel_size=_base_.voxel_size,
-        norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
-        # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2],
-        legacy=False
-    ),
-    pts_middle_encoder=dict(
-        in_channels=48,
-        sparse_shape=_base_.grid_size,
-    ),
-    bbox_head=dict(
-        class_names=_base_.class_names,  # Use class names to identify the correct class indices
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)

From f0f4542b29768388482152b5f5ff5b127cd795b7 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 17:22:48 +0900
Subject: [PATCH 112/183] Add sparse encoder stride args and use 0.15 m voxels for 120m lidar

---
 projects/BEVFusion/bevfusion/sparse_encoder.py    | 15 +++++++++++----
 ...dar_voxel_second_secfpn_50e_8xb16_base_120m.py |  4 ++--
 .../models/default_lidar_second_secfpn_120m.py    |  9 ++++++---
 .../default/pipelines/default_lidar_120m.py       |  4 ++--
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index ce45d4536..6e98a73ab 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -1,4 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+
+from typing import Dict, Optional
+
+import numpy as np
+import torch
+
 from mmdet3d.models.layers import make_sparse_convmodule
 from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
 from mmdet3d.models.middle_encoders import SparseEncoder
@@ -9,8 +15,6 @@
 else:
     from mmcv.ops import SparseConvTensor
 
-import numpy as np
-import torch
 
 
 @MODELS.register_module()
@@ -56,6 +60,8 @@ def __init__(
         encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)),
         block_type="conv_module",
         return_middle_feats=False,
+        encoder_strides=(2, 2, 2, -1),
+        output_stride=2,
     ):
         super(SparseEncoder, self).__init__()
         assert block_type in ["conv_module", "basicblock"]
@@ -66,6 +72,7 @@ def __init__(
         self.output_channels = output_channels
         self.encoder_channels = encoder_channels
         self.encoder_paddings = encoder_paddings
+        self.encoder_strides = encoder_strides
         self.stage_num = len(self.encoder_channels)
         self.fp16_enabled = False
         self.return_middle_feats = return_middle_feats
@@ -110,7 +117,7 @@ def __init__(
             indice_key="spconv_down2",
             conv_type="SparseConv3d",
         )
-
+    
     def forward(self, voxel_features, coors, batch_size):
         """Forward of SparseEncoder.
 
@@ -138,7 +145,7 @@ def forward(self, voxel_features, coors, batch_size):
         for encoder_layer in self.encoder_layers:
             x = encoder_layer(x)
             encode_features.append(x)
-
+        
         # for detection head
         # [200, 176, 5] -> [200, 176, 2]
         out = self.conv_out(encode_features[-1])
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 6d3a1f93b..28499b4f9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -13,9 +13,9 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
+info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index f1fa5a90d..e3297de3d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -1,6 +1,7 @@
 num_proposals = 500
 max_num_points = 32
 max_voxels = [120000, 160000]
+out_size_factor = 8
 
 model = dict(
     type="BEVFusion",
@@ -23,7 +24,9 @@
         norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01),
         encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
         encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)),
+        encoder_strides=(2, 2, 2, -1), # No stride for the last stage
         block_type="basicblock",
+        output_stride=2, # downsample stride
     ),
     pts_backbone=dict(
         type="SECOND",
@@ -68,7 +71,7 @@
         ),
         train_cfg=dict(
             dataset="t4datasets",
-            out_size_factor=8,
+            out_size_factor=out_size_factor,
             gaussian_overlap=0.1,
             min_radius=2,
             pos_weight=-1,
@@ -83,7 +86,7 @@
         ),
         test_cfg=dict(
             dataset="t4datasets",
-            out_size_factor=8,
+            out_size_factor=out_size_factor,
             nms_type="circle",  # Set to "circle" for circle_nms
             # Set NMS for different clusters
             nms_clusters=[
@@ -102,7 +105,7 @@
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
             score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015],
-            out_size_factor=8,
+            out_size_factor=out_size_factor,
             code_size=10,
         ),
         loss_cls=dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 09b9f7b26..317c594c1 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -5,8 +5,8 @@
 
 # range setting
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.17, 0.17, 0.2]
-grid_size = [1440, 1440, 41]
+voxel_size = [0.15, 0.15, 0.2]
+grid_size = [1632, 1632, 41]
 eval_class_range = {
     "car": 120,
     "truck": 120,

From c9c34bb3c35c39bd452dde6d35e5936072d66fc6 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 17:24:16 +0900
Subject: [PATCH 113/183] Pass post_max_size to circle NMS and retune NMS/score thresholds

---
 projects/BEVFusion/bevfusion/bevfusion_head.py       |  1 +
 ..._secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py |  2 +-
 .../models/default_lidar_second_secfpn_120m.py       | 12 ++++++------
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 69417347b..96c38658b 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -435,6 +435,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F
                                     circle_nms(
                                         boxes_for_nms.detach().cpu().numpy(),
                                         nms_cluster["nms_threshold"],
+                                        post_max_size=nms_cluster["post_max_size"],
                                     )
                                 )
                             else:
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index 3bdda213e..4a32f99a0 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index e3297de3d..8b450d72a 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -90,11 +90,11 @@
             nms_type="circle",  # Set to "circle" for circle_nms
             # Set NMS for different clusters
             nms_clusters=[
-                dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5),  # It's radius if using circle_nms
-                dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0),
-                dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0),
-                dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0),
-                dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0),
+                dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
+                dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50),
+                dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100),
+                dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
+                dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50),
             ],
         ),
         dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"],  # Use class indices for pooling
@@ -104,7 +104,7 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015],
+            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010],
             out_size_factor=out_size_factor,
             code_size=10,
         ),

From a8073f8ddf7bf1c876b87c2eb1a489bfc644b3f5 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 17:24:39 +0900
Subject: [PATCH 114/183] Raise barrier score threshold and rename experiment group to nms_v3

---
 ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +-
 .../default/models/default_lidar_second_secfpn_120m.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 71c1829d4..b3858fb06 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 8b450d72a..e871fce58 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -104,8 +104,8 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010],
-            out_size_factor=out_size_factor,
+            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015],
+            out_size_factor=8,
             code_size=10,
         ),
         loss_cls=dict(

From be70f2fbb991e5d1272df5689bdeeb775df3d02f Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 17:23:07 +0900
Subject: [PATCH 115/183] Move nms_v3 experiment group name to the t4metric_v2 config

---
 ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +-
 ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index b3858fb06..71c1829d4 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index 4a32f99a0..c77e0332b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 

From 25d154bd7967b381be76b417d2f7e50c3a3313ca Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 18:52:03 +0900
Subject: [PATCH 116/183] Update camera config structure

---
 ...net50_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ----------------
 ...snet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ----------------
 ..._swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ----------------
 ...a_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ----------------
 ...swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py | 137 ----------------
 ..._swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py | 137 ----------------
 ...sion_camera_30e_8xb16_j6gen2_base_120m.py} |  37 +----
 ...usion_camera_30e_8xb16_j6gen2_base_50m.py} |  37 +----
 ...t_bevfusion_camera_50e_8xb16_base_120m.py} |  35 +----
 ...lt_bevfusion_camera_50e_8xb16_base_50m.py} |  35 +----
 ...fpn_depthlss_30e_8xb16_j6gen2_base_120m.py |  32 ++++
 ...snet50_fpn_depthlss_50e_8xb16_base_120m.py |  32 ++++
 ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py |  32 ++++
 ...era_resnet50_fpn_lss_50e_8xb16_base_50m.py |  32 ++++
 ...a_swin_fpn_depthlss_50e_8xb16_base_120m.py |  32 ++++
 ..._camera_swin_fpn_lss_50e_8xb16_base_50m.py |  32 ++++
 ...swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py |  32 ++++
 ..._swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py |  32 ++++
 ...0_fpn_depthlss_lidar_second_secfpn_120m.py |   2 +-
 .../camera_resnet50_fpn_depthlss_120m.py}     |   6 +-
 .../camera_resnet50_fpn_lss_50m.py}           |   5 +-
 .../camera_swin_fpn_depthlss_120m.py}         |   2 +-
 ..._fpn_depthlss_lidar_second_secfpn_120m.py} |   2 +-
 .../camera_swin_fpn_lss_50m.py}               |   2 +-
 .../pipelines/cameras/default_camera_120m.py  | 147 ++++++++++++++++++
 .../default_camera_50m.py}                    |   6 +-
 .../default/pipelines/default_lidar_50m.py    |   6 +
 27 files changed, 437 insertions(+), 963 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
 delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py => default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py} (74%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py => default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py} (76%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py => default_bevfusion_camera_50e_8xb16_base_120m.py} (75%)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py => default_bevfusion_camera_50e_8xb16_base_50m.py} (74%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py
 create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_resnet50_fpn_depthlss_120m.py => resnet50/camera_resnet50_fpn_depthlss_120m.py} (94%)
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_resnet50_fpn_lss_50m.py => resnet50/camera_resnet50_fpn_lss_50m.py} (83%)
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_depthlss_120m.py => swin_transformer/camera_swin_fpn_depthlss_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py => swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py} (97%)
 rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lss_50m.py => swin_transformer/camera_swin_fpn_lss_50m.py} (98%)
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
 rename projects/BEVFusion/configs/t4dataset/default/pipelines/{default_camera_base_50m.py => cameras/default_camera_50m.py} (96%)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
deleted file mode 100644
index 4c809264e..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lss_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
deleted file mode 100644
index 927310e7d..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_base_50m.py",
-    "../default/models/default_camera_swin_fpn_lss_50m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py
deleted file mode 100644
index 4c809264e..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lss_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py
deleted file mode 100644
index 927310e7d..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_base_50m.py",
-    "../default/models/default_camera_swin_fpn_lss_50m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
deleted file mode 100644
index 4c809264e..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lss_120m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
deleted file mode 100644
index 927310e7d..000000000
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py
+++ /dev/null
@@ -1,137 +0,0 @@
-_base_ = [
-    "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
-    "../default/pipelines/default_camera_base_50m.py",
-    "../default/models/default_camera_swin_fpn_lss_50m.py",
-    "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py",
-    "../default/default_misc.py",
-]
-
-custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False)
-custom_imports["imports"] += _base_.custom_imports["imports"]
-custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
-
-# user setting
-data_root = "data/t4datasets/"
-info_directory_path = "info/kokseang_2_6_1/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
-# Dataset parameters
-train_dataloader = dict(
-    batch_size=_base_.train_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=True),
-    dataset=dict(
-        type=_base_.dataset_type,
-        pipeline=_base_.train_pipeline,
-        modality=_base_.input_modality,
-        backend_args=_base_.backend_args,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_train_file_name,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        test_mode=False,
-        data_prefix=_base_.data_prefix,
-        box_type_3d="LiDAR",
-        filter_cfg=_base_.filter_cfg,
-    ),
-)
-
-val_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_val_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-test_dataloader = dict(
-    batch_size=_base_.test_batch_size,
-    num_workers=_base_.num_workers,
-    persistent_workers=True,
-    sampler=dict(type="DefaultSampler", shuffle=False),
-    dataset=dict(
-        type=_base_.dataset_type,
-        data_root=data_root,
-        ann_file=info_directory_path + _base_.info_test_file_name,
-        pipeline=_base_.test_pipeline,
-        metainfo=_base_.metainfo,
-        class_names=_base_.class_names,
-        modality=_base_.input_modality,
-        data_prefix=_base_.data_prefix,
-        test_mode=True,
-        box_type_3d="LiDAR",
-        backend_args=_base_.backend_args,
-    ),
-)
-
-val_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_val_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-)
-
-test_evaluator = dict(
-    type="T4Metric",
-    data_root=data_root,
-    ann_file=data_root + info_directory_path + _base_.info_test_file_name,
-    metric="bbox",
-    backend_args=_base_.backend_args,
-    class_names=_base_.class_names,
-    name_mapping=_base_.name_mapping,
-    eval_class_range=_base_.eval_class_range,
-    filter_attributes=_base_.filter_attributes,
-    save_csv=True,
-)
-
-default_hooks = dict(
-    logger=dict(type="LoggerHook", interval=50),
-    checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
-)
-log_processor = dict(window_size=50)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py
similarity index 74%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py
index 42f93d1b1..987c13393 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py
@@ -1,9 +1,8 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lss_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/cameras/default_camera_120m.py",
+    "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,35 +12,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
+info_directory_path = "info/kokseang_2_8_0/"
 
 # Dataset parameters
 train_dataloader = dict(
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
similarity index 76%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
index 7c5a5f91f..ceedda1c9 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
@@ -1,9 +1,8 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
-    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_base_50m.py",
-    "../default/models/default_camera_swin_fpn_lss_50m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py",
+    "../default/pipelines/cameras/default_camera_50m.py",
+    "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,36 +12,12 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
+info_directory_path = "info/kokseang_2_8_0/"
 
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m"
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
-
 # Dataset parameters
 train_dataloader = dict(
     batch_size=_base_.train_batch_size,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py
similarity index 75%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py
index 7c5a5f91f..7a81be126 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py
@@ -1,9 +1,8 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_base_50m.py",
-    "../default/models/default_camera_swin_fpn_lss_50m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/pipelines/cameras/default_camera_120m.py",
+    "../default/schedulers/default_50e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,35 +12,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
+info_directory_path = "info/kokseang_2_8_0/"
 
 # Dataset parameters
 train_dataloader = dict(
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py
similarity index 74%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py
index 42f93d1b1..4b79e2102 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py
@@ -1,9 +1,8 @@
 _base_ = [
     "../../../../../autoware_ml/configs/detection3d/default_runtime.py",
     "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py",
-    "../default/pipelines/default_camera_lidar_intensity_120m.py",
-    "../default/models/default_camera_swin_fpn_lss_120m.py",
-    "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py",
+    "../default/pipelines/cameras/default_camera_50m.py",
+    "../default/schedulers/default_50e_8xb16_adamw_linear_cosine.py",
     "../default/default_misc.py",
 ]
 
@@ -13,35 +12,7 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_6_2/"
-
-experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
-# model parameter
-model = dict(
-    type="BEVFusion",
-    view_transform=dict(image_size=_base_.image_size),
-    bbox_head=dict(
-        class_names=_base_.class_names,
-        in_channels=80,
-        train_cfg=dict(
-            point_cloud_range=_base_.point_cloud_range,
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size,
-        ),
-        test_cfg=dict(
-            grid_size=_base_.grid_size,
-            voxel_size=_base_.voxel_size[0:2],
-            pc_range=_base_.point_cloud_range[0:2],
-        ),
-        bbox_coder=dict(
-            pc_range=_base_.point_cloud_range[0:2],
-            voxel_size=_base_.voxel_size[0:2],
-        ),
-    ),
-)
+info_directory_path = "info/kokseang_2_8_0/"
 
 # Dataset parameters
 train_dataloader = dict(
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
new file mode 100644
index 000000000..e73416744
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
new file mode 100644
index 000000000..ebdfff437
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_50e_8xb16_base_120m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
new file mode 100644
index 000000000..e23efb65a
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
new file mode 100644
index 000000000..7bf63010b
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_50e_8xb16_base_50m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py
new file mode 100644
index 000000000..56c2930bb
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_50e_8xb16_base_120m.py",  # 120m defaults, to match the 120m model config below
+    "../../default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py
new file mode 100644
index 000000000..8d1ff7681
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_50e_8xb16_base_50m.py",
+    "../../default/models/swin_transformer/camera_swin_fpn_lss_50m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py
new file mode 100644
index 000000000..401ac7861
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py",  # NOTE(review): 30e base, but file/experiment name say 50e - confirm
+    "../../default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py",  # NOTE(review): depthlss model, file name says lss - confirm
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb16_j6gen2_base_120m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py
new file mode 100644
index 000000000..80e81be39
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py
@@ -0,0 +1,32 @@
+_base_ = [
+    "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py",  # NOTE(review): 30e base, but file/experiment name say 50e - confirm
+    "../../default/models/swin_transformer/camera_swin_fpn_lss_50m.py",
+]
+
+experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m"
+work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
+
+# model parameter
+model = dict(
+    type="BEVFusion",
+    view_transform=dict(image_size=_base_.image_size),
+    bbox_head=dict(
+        class_names=_base_.class_names,
+        in_channels=80,
+        train_cfg=dict(
+            point_cloud_range=_base_.point_cloud_range,
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size,
+        ),
+        test_cfg=dict(
+            grid_size=_base_.grid_size,
+            voxel_size=_base_.voxel_size[0:2],
+            pc_range=_base_.point_cloud_range[0:2],
+        ),
+        bbox_coder=dict(
+            pc_range=_base_.point_cloud_range[0:2],
+            voxel_size=_base_.voxel_size[0:2],
+        ),
+    ),
+)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
index 43e8dd9ac..339f3e97e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py
@@ -23,7 +23,7 @@
         norm_eval=False,
         with_cp=False,
         style="pytorch",
-				init_cfg=dict(
+        init_cfg=dict(
             type="Pretrained",
             checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth",  # noqa: E251
         ),
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
similarity index 94%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
index 6203da514..a6ccca5dc 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
@@ -1,5 +1,5 @@
 _base_ = [
-    "./default_lidar_second_secfpn_120m.py",
+    "../default_lidar_second_secfpn_120m.py",
 ]
 
 # Image network
@@ -18,7 +18,7 @@
         bgr_to_rgb=False,
         rgb_to_bgr=False,
     ),
-		img_backbone=dict(
+    img_backbone=dict(
         pretrained="torchvision://resnet50",
         type="ResNet",
         depth=50,
@@ -29,7 +29,7 @@
         norm_eval=False,
         with_cp=False,
         style="pytorch",
-				init_cfg=dict(
+        init_cfg=dict(
             type="Pretrained",
             checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth",  # noqa: E251
         ),
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
similarity index 83%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 5577723bf..ca3e8f8a2 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,9 +6,6 @@
 model = dict(
     view_transform=dict(
         type="LSSTransform",
-        in_channels=256,
-        out_channels=80,
-        feature_size=[48, 96],
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],
@@ -19,5 +16,5 @@
         bbox_coder=dict(
             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
         ),
-    )
+    ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py
index c4b0cd9ab..88e74efc7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py
@@ -1,5 +1,5 @@
 _base_ = [
-    "./default_lidar_second_secfpn_120m.py",
+    "../default_lidar_second_secfpn_120m.py",
 ]
 
 # Image network
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
similarity index 97%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
index 55c6ca3cd..2ac22b1b6 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py
@@ -1,5 +1,5 @@
 _base_ = [
-    "./default_lidar_second_secfpn_120m.py",
+    "../default_lidar_second_secfpn_120m.py",
 ]
 
 # Image network
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py
similarity index 98%
rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py
index 39a4a637f..1294416ad 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py
@@ -19,5 +19,5 @@
         bbox_coder=dict(
             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
         ),
-    )
+    ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
new file mode 100644
index 000000000..fc7338699
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -0,0 +1,147 @@
+## This config is for the camera-only model; the pipelines still load LiDAR points (hence use_lidar=True below)
+
+_base_ = [
+    "../default_lidar_120m.py",
+]
+input_modality = dict(use_lidar=True, use_camera=True)
+
+# Image parameters
+image_size = [384, 768]  # Height, Width
+camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"]
+
+train_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=_base_.backend_args,
+        camera_order=camera_order,
+    ),
+    # We keep loading LiDAR points to make downstream BEV augmentation easier
+    dict(
+        type="LoadPointsFromFile",
+        coord_type="LIDAR",
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.point_load_dim,
+        backend_args=_base_.backend_args,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=False,
+    ),
+    dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.28, 0.40],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=True,
+        is_train=True,
+    ),
+    dict(
+        type="BEVFusionGlobalRotScaleTrans",
+        scale_ratio_range=[0.95, 1.05],
+        rot_range=[-0.78539816, 0.78539816],
+        translation_std=[0.5, 0.5, 0.2],
+    ),
+    dict(type="BEVFusionRandomFlip3D"),
+    dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+    dict(
+        type="ObjectNameFilter",
+        classes=[
+            "car",
+            "truck",
+            "bus",
+            "bicycle",
+            "pedestrian",
+            "traffic_cone",
+            "barrier",
+        ],
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "transformation_3d_flow",
+            "pcd_rotation",
+            "pcd_scale_factor",
+            "pcd_trans",
+            "img_aug_matrix",  # NOTE(review): duplicate of the "img_aug_matrix" entry earlier in this list
+            "lidar_aug_matrix",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+test_pipeline = [
+    dict(
+        type="BEVLoadMultiViewImageFromFiles",
+        to_float32=True,
+        color_type="color",
+        backend_args=_base_.backend_args,
+        camera_order=camera_order,
+    ),
+    dict(
+        type="LoadPointsFromMultiSweeps",  # NOTE(review): no LoadPointsFromFile step precedes this one here - confirm "points" is populated
+        sweeps_num=_base_.sweeps_num,
+        load_dim=_base_.point_load_dim,
+        use_dim=_base_.lidar_sweep_dims,
+        pad_empty_sweeps=True,
+        remove_close=True,
+        backend_args=_base_.backend_args,
+        test_mode=True,
+    ),
+    dict(
+        type="ImageAug3D",
+        final_dim=image_size,
+        resize_lim=[0.34, 0.34],
+        bot_pct_lim=[0.0, 0.0],
+        rot_lim=[0.0, 0.0],
+        rand_flip=False,
+        is_train=False,
+    ),
+    dict(
+        type="Pack3DDetInputs",
+        keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"],
+        meta_keys=[
+            "cam2img",
+            "ori_cam2img",
+            "lidar2cam",
+            "lidar2img",
+            "cam2lidar",
+            "ori_lidar2img",
+            "img_aug_matrix",
+            "box_type_3d",
+            "sample_idx",
+            "lidar_path",
+            "img_path",
+            "num_pts_feats",
+            "num_views",
+            "timestamp",
+            "vehicle_type",
+            "city",
+        ],
+    ),
+]
+
+filter_cfg = dict(filter_frames_with_camera_order=camera_order)
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
similarity index 96%
rename from projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
rename to projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 37c17e79a..77470a938 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -1,7 +1,7 @@
 ## This config is for the camera_base only model, without lidar points
 
 _base_ = [
-    "./default_lidar_50m.py",
+    "../default_lidar_50m.py",
 ]
 input_modality = dict(use_lidar=True, use_camera=True)
 
@@ -17,7 +17,7 @@
         backend_args=_base_.backend_args,
         camera_order=camera_order,
     ),
-    # We keep loading LiDAR points to make downstream BEV augmentation easier 
+    # We keep loading LiDAR points to make downstream BEV augmentation easier
     dict(
         type="LoadPointsFromFile",
         coord_type="LIDAR",
@@ -84,6 +84,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
@@ -125,6 +126,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index 964d6eef9..80fdac189 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -13,6 +13,8 @@
     "bus": 54.0,
     "bicycle": 54.0,
     "pedestrian": 54.0,
+    "traffic_cone": 54.0,
+    "barrier": 54.0,
 }
 
 # LiDAR parameters
@@ -57,6 +59,8 @@
             "bus",
             "bicycle",
             "pedestrian",
+            "traffic_cone",
+            "barrier",
         ],
     ),
     dict(type="PointShuffle"),
@@ -84,6 +88,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]
@@ -127,6 +132,7 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
         ],
     ),
 ]

From 842dec226224e26258324b0ef8d19c2f16dcbf6a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 18:57:06 +0900
Subject: [PATCH 117/183] Updated

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py  |   9 +
 .../bevfusion/ops/bev_pool_v2/__init__.py     |   3 +
 .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py  | 190 ++++++++++++++++++
 .../ops/bev_pool_v2/src/bev_pool.cpp          | 111 ++++++++++
 .../ops/bev_pool_v2/src/bev_pool_cuda.cu      | 140 +++++++++++++
 projects/BEVFusion/setup.py                   |   8 +
 6 files changed, 461 insertions(+)
 create mode 100644 projects/BEVFusion/bevfusion/depth_lss_v2.py
 create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
 create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
 create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp
 create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
new file mode 100644
index 000000000..974a39cce
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -0,0 +1,9 @@
+from typing import Tuple
+
+import torch
+from mmdet3d.registry import MODELS
+from torch import nn
+
+from .depth_lss import DepthLSSNet, DownSampleNet, LidarDepthImageNet, BaseViewTransform
+from .ops import bev_pool_v2
+
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
new file mode 100644
index 000000000..549a97e81
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
@@ -0,0 +1,3 @@
+from .bev_pool_v2 import bev_pool_v2
+
+__all__ = ["bev_pool_v2"]
\ No newline at end of file
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
new file mode 100644
index 000000000..b1d2f03af
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -0,0 +1,190 @@
+# Copyright (c) Phigent Robotics. All rights reserved.
+
+import numpy as np
+import torch
+
+from . import bev_pool_v2_ext
+
+
+class QuickCumsumV2TrainingCuda(torch.autograd.Function):
+    r"""BEVPoolv2 implementation for Lift-Splat-Shoot view transformation.
+
+    Please refer to the `paper <https://arxiv.org/abs/2211.17111>`_
+    """
+    @staticmethod
+    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                bev_feat_shape, interval_starts, interval_lengths):
+        ranks_bev = ranks_bev.int()
+        depth = depth.contiguous().float()
+        feat = feat.contiguous().float()
+        ranks_depth = ranks_depth.contiguous().int()
+        ranks_feat = ranks_feat.contiguous().int()
+        interval_lengths = interval_lengths.contiguous().int()
+        interval_starts = interval_starts.contiguous().int()
+
+        out = feat.new_zeros(bev_feat_shape)
+
+        bev_pool_v2_ext.bev_pool_v2_forward(
+            depth,
+            feat,
+            out,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            interval_lengths,
+            interval_starts,
+        )
+
+        ctx.save_for_backward(ranks_bev, depth, feat, ranks_feat, ranks_depth)
+        return out
+
+    @staticmethod
+    def backward(ctx, out_grad):
+        ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors
+
+        order = ranks_feat.argsort()
+        ranks_feat, ranks_depth, ranks_bev = \
+            ranks_feat[order], ranks_depth[order], ranks_bev[order]
+        kept = torch.ones(
+            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
+        kept[1:] = ranks_feat[1:] != ranks_feat[:-1]
+        interval_starts_bp = torch.where(kept)[0].int()
+        interval_lengths_bp = torch.zeros_like(interval_starts_bp)
+        interval_lengths_bp[:-1] = interval_starts_bp[
+            1:] - interval_starts_bp[:-1]
+        interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1]
+
+        depth = depth.contiguous()
+        feat = feat.contiguous()
+        ranks_depth = ranks_depth.contiguous()
+        ranks_feat = ranks_feat.contiguous()
+        ranks_bev = ranks_bev.contiguous()
+        interval_lengths_bp = interval_lengths_bp.contiguous()
+        interval_starts_bp = interval_starts_bp.contiguous()
+
+        depth_grad = depth.new_zeros(depth.shape)
+        feat_grad = feat.new_zeros(feat.shape)
+        out_grad = out_grad.contiguous()
+        bev_pool_v2_ext.bev_pool_v2_backward(
+            out_grad,
+            depth_grad,
+            feat_grad,
+            depth,
+            feat,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            interval_lengths_bp,
+            interval_starts_bp,
+        )
+        return depth_grad, feat_grad, None, None, None, None, None, \
+            None, None, None
+
+
+class QuickCumsumV2Cuda(torch.autograd.Function):
+    """Export wrapper: ``symbolic`` emits the ONNX node, ``forward`` runs the pooling; no backward."""
+
+    @staticmethod
+    def symbolic(g,
+                 depth,
+                 feat,
+                 ranks_depth,
+                 ranks_feat,
+                 ranks_bev,
+                 interval_starts,
+                 interval_lengths,
+                 out_height=128,
+                 out_width=128):
+        """Emit the custom ``autoware::QuickCumsumV2Cuda`` node for ONNX export and return it."""
+        x = g.op(
+            'autoware::QuickCumsumV2Cuda',
+            depth,
+            feat,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            interval_starts,
+            interval_lengths,
+            out_height_i=out_height,
+            out_width_i=out_width)
+        # The node has to be returned: the exporter uses the value returned by
+        # ``symbolic`` as the output of this op in the graph.
+        # TODO(review): no output shape/type is attached to the node - confirm if needed.
+        return x
+
+    @staticmethod
+    def forward(ctx,
+                depth,  # N,D,H,W
+                feat,  # N,H,W,C
+                ranks_depth,
+                ranks_feat,
+                ranks_bev,
+                interval_starts,
+                interval_lengths,
+                out_height=128,
+                out_width=128):
+        """Pool un-batched depth/feat into a (1, out_height, out_width, C) BEV map."""
+        feat = feat.unsqueeze(0)
+        depth = depth.unsqueeze(0)
+        bev_feat_shape = (depth.shape[0], 1, out_height, out_width,
+                          feat.shape[-1])  # (B, Z, Y, X, C)
+        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                               bev_feat_shape, interval_starts,
+                               interval_lengths, is_training=True)  # real kernel; must not re-enter this class
+        bev_feat = bev_feat.squeeze(2)
+        bev_feat = bev_feat.permute(0, 2, 3, 1)
+        return bev_feat
+
+    @staticmethod
+    def backward(ctx, out_grad):
+        raise NotImplementedError
+
+
+def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                bev_feat_shape, interval_starts, interval_lengths, is_training=True):
+    """Run BEVPoolv2; the output is permuted from (B, Z, Y, X, C) to (B, C, Z, Y, X).
+    ``is_training`` selects the autograd kernel (default) or the export wrapper."""
+    if is_training:
+        x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                                            bev_feat_shape, interval_starts, interval_lengths)
+    else:
+        # NOTE(review): QuickCumsumV2Cuda.forward takes interval_starts where bev_feat_shape is passed - confirm.
+        x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                                    bev_feat_shape, interval_starts, interval_lengths)
+
+    x = x.permute(0, 4, 1, 2, 3).contiguous()
+    return x
+
+
+def test_bev_pool_v2():
+    """Check the forward sum and the depth/feat gradients of bev_pool_v2 on a tiny CUDA example."""
+    depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
+    depth = torch.from_numpy(depth).float().cuda()
+    depth = depth.view(1, 1, 2, 2, 2).requires_grad_()
+    feat = torch.ones(
+        size=[1, 1, 2, 2, 2], dtype=torch.float,
+        device='cuda').requires_grad_()
+    ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
+    ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
+    ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()
+
+    kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
+    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
+    interval_starts = torch.where(kept)[0].int()
+    if len(interval_starts) == 0:
+        return None, None, None, None, None
+    interval_lengths = torch.zeros_like(interval_starts)
+    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
+    interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
+    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
+                           (1, 1, 2, 2, 2), interval_starts, interval_lengths, True)  # autograd path
+    loss = torch.sum(bev_feat)
+    loss.backward()
+    assert torch.isclose(loss, loss.new_tensor(4.4))  # float32 sum: compare with a tolerance
+    grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
+    grad_depth = torch.from_numpy(grad_depth).float()
+    grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
+    assert depth.grad.allclose(grad_depth)
+    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
+    grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)
+    assert feat.grad.allclose(grad_feat)
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp
new file mode 100644
index 000000000..c7c38f695
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp
@@ -0,0 +1,111 @@
+// Copyright (c) Phigent Robotics. All rights reserved.
+// Reference https://arxiv.org/abs/2211.17111
+#include <torch/torch.h>
+#include <c10/cuda/CUDAGuard.h>
+
+// CUDA function declarations
+void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,
+    const int* ranks_depth, const int* ranks_feat, const int* ranks_bev,
+    const int* interval_starts, const int* interval_lengths, float* out);
+
+void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
+  const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
+  const int* ranks_bev, const int* interval_starts, const int* interval_lengths,
+  float* depth_grad, float* feat_grad);
+
+
+/*
+  Function: pillar pooling (forward, cuda)
+  Args:
+    depth            : input depth, FloatTensor[n, d, h, w]
+    feat             : input features, FloatTensor[n, h, w, c]
+    out              : output features, FloatTensor[b, c, h_out, w_out]
+    ranks_depth      : depth index of points, IntTensor[n_points]
+    ranks_feat       : feat index of points, IntTensor[n_points]
+    ranks_bev        : output index of points, IntTensor[n_points]
+    interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
+    interval_starts  : starting position for pooled point, IntTensor[n_intervals]
+  Return:
+*/
+void bev_pool_v2_forward(
+  const at::Tensor _depth,
+  const at::Tensor _feat,
+  at::Tensor _out,
+  const at::Tensor _ranks_depth,
+  const at::Tensor _ranks_feat,
+  const at::Tensor _ranks_bev,
+  const at::Tensor _interval_lengths,
+  const at::Tensor _interval_starts
+) {
+  int c = _feat.size(4);
+  int n_intervals = _interval_lengths.size(0);
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth));
+  const float* depth = _depth.data_ptr<float>();
+  const float* feat = _feat.data_ptr<float>();
+  const int* ranks_depth = _ranks_depth.data_ptr<int>();
+  const int* ranks_feat = _ranks_feat.data_ptr<int>();
+  const int* ranks_bev = _ranks_bev.data_ptr<int>();
+
+  const int* interval_lengths = _interval_lengths.data_ptr<int>();
+  const int* interval_starts = _interval_starts.data_ptr<int>();
+
+  float* out = _out.data_ptr<float>();
+  bev_pool_v2(
+    c, n_intervals, depth, feat, ranks_depth, ranks_feat,
+    ranks_bev, interval_starts, interval_lengths, out
+  );
+}
+
+
+/*
+  Function: pillar pooling (backward, cuda)
+  Args:
+    out_grad         : grad of output bev feature, FloatTensor[b, c, h_out, w_out]
+    depth_grad       : grad of input depth, FloatTensor[n, d, h, w]
+    feat_grad        : grad of input feature, FloatTensor[n, h, w, c]
+    depth            : input depth, FloatTensor[n, d, h, w]
+    feat             : input features, FloatTensor[n, h, w, c]
+    ranks_depth      : depth index of points, IntTensor[n_points]
+    ranks_feat       : feat index of points, IntTensor[n_points]
+    ranks_bev        : output index of points, IntTensor[n_points]
+    interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
+    interval_starts  : starting position for pooled point, IntTensor[n_intervals]
+*/
+void bev_pool_v2_backward(
+  const at::Tensor _out_grad,
+  at::Tensor _depth_grad,
+  at::Tensor _feat_grad,
+  const at::Tensor _depth,
+  const at::Tensor _feat,
+  const at::Tensor _ranks_depth,
+  const at::Tensor _ranks_feat,
+  const at::Tensor _ranks_bev,
+  const at::Tensor _interval_lengths,
+  const at::Tensor _interval_starts
+) {
+  int c = _out_grad.size(4);
+  int n_intervals = _interval_lengths.size(0);
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
+  const float* out_grad = _out_grad.data_ptr<float>();
+  float* depth_grad = _depth_grad.data_ptr<float>();
+  float* feat_grad = _feat_grad.data_ptr<float>();
+  const float* depth = _depth.data_ptr<float>();
+  const float* feat = _feat.data_ptr<float>();
+  const int* ranks_depth = _ranks_depth.data_ptr<int>();
+  const int* ranks_feat = _ranks_feat.data_ptr<int>();
+  const int* ranks_bev = _ranks_bev.data_ptr<int>();
+  const int* interval_lengths = _interval_lengths.data_ptr<int>();
+  const int* interval_starts = _interval_starts.data_ptr<int>();
+
+  bev_pool_v2_grad(
+    c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
+    ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
+  );
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("bev_pool_v2_forward", &bev_pool_v2_forward,
+        "bev_pool_v2_forward");
+  m.def("bev_pool_v2_backward", &bev_pool_v2_backward,
+        "bev_pool_v2_backward");
+}
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu
new file mode 100644
index 000000000..7fa3179b7
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu
@@ -0,0 +1,140 @@
+// Copyright (c) Phigent Robotics. All rights reserved.
+// Reference https://arxiv.org/abs/2211.17111
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+  Function: pillar pooling
+  Args:
+    c                : number of channels
+    n_intervals      : number of unique points
+    depth            : input depth, FloatTensor[b,n,d,h,w]
+    feat             : input feat, FloatTensor[b,n,h,w,c]
+    ranks_depth      : input index of depth, IntTensor[n]
+    ranks_feat       : input index of feat, IntTensor[n]
+    ranks_bev        : output index, IntTensor[n]
+    interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
+    interval_starts  : starting position for pooled point, IntTensor[n_intervals]
+    out              : output features, FloatTensor[b, d, h, w, c]
+*/
+__global__ void bev_pool_v2_kernel(int c, int n_intervals,
+                                  const float *__restrict__ depth,
+                                  const float *__restrict__ feat,
+                                  const int *__restrict__ ranks_depth,
+                                  const int *__restrict__ ranks_feat,
+                                  const int *__restrict__ ranks_bev,
+                                  const int *__restrict__ interval_starts,
+                                  const int *__restrict__ interval_lengths,
+                                  float* __restrict__ out) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int index = idx / c;
+  int cur_c = idx % c;
+  if (index >= n_intervals) return;
+  int interval_start = interval_starts[index];
+  int interval_length = interval_lengths[index];
+  float psum = 0;
+  const float* cur_depth;
+  const float* cur_feat;
+  for(int i = 0; i < interval_length; i++){
+    cur_depth = depth + ranks_depth[interval_start+i];
+    cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c;
+    psum += *cur_feat * *cur_depth;
+  }
+
+  const int* cur_rank = ranks_bev + interval_start;
+  float* cur_out = out + *cur_rank * c + cur_c;
+  *cur_out = psum;
+}
+
+
+/*
+  Function: pillar pooling backward
+  Args:
+    c                : number of channels
+    n_intervals      : number of unique points
+    out_grad         : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
+    depth            : input depth, FloatTensor[b,n,d,h,w]
+    feat             : input feat, FloatTensor[b,n,h,w,c]
+    ranks_depth      : input index of depth, IntTensor[n]
+    ranks_feat       : input index of feat, IntTensor[n]
+    ranks_bev        : output index, IntTensor[n]
+    interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
+    interval_starts  : starting position for pooled point, IntTensor[n_intervals]
+    depth_grad       : gradient of the depth fmap, FloatTensor
+    feat_grad        : gradient of the feature fmap, FloatTensor
+*/
+__global__ void bev_pool_grad_kernel(int c, int n_intervals,
+                                  const float *__restrict__ out_grad,
+                                  const float *__restrict__ depth,
+                                  const float *__restrict__ feat,
+                                  const int *__restrict__ ranks_depth,
+                                  const int *__restrict__ ranks_feat,
+                                  const int *__restrict__ ranks_bev,
+                                  const int *__restrict__ interval_starts,
+                                  const int *__restrict__ interval_lengths,
+                                  float* __restrict__ depth_grad,
+                                  float* __restrict__ feat_grad) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_intervals) return;
+  int interval_start = interval_starts[idx];
+  int interval_length = interval_lengths[idx];
+
+  const int* cur_rank;
+  const float* cur_out_grad;
+  const float* cur_out_grad_start;
+
+  const float* cur_feat;
+  const float* cur_feat_start;
+  float* cur_depth_grad;
+  float grad_sum;
+  for(int i = 0; i < interval_length; i++){
+    cur_rank = ranks_bev + interval_start + i;
+    cur_out_grad_start = out_grad +  * cur_rank * c;
+    cur_feat_start = feat + ranks_feat[interval_start+i] * c;
+
+    grad_sum = 0;
+    for(int cur_c = 0; cur_c < c; cur_c++){
+      cur_out_grad = cur_out_grad_start + cur_c;
+      cur_feat = cur_feat_start + cur_c;
+      grad_sum += *cur_out_grad * *cur_feat;
+    }
+
+    cur_depth_grad = depth_grad + ranks_depth[interval_start+i];
+    *cur_depth_grad = grad_sum;
+  }
+
+  float* cur_feat_grad;
+  const float* cur_depth;
+  for(int cur_c = 0; cur_c < c; cur_c++){
+    grad_sum = 0;
+    for(int i = 0; i < interval_length; i++){
+      cur_rank = ranks_bev + interval_start + i;
+      cur_out_grad = out_grad + *cur_rank * c + cur_c;
+
+      cur_depth = depth + ranks_depth[interval_start+i];
+      grad_sum += *cur_out_grad * *cur_depth;
+    }
+    cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ;
+    * cur_feat_grad = grad_sum;
+  }
+}
+
+
+
+void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth,
+  const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) {
+  bev_pool_v2_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
+    c, n_intervals, depth, feat, ranks_depth, ranks_feat,
+    ranks_bev, interval_starts, interval_lengths, out
+  );
+}
+
+void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
+  const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
+  const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) {
+  bev_pool_grad_kernel<<<(int)ceil(((double)n_intervals / 256)), 256>>>(
+     c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
+     ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
+  );
+}
diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py
index 38f588b20..02fe93524 100644
--- a/projects/BEVFusion/setup.py
+++ b/projects/BEVFusion/setup.py
@@ -54,6 +54,14 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i
                     "src/bev_pool_cuda.cu",
                 ],
             ),
+            make_cuda_ext(
+                name="bev_pool_v2_ext",
+                module="projects.BEVFusion.bevfusion.ops.bev_pool_v2",
+                sources=[  # file names as they exist under ops/bev_pool_v2/src/
+                    "src/bev_pool.cpp",
+                    "src/bev_pool_cuda.cu",
+                ],
+            ),
             make_cuda_ext(
                 name="voxel_layer",
                 module="projects.BEVFusion.bevfusion.ops.voxel",

From 1499a91a4c7854793a87c4a943fba22bf3e9234b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 20:00:32 +0900
Subject: [PATCH 118/183] Update camera config structure

---
 projects/BEVFusion/bevfusion/depth_lss.py    |   2 +-
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 215 ++++++++++++++++++-
 2 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index f202c3777..ac7c5b503 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -567,7 +567,7 @@ def __init__(
             zbound=zbound,
             dbound=dbound,
         )
-        
+
         self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, last_stride=lidar_depth_image_last_stride)
         self.depthnet = DepthLSSNet(
             in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 974a39cce..4305b1fe6 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -4,6 +4,219 @@
 from mmdet3d.registry import MODELS
 from torch import nn
 
-from .depth_lss import DepthLSSNet, DownSampleNet, LidarDepthImageNet, BaseViewTransform
+from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet
 from .ops import bev_pool_v2
 
+
+class BaseViewTransformV2(BaseViewTransform):
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        image_size: Tuple[int, int],
+        feature_size: Tuple[int, int],
+        xbound: Tuple[float, float, float],
+        ybound: Tuple[float, float, float],
+        zbound: Tuple[float, float, float],
+        dbound: Tuple[float, float, float],
+    ):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            image_size=image_size,
+            feature_size=feature_size,
+            xbound=xbound,
+            ybound=ybound,
+            zbound=zbound,
+            dbound=dbound,
+        )
+
+    def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
+        raise NotImplementedError
+
+    def forward(
+        self,
+        img,
+        points,
+        lidar2image,
+        camera_intrinsics,
+        camera2lidar,
+        img_aug_matrix,
+        lidar_aug_matrix,
+        metas,
+        camera_intrinsics_inverse,
+        img_aug_matrix_inverse,
+        lidar_aug_matrix_inverse,
+        geom_feats_precomputed,
+    ):
+        if geom_feats_precomputed is not None:
+            geom_feats, kept, ranks, indices = geom_feats_precomputed
+            x, depth_softmax = self.get_cam_feats(img)
+            x = self.bev_pool_precomputed(x, depth_softmax, geom_feats, kept, ranks, indices)
+
+        else:
+            intrins = camera_intrinsics[..., :3, :3]
+            post_rots = img_aug_matrix[..., :3, :3]
+            post_trans = img_aug_matrix[..., :3, 3]
+            camera2lidar_rots = camera2lidar[..., :3, :3]
+            camera2lidar_trans = camera2lidar[..., :3, 3]
+
+            extra_rots = lidar_aug_matrix[..., :3, :3]
+            extra_trans = lidar_aug_matrix[..., :3, 3]
+
+            geom = self.get_geometry(
+                camera2lidar_rots,
+                camera2lidar_trans,
+                torch.inverse(intrins),
+                torch.inverse(post_rots),
+                post_trans,
+                extra_rots=extra_rots,
+                extra_trans=extra_trans,
+            )
+
+            # depth is not connected to the calibration
+            # on_img is
+            # is also flattened_indices
+            (
+                view_feats,
+                depth_softmax,
+            ) = self.get_cam_feats(img)
+            x = self.bev_pool(view_feats, depth_softmax, geom)
+
+        return x
+
+    def bev_pool_aux(self, geom_feats):
+        """Build the BEV-sorted index tensors for bev_pool_v2, or five ``None`` values if no point is kept."""
+        B, N, D, H, W, C = geom_feats.shape
+        Nprime = B * N * D * H * W
+        assert C == 3
+
+        # per-point depth index and per-pixel feature index (the latter is shared across the D depth bins)
+        ranks_depth = torch.arange(Nprime, dtype=torch.int, device=geom_feats.device)
+        ranks_feat = torch.arange(Nprime // D, dtype=torch.int, device=geom_feats.device)
+        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)
+        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()
+
+        # shift by (bx - dx / 2), scale by 1 / dx, truncate to integer cell indices, then append the batch index
+        geom_feats = ((geom_feats - (self.bx - self.dx / 2.0)) / self.dx).long()
+        geom_feats = geom_feats.view(Nprime, 3)
+        batch_ix = torch.cat(
+            [torch.full([Nprime // B, 1], ix, device=geom_feats.device, dtype=torch.long) for ix in range(B)]
+        )
+        geom_feats = torch.cat((geom_feats, batch_ix), 1)
+
+        # filter out points that are outside box
+        kept = (
+            (geom_feats[:, 0] >= 0)
+            & (geom_feats[:, 0] < self.nx[0])
+            & (geom_feats[:, 1] >= 0)
+            & (geom_feats[:, 1] < self.nx[1])
+            & (geom_feats[:, 2] >= 0)
+            & (geom_feats[:, 2] < self.nx[2])
+        )
+        if not kept.any():  # no point falls inside the grid
+            return None, None, None, None, None
+
+        geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
+
+        # nx is the total number of voxels/cells in the BEV grid
+        # nx[0] is x, nx[1] is y, nx[2] is z
+        ranks_bev = (
+            geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)
+            + geom_feats[:, 1] * (self.nx[2] * B)
+            + geom_feats[:, 2] * B
+            + geom_feats[:, 3]
+        )
+        indices = ranks_bev.argsort()
+        ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
+
+        intervals = self.compute_intervals(ranks_bev)
+        if intervals is None:
+            return None, None, None, None, None
+
+        interval_starts, interval_lengths = intervals
+        return (
+            ranks_bev.int().contiguous(),
+            ranks_depth.int().contiguous(),
+            ranks_feat.int().contiguous(),
+            interval_starts.int().contiguous(),
+            interval_lengths.int().contiguous(),
+        )
+
+    def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Return (starts, lengths) of each run of consecutive equal values; ``None`` if no run start exists."""
+        is_run_start = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
+        is_run_start[1:] = ranks_bev[1:] != ranks_bev[:-1]
+        run_starts = torch.where(is_run_start)[0].int()
+        if len(run_starts) == 0:
+            return None
+        run_lengths = torch.zeros_like(run_starts)
+        run_lengths[:-1] = run_starts[1:] - run_starts[:-1]
+        run_lengths[-1] = ranks_bev.shape[0] - run_starts[-1]
+        return run_starts.int().contiguous(), run_lengths.int().contiguous()
+
+    def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
+        """ """
+        B, N, D, H, W, _ = geom.shape
+        num_points = B * N * D * H * W
+
+        # record the index of selected points for acceleration purpose
+        ranks_depth = torch.range(0, num_points - 1, dtype=torch.int, device=geom.device)
+        ranks_feat = torch.range(0, num_points // D - 1, dtype=torch.int, device=geom.device)
+        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)
+        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()
+
+        B, N, C, fH, fW = view_feats.shape
+
+        bev_feat = bev_pool_v2(
+            depth_softmax,
+            x,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            bev_feat_shape,
+            interval_starts,
+            interval_lengths,
+            is_training,
+        )
+        return bev_feat
+
+
+class LSSTransformV2(BaseViewTransformV2):
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        image_size: Tuple[int, int],
+        feature_size: Tuple[int, int],
+        xbound: Tuple[float, float, float],
+        ybound: Tuple[float, float, float],
+        zbound: Tuple[float, float, float],
+        dbound: Tuple[float, float, float],
+        downsample: int = 1,
+    ):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            image_size=image_size,
+            feature_size=feature_size,
+            xbound=xbound,
+            ybound=ybound,
+            zbound=zbound,
+            dbound=dbound,
+        )
+        self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1)
+        self.downsample = DownSampleNet(downsample, self.out_channels, self.out_channels)
+
+    def get_cam_feats(self, x):
+        B, N, C, fH, fW = x.shape
+        x = x.view(B * N, C, fH, fW)
+        x = self.depthnet(x)
+
+        depth_softmax = x[:, : self.D].softmax(dim=1)
+        depth_softmax = depth_softmax.view(B, N, self.D, fH, fW)
+        view_feats = x[:, self.D : (self.D + self.C)]
+        view_feats = view_feats.view(B, N, self.C, fH, fW)
+        return view_feats, depth_softmax

From a5b406540c1495081c62ed8e4ddf338914fbee42 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 20:14:30 +0900
Subject: [PATCH 119/183] Add local 3d box expand

---
 .../default/models/default_lidar_second_secfpn_120m.py          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index e871fce58..4e4e7dde7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -104,7 +104,7 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015],
+            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015],
             out_size_factor=8,
             code_size=10,
         ),

From 0b547bf528d8ec474856a40e0697137c75bab6b6 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 20:27:56 +0900
Subject: [PATCH 120/183] Add local 3d box expand: retune per-class score thresholds (nms_v4)

---
 ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +-
 .../default/models/default_lidar_second_secfpn_120m.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index c77e0332b..288cb4d1b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 4e4e7dde7..796fef3e8 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -104,7 +104,7 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015],
+            score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020],
             out_size_factor=8,
             code_size=10,
         ),

From 6949e4154493164b950e56b2c85d38abbca7c29c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 22:37:21 +0900
Subject: [PATCH 121/183] Add local 3d box expand: merge bicycle/pedestrian NMS clusters (nms_v5)

---
 ...l_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +-
 .../default/models/default_lidar_second_secfpn_120m.py       | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index 288cb4d1b..71a60c0d5 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 796fef3e8..6b5f28e31 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -90,9 +90,10 @@
             nms_type="circle",  # Set to "circle" for circle_nms
             # Set NMS for different clusters
             nms_clusters=[
+                # Sqrt(0.25) = 0.5
                 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
-                dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50),
-                dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100),
+                # Sqrt(0.04) = 0.2
+                dict(class_names=["bicycle". "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200),
                 dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50),
             ],

From bc6d024bc2b5f35dddc08ad5d582e279c358e19c Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 19 May 2026 22:45:08 +0900
Subject: [PATCH 122/183] Add local 3d box expand: fix class_names list typo, retune score thresholds

---
 .../default/models/default_lidar_second_secfpn_120m.py        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 6b5f28e31..44744c1dd 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -93,7 +93,7 @@
                 # Sqrt(0.25) = 0.5
                 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
                 # Sqrt(0.04) = 0.2
-                dict(class_names=["bicycle". "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200),
+                dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200),
                 dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50),
             ],
@@ -105,7 +105,7 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020],
+            score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025],
             out_size_factor=8,
             code_size=10,
         ),

From 366e7a4975688ecc6f8069684dd3c9a772b53960 Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Wed, 20 May 2026 08:34:03 +0900
Subject: [PATCH 123/183] Rework LSS v2 bev_pool (add collapse_z) and reorder bev_pool_v2 arguments

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 56 ++++++++++++-------
 .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py  |  7 ++-
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 4305b1fe6..b4276834d 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -3,6 +3,7 @@
 import torch
 from mmdet3d.registry import MODELS
 from torch import nn
+from mmengine.logging import print_log
 
 from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet
 from .ops import bev_pool_v2
@@ -20,6 +21,7 @@ def __init__(
         ybound: Tuple[float, float, float],
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
+        collapse_z: bool = True,
     ):
         super().__init__(
             in_channels=in_channels,
@@ -31,7 +33,8 @@ def __init__(
             zbound=zbound,
             dbound=dbound,
         )
-
+        self.collapse_z = collapse_z
+    
     def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
         raise NotImplementedError
 
@@ -161,25 +164,40 @@ def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
         B, N, D, H, W, _ = geom.shape
         num_points = B * N * D * H * W
 
-        # record the index of selected points for acceleration purpose
-        ranks_depth = torch.range(0, num_points - 1, dtype=torch.int, device=geom.device)
-        ranks_feat = torch.range(0, num_points // D - 1, dtype=torch.int, device=geom.device)
-        ranks_feat = ranks_feat.reshape(B, N, 1, H, W)
-        ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()
-
-        B, N, C, fH, fW = view_feats.shape
-
+        ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom)
+
+        if ranks_feat is None:
+            print_log('warning ---> no points within the predefined bev receptive field')
+            dummy = torch.zeros(size=[
+                view_feats.shape[0], view_feats.shape[2],
+                int(self.nx[2]),
+                int(self.nx[1]),
+                int(self.nx[0])
+            ]).to(view_feats)
+            dummy = torch.cat(dummy.unbind(dim=2), 1)
+            return dummy
+        
+        # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C)
+        view_feats = view_feats.permute(0, 1, 3, 4, 2)
+        bev_feat_shape = (depth_softmax.shape[0], int(self.nx[2]),
+                        int(self.nx[1]), int(self.nx[0]),
+                        view_feats.shape[-1])  # (B, Z, Y, X, C)
+        
         bev_feat = bev_pool_v2(
-            depth_softmax,
-            x,
-            ranks_depth,
-            ranks_feat,
-            ranks_bev,
-            bev_feat_shape,
-            interval_starts,
-            interval_lengths,
-            is_training,
-        )
+            depth=depth_softmax, 
+            feat=view_feats, 
+            ranks_depth=ranks_depth, 
+            ranks_feat=ranks_feat, 
+            ranks_bev=ranks_bev,
+            interval_starts=interval_starts, 
+            interval_lengths=interval_lengths, 
+            bev_feat_shape=bev_feat_shape,
+            is_training=self.training)
+        
+        # collapse Z
+        if self.collapse_z:
+            bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)
+
         return bev_feat
 
 
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
index b1d2f03af..a40717503 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -141,16 +141,17 @@ def backward(ctx, out_grad):
 
 
 def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                bev_feat_shape, interval_starts, interval_lengths, is_training):
+                interval_starts, interval_lengths, bev_feat_shape, is_training):
     
     if is_training:
         x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                               bev_feat_shape, interval_starts,
                               interval_lengths)
     else:
+        # BEV Shape is (B, Z, Y, X, C)
+        out_height, out_width = bev_feat_shape[2], bev_feat_shape[2]
         x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                              bev_feat_shape, interval_starts,
-                              interval_lengths)
+                              interval_starts, interval_lengths)
     
     x = x.permute(0, 4, 1, 2, 3).contiguous()
     return x

From a61c859ec61b5f2639ed742fa7850648e72afba0 Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Wed, 20 May 2026 08:35:41 +0900
Subject: [PATCH 124/183] Pass out_height/out_width to QuickCumsumV2Cuda.apply in bev_pool_v2

---
 projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
index a40717503..5a479924d 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -151,8 +151,9 @@ def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
         # BEV Shape is (B, Z, Y, X, C)
         out_height, out_width = bev_feat_shape[2], bev_feat_shape[2]
         x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                              interval_starts, interval_lengths)
+                                    out_height, out_width, interval_starts, interval_lengths)
     
+    # Final shape: (B, C, Z, Y, X)
     x = x.permute(0, 4, 1, 2, 3).contiguous()
     return x
 

From 81e26bbc0fca82c9174f593c2877e48a31436d55 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 20 May 2026 16:55:27 +0900
Subject: [PATCH 125/183] Add local 3d box expand: lower bicycle/pedestrian NMS threshold to 0.001 (nms_v9)

---
 ...cond_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +-
 .../default/models/default_lidar_second_secfpn_120m.py   | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index 71a60c0d5..245ae0814 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 44744c1dd..979dd31f2 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -90,10 +90,11 @@
             nms_type="circle",  # Set to "circle" for circle_nms
             # Set NMS for different clusters
             nms_clusters=[
-                # Sqrt(0.25) = 0.5
+                # Sqrt(0.25) = 0.50
                 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
-                # Sqrt(0.04) = 0.2
-                dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200),
+                # Sqrt(0.001) = 0.0316
+                dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.001, post_max_size=200),
+                # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200),
                 dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50),
             ],
@@ -105,7 +106,7 @@
             post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0],
             # score_threshold=0.03,
             # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER
-            score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025],
+            score_threshold=[0.015, 0.010, 0.010, 0.020, 0.030, 0.040, 0.020],
             out_size_factor=8,
             code_size=10,
         ),

From 770be09224b0536ab40505abe3aa1a0622bbb1cb Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Wed, 20 May 2026 18:52:18 +0900
Subject: [PATCH 126/183] Add local 3d box expand: split bicycle and pedestrian NMS clusters again

---
 .../default/models/default_lidar_second_secfpn_120m.py       | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index 979dd31f2..bad602cb7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -92,9 +92,8 @@
             nms_clusters=[
                 # Sqrt(0.25) = 0.50
                 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
-                # Sqrt(0.001) = 0.0316
-                dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.001, post_max_size=200),
-                # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200),
+                dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50),
+                dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50),
             ],

From 89ab9c329e29afc3f8ae15f217664c3ac3421aa2 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 21 May 2026 01:22:59 +0900
Subject: [PATCH 127/183] Map other_vehicle/other_pedestrian labels to car/pedestrian in t4dataset configs

---
 autoware_ml/configs/detection3d/dataset/t4dataset/base.py       | 2 ++
 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py     | 2 ++
 .../configs/detection3d/dataset/t4dataset/j6gen2_base.py        | 2 ++
 .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py       | 2 ++
 .../configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py       | 2 ++
 autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py   | 2 ++
 ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +-
 7 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
index 8e49f2396..7f4be6293 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py
@@ -149,6 +149,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"]
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
index a93bf56af..a87166019 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py
@@ -130,6 +130,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 class_names = [
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
index 170086752..ef0141a5b 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py
@@ -136,6 +136,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
index c08decfa1..0f00a651d 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py
@@ -123,6 +123,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 class_names = [
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
index dbd6e2813..9995cd9b7 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py
@@ -120,6 +120,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 class_names = [
diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
index 2212b8e56..cd42362b5 100644
--- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
+++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py
@@ -122,6 +122,8 @@
     "traffic_cone": "traffic_cone",
     "trafficcone": "traffic_cone",
     "barrier": "barrier",
+    "other_vehicle": "car",
+    "other_pedestrian": "pedestrian",
 }
 
 class_names = [
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
index 245ae0814..3bdda213e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py
@@ -3,7 +3,7 @@
 ]
 
 # user setting
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 

From e6f64520a3fe5a28d60ac09213c6e4361ce6780b Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 21 May 2026 17:51:44 +0900
Subject: [PATCH 128/183] Update camera config structure

---
 projects/BEVFusion/bevfusion/__init__.py      |  12 +-
 projects/BEVFusion/bevfusion/depth_lss_v2.py  |  59 ++++----
 .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py  | 127 +++++++++---------
 .../resnet50/camera_resnet50_fpn_lss_50m.py   |   2 +-
 4 files changed, 111 insertions(+), 89 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 6a1a32ecc..385ccd89e 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -1,13 +1,20 @@
 from .bevfusion import BEVFusion
 from .bevfusion_head import BEVFusionHead, ConvFuser
 from .bevfusion_necks import GeneralizedLSSFPN
+from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder
 from .depth_lss import DepthLSSTransform, LSSTransform
+from .depth_lss_v2 import LSSTransformV2
 from .loading import BEVLoadMultiViewImageFromFiles
 from .sparse_encoder import BEVFusionSparseEncoder
 from .transformer import TransformerDecoderLayer
-from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D, BEVFusionRemoveLiDARPoints
+from .transforms_3d import (
+    BEVFusionGlobalRotScaleTrans,
+    BEVFusionRandomFlip3D,
+    BEVFusionRemoveLiDARPoints,
+    GridMask,
+    ImageAug3D,
+)
 from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder
-from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet
 
 __all__ = [
     "BEVFusion",
@@ -31,4 +38,5 @@
     "TransFusionBBoxCoder",
     "HardSimpleVoxelSinCosEncoder",
     "BEVFusionVoxelFeatureNet",
+    "LSSTransformV2",
 ]
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index b4276834d..e3213eb6e 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -2,8 +2,8 @@
 
 import torch
 from mmdet3d.registry import MODELS
-from torch import nn
 from mmengine.logging import print_log
+from torch import nn
 
 from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet
 from .ops import bev_pool_v2
@@ -22,7 +22,13 @@ def __init__(
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
         collapse_z: bool = True,
+        expand_batch_axis: bool = False,
     ):
+        """
+        Args:
+            collapse_z: collapse the Z axis of the BEV grid
+            expand_batch_axis: expand the batch axis of the inputs to bev pool if this is set to True.
+        """
         super().__init__(
             in_channels=in_channels,
             out_channels=out_channels,
@@ -34,7 +40,8 @@ def __init__(
             dbound=dbound,
         )
         self.collapse_z = collapse_z
-    
+        self.expand_batch_axis = expand_batch_axis
+
     def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
         raise NotImplementedError
 
@@ -161,39 +168,41 @@ def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torc
 
     def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
         """ """
-        B, N, D, H, W, _ = geom.shape
-        num_points = B * N * D * H * W
-
         ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom)
+        if self.expand_batch_axis:
+            view_feats = view_feats.unsqueeze(0)
+            depth_softmax = depth_softmax.unsqueeze(0)
 
         if ranks_feat is None:
-            print_log('warning ---> no points within the predefined bev receptive field')
-            dummy = torch.zeros(size=[
-                view_feats.shape[0], view_feats.shape[2],
-                int(self.nx[2]),
-                int(self.nx[1]),
-                int(self.nx[0])
-            ]).to(view_feats)
+            print_log("warning ---> no points within the predefined bev receptive field")
+            dummy = torch.zeros(
+                size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])]
+            ).to(view_feats)
             dummy = torch.cat(dummy.unbind(dim=2), 1)
             return dummy
-        
+
         # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C)
         view_feats = view_feats.permute(0, 1, 3, 4, 2)
-        bev_feat_shape = (depth_softmax.shape[0], int(self.nx[2]),
-                        int(self.nx[1]), int(self.nx[0]),
-                        view_feats.shape[-1])  # (B, Z, Y, X, C)
-        
+        bev_feat_shape = (
+            depth_softmax.shape[0],
+            int(self.nx[2]),
+            int(self.nx[1]),
+            int(self.nx[0]),
+            view_feats.shape[-1],
+        )  # (B, Z, Y, X, C)
+
         bev_feat = bev_pool_v2(
-            depth=depth_softmax, 
-            feat=view_feats, 
-            ranks_depth=ranks_depth, 
-            ranks_feat=ranks_feat, 
+            depth=depth_softmax,
+            feat=view_feats,
+            ranks_depth=ranks_depth,
+            ranks_feat=ranks_feat,
             ranks_bev=ranks_bev,
-            interval_starts=interval_starts, 
-            interval_lengths=interval_lengths, 
+            interval_starts=interval_starts,
+            interval_lengths=interval_lengths,
             bev_feat_shape=bev_feat_shape,
-            is_training=self.training)
-        
+            is_training=self.training,
+        )
+
         # collapse Z
         if self.collapse_z:
             bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
index 5a479924d..d9408d078 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -11,9 +11,11 @@ class QuickCumsumV2TrainingCuda(torch.autograd.Function):
 
     Please refer to the `paper <https://arxiv.org/abs/2211.17111>`_
     """
+
     @staticmethod
-    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                bev_feat_shape, interval_starts, interval_lengths):
+    def forward(
+        ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths
+    ):
         ranks_bev = ranks_bev.int()
         depth = depth.contiguous().float()
         feat = feat.contiguous().float()
@@ -43,15 +45,12 @@ def backward(ctx, out_grad):
         ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors
 
         order = ranks_feat.argsort()
-        ranks_feat, ranks_depth, ranks_bev = \
-            ranks_feat[order], ranks_depth[order], ranks_bev[order]
-        kept = torch.ones(
-            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
+        ranks_feat, ranks_depth, ranks_bev = ranks_feat[order], ranks_depth[order], ranks_bev[order]
+        kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
         kept[1:] = ranks_feat[1:] != ranks_feat[:-1]
         interval_starts_bp = torch.where(kept)[0].int()
         interval_lengths_bp = torch.zeros_like(interval_starts_bp)
-        interval_lengths_bp[:-1] = interval_starts_bp[
-            1:] - interval_starts_bp[:-1]
+        interval_lengths_bp[:-1] = interval_starts_bp[1:] - interval_starts_bp[:-1]
         interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1]
 
         depth = depth.contiguous()
@@ -77,26 +76,27 @@ def backward(ctx, out_grad):
             interval_lengths_bp,
             interval_starts_bp,
         )
-        return depth_grad, feat_grad, None, None, None, None, None, \
-            None, None, None
+        return depth_grad, feat_grad, None, None, None, None, None, None, None, None
 
 
 class QuickCumsumV2Cuda(torch.autograd.Function):
 
     @staticmethod
-    def symbolic(g,
-                 depth,
-                 feat,
-                 ranks_depth,
-                 ranks_feat,
-                 ranks_bev,
-                 interval_starts,
-                 interval_lengths,
-                 out_height=128,
-                 out_width=128):
+    def symbolic(
+        g,
+        depth,
+        feat,
+        ranks_depth,
+        ranks_feat,
+        ranks_bev,
+        interval_starts,
+        interval_lengths,
+        out_height=128,
+        out_width=128,
+    ):
         """symbolic function for creating onnx op."""
         x = g.op(
-            'autoware::QuickCumsumV2Cuda',
+            "autoware::QuickCumsumV2Cuda",
             depth,
             feat,
             ranks_depth,
@@ -105,54 +105,61 @@ def symbolic(g,
             interval_starts,
             interval_lengths,
             out_height_i=out_height,
-            out_width_i=out_width)
-        
+            out_width_i=out_width,
+        )
+
         # features_shape = _get_tensor_sizes(feat)
         # if features_shape is not None and hasattr(x.type(), "with_sizes"):
         #     output_type = x.type().with_sizes([B, D, H, W, _get_tensor_dim_size(x, -1)])
         #     output.setType(output_type)
 
     @staticmethod
-    def forward(ctx,
-                depth,  # N,D,H,W
-                feat,  # N,H,W,C
-                ranks_depth,
-                ranks_feat,
-                ranks_bev,
-                interval_starts,
-                interval_lengths,
-                out_height=128,
-                out_width=128):
+    def forward(
+        ctx,
+        depth,  # B,N,D,H,W
+        feat,  # B,N,H,W,C
+        ranks_depth,
+        ranks_feat,
+        ranks_bev,
+        interval_starts,
+        interval_lengths,
+        out_height=128,
+        out_width=128,
+    ):
         """run forward."""
-        feat = feat.unsqueeze(0)
-        depth = depth.unsqueeze(0)
-        bev_feat_shape = (depth.shape[0], 1, out_height, out_width,
-                          feat.shape[-1])  # (B, Z, Y, X, C)
-        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                               bev_feat_shape, interval_starts,
-                               interval_lengths)
-        bev_feat = bev_feat.squeeze(2)
-        bev_feat = bev_feat.permute(0, 2, 3, 1)
+        out = feat.new_zeros(depth.shape[0], 1, out_height, out_width, feat.shape[-1])
+        bev_feat = bev_pool_v2_ext.bev_pool_v2_forward(
+            depth,
+            feat,
+            out,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            interval_lengths,
+            interval_starts,
+        )
         return bev_feat
-     
+
     @staticmethod
     def backward(ctx, out_grad):
         raise NotImplementedError
 
 
-def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                interval_starts, interval_lengths, bev_feat_shape, is_training):
-    
+def bev_pool_v2(
+    depth, feat, ranks_depth, ranks_feat, ranks_bev, interval_starts, interval_lengths, bev_feat_shape, is_training
+):
+
     if is_training:
-        x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                              bev_feat_shape, interval_starts,
-                              interval_lengths)
+        x = QuickCumsumV2TrainingCuda.apply(
+            depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths
+        )
     else:
         # BEV Shape is (B, Z, Y, X, C)
         out_height, out_width = bev_feat_shape[2], bev_feat_shape[2]
-        x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                                    out_height, out_width, interval_starts, interval_lengths)
-    
+        x = QuickCumsumV2Cuda.apply(
+            depth, feat, ranks_depth, ranks_feat, ranks_bev, out_height, out_width, interval_starts, interval_lengths
+        )
+
     # Final shape: (B, C, Z, Y, X)
     x = x.permute(0, 4, 1, 2, 3).contiguous()
     return x
@@ -162,15 +169,12 @@ def test_bev_pool_v2():
     depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
     depth = torch.from_numpy(depth).float().cuda()
     depth = depth.view(1, 1, 2, 2, 2).requires_grad_()
-    feat = torch.ones(
-        size=[1, 1, 2, 2, 2], dtype=torch.float,
-        device='cuda').requires_grad_()
+    feat = torch.ones(size=[1, 1, 2, 2, 2], dtype=torch.float, device="cuda").requires_grad_()
     ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
     ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
     ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()
 
-    kept = torch.ones(
-        ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
+    kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
     kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
     interval_starts = torch.where(kept)[0].int()
     if len(interval_starts) == 0:
@@ -178,15 +182,16 @@ def test_bev_pool_v2():
     interval_lengths = torch.zeros_like(interval_starts)
     interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
     interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
-    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
-                           (1, 1, 2, 2, 2), interval_starts, interval_lengths)
+    bev_feat = bev_pool_v2(
+        depth, feat, ranks_depth, ranks_feat, ranks_bev, (1, 1, 2, 2, 2), interval_starts, interval_lengths
+    )
     loss = torch.sum(bev_feat)
     loss.backward()
     assert loss == 4.4
-    grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
+    grad_depth = np.array([2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0])
     grad_depth = torch.from_numpy(grad_depth).float()
     grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
     assert depth.grad.allclose(grad_depth)
-    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
+    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0.0, 0.0])
     grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)
     assert feat.grad.allclose(grad_feat)
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index ca3e8f8a2..7a6420d51 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -5,7 +5,7 @@
 # Image network
 model = dict(
     view_transform=dict(
-        type="LSSTransform",
+        type="LSSTransformV2",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],

From 7ecba750481a60ea4626c45fdd357c517403cd28 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 21 May 2026 18:41:09 +0900
Subject: [PATCH 129/183] Update camera config structure

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 49 +++++++++++++-------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index e3213eb6e..d00958b3c 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -1,4 +1,4 @@
-from typing import Tuple
+from typing import Optional, Tuple
 
 import torch
 from mmdet3d.registry import MODELS
@@ -61,9 +61,9 @@ def forward(
         geom_feats_precomputed,
     ):
         if geom_feats_precomputed is not None:
-            geom_feats, kept, ranks, indices = geom_feats_precomputed
+            ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed
             x, depth_softmax = self.get_cam_feats(img)
-            x = self.bev_pool_precomputed(x, depth_softmax, geom_feats, kept, ranks, indices)
+            x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat)
 
         else:
             intrins = camera_intrinsics[..., :3, :3]
@@ -126,7 +126,7 @@ def bev_pool_aux(self, geom_feats):
         )
 
         if len(kept) == 0:
-            return None, None, None, None
+            return None, None, None
 
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
 
@@ -141,25 +141,21 @@ def bev_pool_aux(self, geom_feats):
         indices = ranks_bev.argsort()
         ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
 
-        intervals = self.compute_intervals(ranks_bev)
-        if intervals is None:
-            return None, None, None, None, None
-
-        interval_starts, interval_lengths = intervals
         return (
             ranks_bev.int().contiguous(),
             ranks_depth.int().contiguous(),
             ranks_feat.int().contiguous(),
-            interval_starts.int().contiguous(),
-            interval_lengths.int().contiguous(),
         )
 
-    def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def compute_intervals(self, ranks_bev: Optional[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
+        if ranks_bev is None:
+            return None, None
+
         kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
         kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
         interval_starts = torch.where(kept)[0].int()
         if len(interval_starts) == 0:
-            return None
+            return None, None
 
         interval_lengths = torch.zeros_like(interval_starts)
         interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
@@ -168,12 +164,18 @@ def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torc
 
     def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
         """ """
-        ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom)
-        if self.expand_batch_axis:
-            view_feats = view_feats.unsqueeze(0)
-            depth_softmax = depth_softmax.unsqueeze(0)
+        ranks_bev, ranks_depth, ranks_feat = self.bev_pool_aux(geom)
+        interval_starts, interval_lengths = self.compute_intervals(ranks_bev)
+        bev_feat = self.compute_bev_pool(
+            view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths
+        )
+        return bev_feat
 
-        if ranks_feat is None:
+    def compute_bev_pool(
+        self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths
+    ):
+        """Compute the BEV pool for the given view features, depth softmax, ranks, and intervals."""
+        if interval_starts is None:
             print_log("warning ---> no points within the predefined bev receptive field")
             dummy = torch.zeros(
                 size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])]
@@ -181,6 +183,10 @@ def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
             dummy = torch.cat(dummy.unbind(dim=2), 1)
             return dummy
 
+        if self.expand_batch_axis:
+            view_feats = view_feats.unsqueeze(0)
+            depth_softmax = depth_softmax.unsqueeze(0)
+
         # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C)
         view_feats = view_feats.permute(0, 1, 3, 4, 2)
         bev_feat_shape = (
@@ -209,6 +215,13 @@ def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor:
 
         return bev_feat
 
+    def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat):
+        interval_starts, interval_lengths = self.compute_intervals(ranks_bev)
+        bev_feat = self.compute_bev_pool(
+            view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths
+        )
+        return bev_feat
+
 
 class LSSTransformV2(BaseViewTransformV2):
 

From aa0fdda9043f060173eddc8d3ffce22c61a42f1f Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 21 May 2026 18:47:13 +0900
Subject: [PATCH 130/183] Update camera config structure

---
 projects/BEVFusion/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py
index 02fe93524..52d397c12 100644
--- a/projects/BEVFusion/setup.py
+++ b/projects/BEVFusion/setup.py
@@ -58,8 +58,8 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i
                 name="bev_pool_v2_ext",
                 module="projects.BEVFusion.bevfusion.ops.bev_pool_v2",
                 sources=[
-                    "src/bev_pool_v2.cpp",
-                    "src/bev_pool_v2_cuda.cu",
+                    "src/bev_pool.cpp",
+                    "src/bev_pool_cuda.cu",
                 ],
             ),
             make_cuda_ext(

From 178c0e0a0724247e0aa84f6e488be43c9226aadc Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 00:49:30 +0900
Subject: [PATCH 131/183] Added

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py                  | 3 ++-
 .../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py     | 4 ++--
 .../models/resnet50/camera_resnet50_fpn_depthlss_120m.py      | 3 +--
 .../default/models/resnet50/camera_resnet50_fpn_lss_50m.py    | 2 +-
 .../schedulers/default_30e_8xb16_adamw_linear_cosine.py       | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index d00958b3c..c6383e329 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -223,6 +223,7 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth
         return bev_feat
 
 
+@MODELS.register_module()
 class LSSTransformV2(BaseViewTransformV2):
 
     def __init__(
@@ -248,7 +249,7 @@ def __init__(
             dbound=dbound,
         )
         self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1)
-        self.downsample = DownSampleNet(downsample, self.out_channels, self.out_channels)
+        self.downsample = DownSampleNet(downsample, out_channels, out_channels)
 
     def get_cam_feats(self, x):
         B, N, C, fH, fW = x.shape
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
index ceedda1c9..81859eed7 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
@@ -11,8 +11,8 @@
 custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
 
 # user setting
-data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8_0/"
+data_root = "data/t4datasets/"
+info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m"
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
index a6ccca5dc..90aa87210 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
@@ -19,8 +19,7 @@
         rgb_to_bgr=False,
     ),
     img_backbone=dict(
-        pretrained="torchvision://resnet50",
-        type="ResNet",
+        type="mmdet.ResNet",
         depth=50,
         num_stages=4,
         out_indices=(2, 3),
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 7a6420d51..95ab35f85 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -1,5 +1,5 @@
 _base_ = [
-    "./default_camera_resnet50_fpn_depthlss_120m.py",
+    "./camera_resnet50_fpn_depthlss_120m.py",
 ]
 
 # Image network
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 261246886..b56cef0d2 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -4,7 +4,7 @@
 max_epochs = 30
 val_interval = 1
 
-train_gpu_size = 8
+train_gpu_size = 2
 test_batch_size = 2
 train_batch_size = 8
 

From fb6935dea96af8eab3c06ba9eaa22eb77a10eaf1 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 00:55:38 +0900
Subject: [PATCH 132/183] Added

---
 projects/BEVFusion/bevfusion/ops/__init__.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/ops/__init__.py b/projects/BEVFusion/bevfusion/ops/__init__.py
index e08abbc6d..f74f0edbb 100644
--- a/projects/BEVFusion/bevfusion/ops/__init__.py
+++ b/projects/BEVFusion/bevfusion/ops/__init__.py
@@ -1,4 +1,12 @@
 from .bev_pool import bev_pool
+from .bev_pool_v2 import bev_pool_v2
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
 
-__all__ = ["bev_pool", "Voxelization", "voxelization", "dynamic_scatter", "DynamicScatter"]
+__all__ = [
+    "bev_pool",
+    "bev_pool_v2",
+    "Voxelization",
+    "voxelization",
+    "dynamic_scatter",
+    "DynamicScatter",
+]

From 4a9557864a6aeb47356142b87dbc322f74669887 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 01:18:59 +0900
Subject: [PATCH 133/183] Added

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py                | 6 +++++-
 ...mera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py | 1 -
 ...sion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py | 1 -
 ...ion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 1 -
 .../bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py | 1 -
 .../default/models/resnet50/camera_resnet50_fpn_lss_50m.py  | 3 +++
 6 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index c6383e329..df0740331 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -212,7 +212,6 @@ def compute_bev_pool(
         # collapse Z
         if self.collapse_z:
             bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)
-
         return bev_feat
 
     def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat):
@@ -261,3 +260,8 @@ def get_cam_feats(self, x):
         view_feats = x[:, self.D : (self.D + self.C)]
         view_feats = view_feats.view(B, N, self.C, fH, fW)
         return view_feats, depth_softmax
+    
+    def forward(self, *args, **kwargs):
+        x = super().forward(*args, **kwargs)
+        x = self.downsample(x)
+        return x
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
index e73416744..9473ceb0f 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
@@ -13,7 +13,6 @@
     view_transform=dict(image_size=_base_.image_size),
     bbox_head=dict(
         class_names=_base_.class_names,
-        in_channels=80,
         train_cfg=dict(
             point_cloud_range=_base_.point_cloud_range,
             grid_size=_base_.grid_size,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
index ebdfff437..47c91cfb3 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py
@@ -13,7 +13,6 @@
     view_transform=dict(image_size=_base_.image_size),
     bbox_head=dict(
         class_names=_base_.class_names,
-        in_channels=80,
         train_cfg=dict(
             point_cloud_range=_base_.point_cloud_range,
             grid_size=_base_.grid_size,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
index e23efb65a..d7bd79913 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
@@ -13,7 +13,6 @@
     view_transform=dict(image_size=_base_.image_size),
     bbox_head=dict(
         class_names=_base_.class_names,
-        in_channels=80,
         train_cfg=dict(
             point_cloud_range=_base_.point_cloud_range,
             grid_size=_base_.grid_size,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
index 7bf63010b..5215dc9f3 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
@@ -13,7 +13,6 @@
     view_transform=dict(image_size=_base_.image_size),
     bbox_head=dict(
         class_names=_base_.class_names,
-        in_channels=80,
         train_cfg=dict(
             point_cloud_range=_base_.point_cloud_range,
             grid_size=_base_.grid_size,
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 95ab35f85..5381ea708 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -1,6 +1,7 @@
 _base_ = [
     "./camera_resnet50_fpn_depthlss_120m.py",
 ]
+num_proposals = 200 
 
 # Image network
 model = dict(
@@ -13,6 +14,8 @@
         downsample=2,
     ),
     bbox_head=dict(
+        in_channels=80,
+        num_proposals=num_proposals,
         bbox_coder=dict(
             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
         ),

From 237e0aae0a4d7b692f710e4f10014f35dd56f01b Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 11:32:04 +0900
Subject: [PATCH 134/183] Added

---
 projects/BEVFusion/bevfusion/depth_lss.py     | 37 +++++++++++++++++++
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 23 ++++++++----
 .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py  | 12 +++++-
 ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py |  2 +-
 .../camera_resnet50_fpn_depthlss_120m.py      |  3 ++
 .../resnet50/camera_resnet50_fpn_lss_50m.py   |  3 +-
 .../default_30e_8xb16_adamw_linear_cosine.py  |  2 +-
 7 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index ac7c5b503..f363a8e76 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -164,6 +164,7 @@ def __init__(
         ybound: Tuple[float, float, float],
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
+        visualize_bev_feat: bool = False,
     ) -> None:
         super().__init__()
         self.in_channels = in_channels
@@ -183,6 +184,7 @@ def __init__(
         self.frustum = self.create_frustum()
         self.D = self.frustum.shape[0]
         self.fp16_enabled = False
+        self.visualize_bev_feat = visualize_bev_feat
 
     def create_frustum(self):
         iH, iW = self.image_size
@@ -319,8 +321,43 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices):
 
         # collapse Z
         final = torch.cat(x.unbind(dim=2), 1)
+        if self.visualize_bev_feat:
+            self.visualize_bev_feat(final)
+        
         return final
 
+    def visualize_bev_feat(self, bev_feat):
+        """Visualize the BEV feat for the given batch index."""
+        batch_idx = 0
+        # save first 10 raw channel maps for one batch sample (B, C, Y, X) 
+        num_channels = 10
+        feat = bev_feat[batch_idx].detach().float().cpu().numpy()
+        channel_indices = np.arange(min(num_channels, feat.shape[0]))
+        ncols = min(5, len(channel_indices))
+        nrows = math.ceil(len(channel_indices) / ncols)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(3 * ncols, 3 * nrows), squeeze=False)
+        for ax, ch_idx in zip(axes.ravel(), channel_indices):
+            ch_map = feat[ch_idx]
+            im = ax.imshow(ch_map, cmap="viridis", origin="lower", aspect="equal")
+            ax.set_title(f"ch {ch_idx}", fontsize=9)
+            ax.set_xlabel("X")
+            ax.set_ylabel("Y")
+            fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+        for ax in axes.ravel()[len(channel_indices) :]:
+            ax.axis("off")
+        fig.suptitle(f"bev_feat channels 0-{len(channel_indices) - 1} (batch={batch_idx})")
+        fig.tight_layout()
+
+        save_dir = Path("work_dirs/bev_feat_vis")
+        save_dir.mkdir(parents=True, exist_ok=True)
+        if not hasattr(self, "_bev_feat_vis_count"):
+            self._bev_feat_vis_count = 0
+        self._bev_feat_vis_count += 1
+        save_path = save_dir / f"bev_feat_batch{batch_idx}_{self._bev_feat_vis_count:06d}.png"
+        fig.savefig(save_path, dpi=150, bbox_inches="tight")
+        plt.close(fig)
+        print_log(f"Saved BEV feat visualization to {save_path.resolve()}")
+
     def forward(
         self,
         img,
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index df0740331..e50992b46 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -1,5 +1,9 @@
+import math
+from pathlib import Path
 from typing import Optional, Tuple
 
+import matplotlib.pyplot as plt
+import numpy as np
 import torch
 from mmdet3d.registry import MODELS
 from mmengine.logging import print_log
@@ -23,6 +27,7 @@ def __init__(
         dbound: Tuple[float, float, float],
         collapse_z: bool = True,
         expand_batch_axis: bool = False,
+        visualize_bev_feat: bool = False,
     ):
         """
         Args:
@@ -38,6 +43,7 @@ def __init__(
             ybound=ybound,
             zbound=zbound,
             dbound=dbound,
+            visualize_bev_feat=visualize_bev_feat,
         )
         self.collapse_z = collapse_z
         self.expand_batch_axis = expand_batch_axis
@@ -130,13 +136,12 @@ def bev_pool_aux(self, geom_feats):
 
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
 
-        # nx is the total number of voxels/cells in the BEV grid
-        # nx[0] is x, nx[1] is y, nx[2] is z
+        # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C)
         ranks_bev = (
-            geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)
-            + geom_feats[:, 1] * (self.nx[2] * B)
-            + geom_feats[:, 2] * B
-            + geom_feats[:, 3]
+            geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
+            + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
+            + geom_feats[:, 1] * (self.nx[0])
+            + geom_feats[:, 0]
         )
         indices = ranks_bev.argsort()
         ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
@@ -212,8 +217,12 @@ def compute_bev_pool(
         # collapse Z
         if self.collapse_z:
             bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)
-        return bev_feat
 
+        if self.visualize_bev_feat:
+            self.visualize_bev_feat(bev_feat)
+
+        return bev_feat
+     
     def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat):
         interval_starts, interval_lengths = self.compute_intervals(ranks_bev)
         bev_feat = self.compute_bev_pool(
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
index d9408d078..57b18a69e 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -155,9 +155,17 @@ def bev_pool_v2(
         )
     else:
         # BEV Shape is (B, Z, Y, X, C)
-        out_height, out_width = bev_feat_shape[2], bev_feat_shape[2]
+        out_height, out_width = bev_feat_shape[2], bev_feat_shape[3]
         x = QuickCumsumV2Cuda.apply(
-            depth, feat, ranks_depth, ranks_feat, ranks_bev, out_height, out_width, interval_starts, interval_lengths
+            depth,
+            feat,
+            ranks_depth,
+            ranks_feat,
+            ranks_bev,
+            interval_starts,
+            interval_lengths,
+            out_height,
+            out_width,
         )
 
     # Final shape: (B, C, Z, Y, X)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
index d7bd79913..751b24438 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
@@ -4,7 +4,7 @@
 ]
 
 experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_view_lss_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
index 90aa87210..4a1f33040 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
@@ -54,4 +54,7 @@
         dbound=[1.0, 130, 1.0],
         downsample=2,
     ),
+    bbox_head=dict(
+        in_channels=80,
+    )
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 5381ea708..2f556a122 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,7 +6,7 @@
 # Image network
 model = dict(
     view_transform=dict(
-        type="LSSTransformV2",
+        type="LSSTransform",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],
@@ -14,7 +14,6 @@
         downsample=2,
     ),
     bbox_head=dict(
-        in_channels=80,
         num_proposals=num_proposals,
         bbox_coder=dict(
             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index b56cef0d2..18dd6126c 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 2e-4
+lr = 1e-4
 t_max = 3
 max_epochs = 30
 val_interval = 1

From 2749ef586d35e4f908d438585501a9ed3b3453ae Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 14:20:24 +0900
Subject: [PATCH 135/183] Rename BEV feature plotting to plot_bev_feat and debug LSSTransformV2 pooling

---
 projects/BEVFusion/bevfusion/depth_lss.py     | 27 +++++++++++++----
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 30 ++++++++++++-------
 .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py  | 26 ++++------------
 ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py |  2 +-
 .../resnet50/camera_resnet50_fpn_lss_50m.py   |  2 +-
 5 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index f363a8e76..2e68a3772 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -1,8 +1,13 @@
 # modify from https://github.com/mit-han-lab/bevfusion
+import math
+from pathlib import Path
 from typing import Tuple
 
+import matplotlib.pyplot as plt
+import numpy as np
 import torch
 from mmdet3d.registry import MODELS
+from mmengine.logging import print_log
 from torch import nn
 
 from .ops import bev_pool
@@ -322,16 +327,28 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices):
         # collapse Z
         final = torch.cat(x.unbind(dim=2), 1)
         if self.visualize_bev_feat:
-            self.visualize_bev_feat(final)
+            self.plot_bev_feat(final)
         
         return final
 
-    def visualize_bev_feat(self, bev_feat):
+    def plot_bev_feat(self, bev_feat):
         """Visualize the BEV feat for the given batch index."""
+        try:
+            import torch.distributed as dist
+
+            if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0:
+                return
+        except ImportError:
+            pass
+
         batch_idx = 0
-        # save first 10 raw channel maps for one batch sample (B, C, Y, X) 
+        if bev_feat.shape[0] <= batch_idx:
+            return
+
+        # save first 10 raw channel maps for one batch sample (B, C, Y, X)
         num_channels = 10
-        feat = bev_feat[batch_idx].detach().float().cpu().numpy()
+        with torch.no_grad():
+            feat = bev_feat[batch_idx].detach().float().cpu().numpy()
         channel_indices = np.arange(min(num_channels, feat.shape[0]))
         ncols = min(5, len(channel_indices))
         nrows = math.ceil(len(channel_indices) / ncols)
@@ -348,7 +365,7 @@ def visualize_bev_feat(self, bev_feat):
         fig.suptitle(f"bev_feat channels 0-{len(channel_indices) - 1} (batch={batch_idx})")
         fig.tight_layout()
 
-        save_dir = Path("work_dirs/bev_feat_vis")
+        save_dir = Path("work_dirs/bev_feat_vis_2")
         save_dir.mkdir(parents=True, exist_ok=True)
         if not hasattr(self, "_bev_feat_vis_count"):
             self._bev_feat_vis_count = 0
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index e50992b46..e30a5534a 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -27,7 +27,7 @@ def __init__(
         dbound: Tuple[float, float, float],
         collapse_z: bool = True,
         expand_batch_axis: bool = False,
-        visualize_bev_feat: bool = False,
+        visualize_bev_feat: bool = True,
     ):
         """
         Args:
@@ -104,6 +104,7 @@ def forward(
 
     def bev_pool_aux(self, geom_feats):
         B, N, D, H, W, C = geom_feats.shape
+        print("geom_feats:", geom_feats.shape)
         Nprime = B * N * D * H * W
         assert C == 3
 
@@ -137,15 +138,23 @@ def bev_pool_aux(self, geom_feats):
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
 
         # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C)
+        print("ranks_depth, ranks_feat, geom_feats:", ranks_depth.shape, ranks_feat.shape, geom_feats.shape)
+        # ranks_bev = (
+        #     geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
+        #     + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
+        #     + geom_feats[:, 1] * (self.nx[0])
+        #     + geom_feats[:, 0]
+        # )
         ranks_bev = (
-            geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
-            + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
-            + geom_feats[:, 1] * (self.nx[0])
-            + geom_feats[:, 0]
+            geom_feats[:, 0] * (self.nx[2] * self.nx[1] * B)
+            + geom_feats[:, 1] * (self.nx[2] * B)
+            + geom_feats[:, 2] * (B)
+            + geom_feats[:, 3]
         )
         indices = ranks_bev.argsort()
+        print("indices:", indices[:10])
         ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
-
+        print("ranks_bev, ranks_depth, ranks_feat:", ranks_bev.shape, ranks_depth.shape, ranks_feat.shape)
         return (
             ranks_bev.int().contiguous(),
             ranks_depth.int().contiguous(),
@@ -183,7 +192,7 @@ def compute_bev_pool(
         if interval_starts is None:
             print_log("warning ---> no points within the predefined bev receptive field")
             dummy = torch.zeros(
-                size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])]
+                size=[view_feats.shape[0], view_feats.shape[2], self.nx[0], self.nx[1], self.nx[2]]
             ).to(view_feats)
             dummy = torch.cat(dummy.unbind(dim=2), 1)
             return dummy
@@ -197,11 +206,12 @@ def compute_bev_pool(
         bev_feat_shape = (
             depth_softmax.shape[0],
             int(self.nx[2]),
-            int(self.nx[1]),
             int(self.nx[0]),
+            int(self.nx[1]),
             view_feats.shape[-1],
         )  # (B, Z, Y, X, C)
-
+        print("bev_feat_shape:", bev_feat_shape)
+        print("nx[0], nx[1], nx[2]:", self.nx[0], self.nx[1], self.nx[2])
         bev_feat = bev_pool_v2(
             depth=depth_softmax,
             feat=view_feats,
@@ -219,7 +229,7 @@ def compute_bev_pool(
             bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)
 
         if self.visualize_bev_feat:
-            self.visualize_bev_feat(bev_feat)
+            self.plot_bev_feat(bev_feat)
 
         return bev_feat
      
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
index 57b18a69e..af1ba15de 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py
@@ -148,27 +148,13 @@ def backward(ctx, out_grad):
 def bev_pool_v2(
     depth, feat, ranks_depth, ranks_feat, ranks_bev, interval_starts, interval_lengths, bev_feat_shape, is_training
 ):
+    # Always use full (B, Z, H, W, C) buffer; QuickCumsumV2Cuda (Z=1) is ONNX-only.
+    del is_training
+    x = QuickCumsumV2TrainingCuda.apply(
+        depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths
+    )
 
-    if is_training:
-        x = QuickCumsumV2TrainingCuda.apply(
-            depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths
-        )
-    else:
-        # BEV Shape is (B, Z, Y, X, C)
-        out_height, out_width = bev_feat_shape[2], bev_feat_shape[3]
-        x = QuickCumsumV2Cuda.apply(
-            depth,
-            feat,
-            ranks_depth,
-            ranks_feat,
-            ranks_bev,
-            interval_starts,
-            interval_lengths,
-            out_height,
-            out_width,
-        )
-
-    # Final shape: (B, C, Z, Y, X)
+    # Final shape: (B, C, Z, H, W) — matches LSSTransform v1 after permute
     x = x.permute(0, 4, 1, 2, 3).contiguous()
     return x
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
index 751b24438..e47714f50 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
@@ -4,7 +4,7 @@
 ]
 
 experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_view_lss_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 2f556a122..4a0770971 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,7 +6,7 @@
 # Image network
 model = dict(
     view_transform=dict(
-        type="LSSTransform",
+        type="LSSTransformV2",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],

From 5da147b044da421caf82853698487929a7d7a829 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 18:01:25 +0900
Subject: [PATCH 136/183] Remove LSSTransformV2 debug prints and fix BEV rank and dummy tensor layout

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 38 +++++++++----------
 ...fusion_camera_30e_8xb16_j6gen2_base_50m.py |  2 +-
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index e30a5534a..59585cb49 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -27,7 +27,7 @@ def __init__(
         dbound: Tuple[float, float, float],
         collapse_z: bool = True,
         expand_batch_axis: bool = False,
-        visualize_bev_feat: bool = True,
+        visualize_bev_feat: bool = False,
     ):
         """
         Args:
@@ -104,13 +104,12 @@ def forward(
 
     def bev_pool_aux(self, geom_feats):
         B, N, D, H, W, C = geom_feats.shape
-        print("geom_feats:", geom_feats.shape)
         Nprime = B * N * D * H * W
         assert C == 3
 
         # record the index of selected points for acceleration purpose
-        ranks_depth = torch.range(0, Nprime - 1, dtype=torch.int, device=geom_feats.device)
-        ranks_feat = torch.range(0, Nprime // D - 1, dtype=torch.int, device=geom_feats.device)
+        ranks_depth = torch.arange(0, Nprime, dtype=torch.int, device=geom_feats.device)
+        ranks_feat = torch.arange(0, Nprime // D, dtype=torch.int, device=geom_feats.device)
         ranks_feat = ranks_feat.reshape(B, N, 1, H, W)
         ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()
 
@@ -137,24 +136,20 @@ def bev_pool_aux(self, geom_feats):
 
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
 
-        # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C)
-        print("ranks_depth, ranks_feat, geom_feats:", ranks_depth.shape, ranks_feat.shape, geom_feats.shape)
+        ranks_bev = (
+            geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
+            + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
+            + geom_feats[:, 0] * self.nx[1]
+            + geom_feats[:, 1]
+        )
         # ranks_bev = (
         #     geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
         #     + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
-        #     + geom_feats[:, 1] * (self.nx[0])
+        #     + geom_feats[:, 1] * self.nx[0]
         #     + geom_feats[:, 0]
         # )
-        ranks_bev = (
-            geom_feats[:, 0] * (self.nx[2] * self.nx[1] * B)
-            + geom_feats[:, 1] * (self.nx[2] * B)
-            + geom_feats[:, 2] * (B)
-            + geom_feats[:, 3]
-        )
         indices = ranks_bev.argsort()
-        print("indices:", indices[:10])
         ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
-        print("ranks_bev, ranks_depth, ranks_feat:", ranks_bev.shape, ranks_depth.shape, ranks_feat.shape)
         return (
             ranks_bev.int().contiguous(),
             ranks_depth.int().contiguous(),
@@ -192,9 +187,12 @@ def compute_bev_pool(
         if interval_starts is None:
             print_log("warning ---> no points within the predefined bev receptive field")
             dummy = torch.zeros(
-                size=[view_feats.shape[0], view_feats.shape[2], self.nx[0], self.nx[1], self.nx[2]]
-            ).to(view_feats)
-            dummy = torch.cat(dummy.unbind(dim=2), 1)
+                size=[view_feats.shape[0], view_feats.shape[2], self.nx[2], self.nx[1], self.nx[0]],
+                dtype=view_feats.dtype,
+                device=view_feats.device,
+            )
+            if self.collapse_z:
+                dummy = torch.cat(dummy.unbind(dim=2), 1)
             return dummy
 
         if self.expand_batch_axis:
@@ -206,12 +204,10 @@ def compute_bev_pool(
         bev_feat_shape = (
             depth_softmax.shape[0],
             int(self.nx[2]),
-            int(self.nx[0]),
             int(self.nx[1]),
+            int(self.nx[0]),
             view_feats.shape[-1],
         )  # (B, Z, Y, X, C)
-        print("bev_feat_shape:", bev_feat_shape)
-        print("nx[0], nx[1], nx[2]:", self.nx[0], self.nx[1], self.nx[2])
         bev_feat = bev_pool_v2(
             depth=depth_softmax,
             feat=view_feats,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
index 81859eed7..ce26b4905 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
@@ -11,7 +11,7 @@
 custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
 
 # user setting
-data_root = "data/t4datasets/"
+data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
 experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type

From ef44141130a8bb9fc4b648caef474ae0cde59d0f Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 22 May 2026 19:09:28 +0900
Subject: [PATCH 137/183] Update camera experiment group names and restore 8xb16 scheduler settings

---
 ...mera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py | 2 +-
 ...ion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 2 +-
 .../schedulers/default_30e_8xb16_adamw_linear_cosine.py     | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
index 9473ceb0f..92501d169 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py
@@ -3,7 +3,7 @@
     "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py",
 ]
 
-experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
index e47714f50..d559d40b1 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
@@ -3,7 +3,7 @@
     "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
 ]
 
-experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 18dd6126c..1430b10ba 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,12 +1,12 @@
 # learning rate
-lr = 1e-4
+lr = 2e-4
 t_max = 3
 max_epochs = 30
 val_interval = 1
 
-train_gpu_size = 2
+train_gpu_size = 8
 test_batch_size = 2
-train_batch_size = 8
+train_batch_size = 16
 
 param_scheduler = [
     # learning rate scheduler

From 9594f3b46afc2a94f59664e3439926ea7e3bac32 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 23 May 2026 11:24:43 +0900
Subject: [PATCH 138/183] Update BEVFusion-L info paths and experiment groups; resume 50e base run

---
 ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 4 ++--
 ...n_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 4 ++--
 ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 71c1829d4..08df461a4 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -13,9 +13,9 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
+info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
index 02ed7542a..44785da25 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
@@ -13,9 +13,9 @@
 
 # user setting
 data_root = "data/t4dataset/"
-info_directory_path = "info/kokseang_2_8/"
+info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_2/jpntaxi_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 28499b4f9..3499885e5 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -147,3 +147,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+resume = True
\ No newline at end of file

From 096f44389f0f359b9bf4be0ff7ee4da86dc5c888 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 23 May 2026 15:41:52 +0900
Subject: [PATCH 139/183] Use torch.amp.autocast and pad/topk-based proposal selection in BEVFusion head

---
 projects/BEVFusion/bevfusion/bevfusion.py     |  8 +-
 .../BEVFusion/bevfusion/bevfusion_head.py     | 94 ++++++++++++++-----
 ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py |  2 +-
 3 files changed, 78 insertions(+), 26 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index b113bb566..aeddc09fa 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -179,7 +179,7 @@ def extract_img_feat(
         if not using_image_features:
             x = self.get_image_backbone_features(x)
 
-        with torch.cuda.amp.autocast(enabled=False):
+        with torch.amp.autocast("cuda",enabled=False):
             # with torch.autocast(device_type='cuda', dtype=torch.float32):
             x = self.view_transform(
                 x,
@@ -200,14 +200,14 @@ def extract_img_feat(
     def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor:
         if points is not None:
             # NOTE(knzo25): training and normal inference
-            with torch.cuda.amp.autocast(enabled=False):
+            with torch.amp.autocast("cuda", enabled=False):
                 # with torch.autocast('cuda', enabled=False):
                 points = [point.float() for point in points]
                 feats, coords, sizes = self.voxelize(points)
                 batch_size = coords[-1, 0] + 1
         else:
-            # NOTE(knzo25): onnx inference. Voxelization happens outside the graph
-            with torch.cuda.amp.autocast(enabled=False):
+            # NOTE: (knzo25): onnx inference. Voxelization happens outside the graph
+            with torch.amp.autocast("cuda", enabled=False):
                 # with torch.autocast('cuda', enabled=False):
 
                 # NOTE(knzo25): onnx demmands this
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 96c38658b..df61ff629 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -175,12 +175,22 @@ def __init__(
 
             self.dense_heatmap_exclude_pooling_classes = sorted(
                 list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices))
-            )
+            ) 
+            # Pre-compute the correct order of the classes for the final local_max
+            heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes
+            local_concat_class_remapping = [
+                heatmap_concat_order.index(i)
+                for i in range(self.num_classes)
+            ]
         else:
             self.dense_heatmap_pooling_class_indices = None
             self.dense_heatmap_exclude_pooling_classes = None
-
+            local_concat_class_remapping = [i for i in range(self.num_classes)]
+        
+        # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict.
+        self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping))
         self.local_heatmap_padding = self.nms_kernel_size // 2
+        
         # NMS clusters
         self.nms_clusters = self.test_cfg.get("nms_clusters", [])
         # Add class indices for nms
@@ -201,7 +211,8 @@ def __init__(
             self.partial_ignore_labels = None
 
         print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \
-        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current")
+        {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}, \
+        local_concat_class_remapping: {self.local_concat_class_remapping}", logger="current")
 
     def create_2D_grid(self, x_size, y_size):
         meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]
@@ -261,14 +272,12 @@ def forward_single(self, inputs, metas):
         #################################
         # query initialization
         #################################
-        with torch.cuda.amp.autocast(enabled=False):
+        with torch.amp.autocast("cuda", enabled=False):
             # with torch.autocast('cuda', enabled=False):
             dense_heatmap = self.heatmap_head(fusion_feat.float())
         heatmap = dense_heatmap.detach().sigmoid()
-        local_max = torch.zeros_like(heatmap)
-        # equals to nms radius = voxel_size * out_size_factor * kenel_size
         if self.dense_heatmap_pooling_class_indices is not None:
-            # Pooling
+            # Pooling 
             selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :]
             local_max_inner = F.max_pool2d(
                 selected_heatmap,
@@ -276,31 +285,74 @@ def forward_single(self, inputs, metas):
                 stride=1,
                 padding=0,
             )
-            local_max[
-                :,
-                self.dense_heatmap_pooling_class_indices,
-                self.local_heatmap_padding : (-self.local_heatmap_padding),
-                self.local_heatmap_padding : (-self.local_heatmap_padding),
-            ] = local_max_inner
-            # Non-pooling classes
+
+            # 2. Restore spatial size using F.pad instead of slice mutation
+            local_max = F.pad(
+                local_max_inner, 
+                (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, 
+                self.local_heatmap_padding), 
+                mode="constant", 
+                value=0.0
+            )
+            
+            # 3. Any non-pooling classes
             if self.dense_heatmap_exclude_pooling_classes:
-                local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[
-                    :, self.dense_heatmap_exclude_pooling_classes
-                ]
+                excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :]
+                local_max = torch.cat([local_max, excluded_local_max], dim=1)
+                local_max = local_max[:, self.local_concat_class_remapping, :, :]
         else:
-            local_max = heatmap
+            local_max = heatmap 
+            
+        # local_max = torch.zeros_like(heatmap)
+        # # equals to nms radius = voxel_size * out_size_factor * kenel_size
+        # if self.dense_heatmap_pooling_class_indices is not None:
+        #     # Pooling
+        #     selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :]
+        #     local_max_inner = F.max_pool2d(
+        #         selected_heatmap,
+        #         kernel_size=self.nms_kernel_size,
+        #         stride=1,
+        #         padding=0,
+        #     )
+        #     local_max[
+        #         :,
+        #         self.dense_heatmap_pooling_class_indices,
+        #         self.local_heatmap_padding : (-self.local_heatmap_padding),
+        #         self.local_heatmap_padding : (-self.local_heatmap_padding),
+        #     ] = local_max_inner
+        #     # Non-pooling classes
+        #     if self.dense_heatmap_exclude_pooling_classes:
+        #         local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[
+        #             :, self.dense_heatmap_exclude_pooling_classes
+        #         ]
+        # else:
+        #     local_max = heatmap
 
         heatmap = heatmap * (heatmap == local_max)
         heatmap = heatmap.view(batch_size, heatmap.shape[1], -1)
 
         # top num_proposals among all classes
-        top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals]
-        top_proposals_class = top_proposals // heatmap.shape[-1]
-        top_proposals_index = top_proposals % heatmap.shape[-1]
+        flattened_heatmap = heatmap.view(batch_size, -1)
+        
+        # Use topk instead or argsort to avoid sorting the entire flattened heatmap.
+        _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True)
+        
+        # 2. Calculate class and spatial indices
+        # Use shape[-1] dynamically to handle grid sizes safely.
+        spatial_dim = heatmap.shape[-1]
+        top_proposals_class = top_proposals // spatial_dim
+        top_proposals_index = top_proposals % spatial_dim
         query_feat = fusion_feat_flatten.gather(
             index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1),
             dim=-1,
         )
+        # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals]
+        # top_proposals_class = top_proposals // heatmap.shape[-1]
+        # top_proposals_index = top_proposals % heatmap.shape[-1]
+        # query_feat = fusion_feat_flatten.gather(
+        #     index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1),
+        #     dim=-1,
+        # )
         self.query_labels = top_proposals_class
 
         # add category embedding
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 08df461a4..5a4bc00fc 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -152,4 +152,4 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth"
+load_from = "work_dirs/bevfusion_lidar_2_8_2/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/epoch_50.pth"

From c6c79f80d723bd0550baccbc0d1a896bfedbf089 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 23 May 2026 16:49:15 +0900
Subject: [PATCH 140/183] Make class-remapping buffer non-persistent; use 0.15 m voxels for lidar-intensity pipeline

---
 projects/BEVFusion/bevfusion/bevfusion_head.py                | 4 ++--
 .../default/pipelines/default_lidar_intensity_120m.py         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index df61ff629..0b59f274d 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -188,7 +188,7 @@ def __init__(
             local_concat_class_remapping = [i for i in range(self.num_classes)]
         
         # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict.
-        self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping))
+        self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False)
         self.local_heatmap_padding = self.nms_kernel_size // 2
         
         # NMS clusters
@@ -334,7 +334,7 @@ def forward_single(self, inputs, metas):
         # top num_proposals among all classes
         flattened_heatmap = heatmap.view(batch_size, -1)
         
-        # Use topk instead or argsort to avoid sorting the entire flattened heatmap.
+        # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
         _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True)
         
         # 2. Calculate class and spatial indices
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 1ce2aa2be..ecf983c37 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -5,8 +5,8 @@
 
 # range setting
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.17, 0.17, 0.2]
-grid_size = [1440, 1440, 41]
+voxel_size = [0.15, 0.15, 0.2]
+grid_size = [1632, 1632, 41]
 eval_class_range = {
     "car": 120,
     "truck": 120,

From 1e639c70d0e3cd9e92bdf226e0e0906c3e9714b0 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sun, 24 May 2026 01:46:53 +0900
Subject: [PATCH 141/183] Register bev_pos as a buffer; fold voxel sin/cos normalization into scale and bias

---
 .../BEVFusion/bevfusion/bevfusion_head.py     | 62 +++++--------------
 .../bevfusion/bevfusion_voxel_encoder.py      | 48 +++++++-------
 2 files changed, 43 insertions(+), 67 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 0b59f274d..8af826d47 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -92,6 +92,7 @@ def __init__(
         self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None
         self.loss_bbox = MODELS.build(loss_bbox)
         self.loss_heatmap = MODELS.build(loss_heatmap)
+        self.share_conv_out_channels = hidden_channel
 
         self.bbox_coder = build_bbox_coder(bbox_coder)
         self.sampling = False
@@ -157,7 +158,11 @@ def __init__(
         # Position Embedding for Cross-Attention, which is re-used during training # noqa: E501
         x_size = self.test_cfg["grid_size"][0] // self.test_cfg["out_size_factor"]
         y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"]
-        self.bev_pos = self.create_2D_grid(x_size, y_size)
+        self.spatial_dim = x_size * y_size
+        bev_pos = self.create_2D_grid(x_size, y_size)
+        
+        # Register the bev_pos as a buffer so it moves to the GPU automatically.
+        self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2)
 
         self.img_feat_pos = None
         self.img_feat_collapsed_pos = None
@@ -266,8 +271,7 @@ def forward_single(self, inputs, metas):
         #################################
         # image to BEV
         #################################
-        fusion_feat_flatten = fusion_feat.view(batch_size, fusion_feat.shape[1], -1)  # [BS, C, H*W]
-        bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(fusion_feat.device)
+        fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1)  # [BS, C, H*W]
 
         #################################
         # query initialization
@@ -302,34 +306,10 @@ def forward_single(self, inputs, metas):
                 local_max = local_max[:, self.local_concat_class_remapping, :, :]
         else:
             local_max = heatmap 
-            
-        # local_max = torch.zeros_like(heatmap)
-        # # equals to nms radius = voxel_size * out_size_factor * kenel_size
-        # if self.dense_heatmap_pooling_class_indices is not None:
-        #     # Pooling
-        #     selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :]
-        #     local_max_inner = F.max_pool2d(
-        #         selected_heatmap,
-        #         kernel_size=self.nms_kernel_size,
-        #         stride=1,
-        #         padding=0,
-        #     )
-        #     local_max[
-        #         :,
-        #         self.dense_heatmap_pooling_class_indices,
-        #         self.local_heatmap_padding : (-self.local_heatmap_padding),
-        #         self.local_heatmap_padding : (-self.local_heatmap_padding),
-        #     ] = local_max_inner
-        #     # Non-pooling classes
-        #     if self.dense_heatmap_exclude_pooling_classes:
-        #         local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[
-        #             :, self.dense_heatmap_exclude_pooling_classes
-        #         ]
-        # else:
-        #     local_max = heatmap
 
         heatmap = heatmap * (heatmap == local_max)
-        heatmap = heatmap.view(batch_size, heatmap.shape[1], -1)
+        # (BS, num_classes, H*W)
+        heatmap = heatmap.view(batch_size, self.num_classes, -1)
 
         # top num_proposals among all classes
         flattened_heatmap = heatmap.view(batch_size, -1)
@@ -339,31 +319,21 @@ def forward_single(self, inputs, metas):
         
         # 2. Calculate class and spatial indices
         # Use shape[-1] dynamically to handle grid sizes safely.
-        spatial_dim = heatmap.shape[-1]
-        top_proposals_class = top_proposals // spatial_dim
-        top_proposals_index = top_proposals % spatial_dim
+        top_proposals_class = top_proposals // self.spatial_dim
+        top_proposals_index = top_proposals % self.spatial_dim
         query_feat = fusion_feat_flatten.gather(
-            index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1),
+            index=top_proposals_index[:, None, :].expand(-1, self.share_conv_out_channels, -1),
             dim=-1,
         )
-        # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals]
-        # top_proposals_class = top_proposals // heatmap.shape[-1]
-        # top_proposals_index = top_proposals % heatmap.shape[-1]
-        # query_feat = fusion_feat_flatten.gather(
-        #     index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1),
-        #     dim=-1,
-        # )
         self.query_labels = top_proposals_class
 
         # add category embedding
         one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1)
         query_cat_encoding = self.class_encoding(one_hot.float())
         query_feat += query_cat_encoding
-
-        query_pos = bev_pos.gather(
-            index=top_proposals_index[:, None, :].permute(0, 2, 1).expand(-1, -1, bev_pos.shape[-1]),
-            dim=1,
-        )
+        
+        # (B, N, 2)
+        query_pos = self.bev_pos.squeeze(0)[top_proposals_index]
         #################################
         # transformer decoder layer (Fusion feature as K,V)
         #################################
@@ -371,7 +341,7 @@ def forward_single(self, inputs, metas):
         for i in range(self.num_decoder_layers):
             # Transformer Decoder Layer
             # :param query: B C Pq    :param query_pos: B Pq 3/6
-            query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=bev_pos)
+            query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos)
 
             # Prediction
             res_layer = self.prediction_heads[i](query_feat)
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 83cd70482..30afdc41d 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -2,8 +2,6 @@
 
 import torch
 import numpy as np
-from mmcv.cnn import build_norm_layer
-from mmcv.ops import DynamicScatter
 from torch import Tensor, nn
 
 from mmdet3d.registry import MODELS
@@ -26,14 +24,28 @@ def __init__(self,
             max_norm_values (Tuple[float]): Maximum values for the features.
             in_channels (int): Number of input channels.
         """
-        super(HardSimpleVoxelSinCosEncoder, self).__init__()
+        super().__init__()
       
         # Create PillarFeatureNet layers
         self.in_channels = in_channels
-
-        self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
-        self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
-        self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float())
+        
+        # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP 
+        min_norm_values = torch.tensor(min_norm_values)
+        max_norm_values = torch.tensor(max_norm_values)
+        # Let alpha = pi * exponents, beta = max - min
+        # y = ((x - min) / beta) * alpha 
+        # y = alpha / beta * (x - min)
+        # y = (alpha / beta) * x - (alpha / beta) * min 
+        # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta 
+        # y = scale * x + bias
+        exponents = (2 ** torch.arange(0, self.in_channels)).float()
+        alpha = (torch.pi * exponents).unsqueeze(0) # (1, C)
+        beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1)
+        scale = alpha / beta
+        bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C)
+        
+        self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C)
+        self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C)
 
     def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
                 *args, **kwargs) -> Tensor:
@@ -49,23 +61,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
             torch.Tensor: Features of pillars in shape (M, C*C*2).
 
         """
-        num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
-        
         # Mean in the voxel
-        # (N, M, 3) -> (N, 3)
-        voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(
-                    -1, 1)).contiguous()
+        # (N, M, C) -> (N, C)
+        voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous()
 
-        # min-max normalization, (N, 3) -> (N, 3)
-        voxel_features_norm = (voxel_features - \
-         self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
-        
+        # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C)
+        # FMA (fused multiply-add): y = bias + scale * voxel_mean_features
+        y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1))
         # SinCos encoding
-        # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
-        y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
-        # (N*3, 3) -> (N, 3*3)
-        y = y.reshape(num_voxels, -1)
-        # (N, 3*3) -> (N, 3*3*2)
+        # (N, C, C) -> (N, C*C)
+        y = y.reshape(-1, self.in_channels*self.in_channels)
+        # (N, C*C) -> (N, C*C*2)
         voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
         
         return voxel_fourier_features

From 03d94164b806d5596e423b2592c10feef50b8d85 Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Sun, 24 May 2026 19:51:48 +0900
Subject: [PATCH 142/183] Use fixed spatial dims in head; add dense_output_shapes to sparse encoder

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  8 +++---
 .../BEVFusion/bevfusion/sparse_encoder.py     | 27 ++++++++++++-------
 ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py |  1 +
 ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py |  1 +
 ...voxel_second_secfpn_50e_8xb16_base_120m.py |  1 +
 .../default_lidar_second_secfpn_120m.py       |  2 --
 .../default/pipelines/default_lidar_120m.py   |  3 +++
 .../pipelines/default_lidar_intensity_120m.py |  3 +++
 8 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 8af826d47..e3f5610c8 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -265,13 +265,13 @@ def forward_single(self, inputs, metas):
         Returns:
             list[dict]: Output results for tasks.
         """
-        batch_size = inputs.shape[0]
+        # batch_size = inputs.shape[0]
         fusion_feat = self.shared_conv(inputs)
 
         #################################
         # image to BEV
         #################################
-        fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1)  # [BS, C, H*W]
+        fusion_feat_flatten = fusion_feat.view(-1, self.share_conv_out_channels, self.spatial_dim)  # [BS, C, H*W]
 
         #################################
         # query initialization
@@ -309,10 +309,10 @@ def forward_single(self, inputs, metas):
 
         heatmap = heatmap * (heatmap == local_max)
         # (BS, num_classes, H*W)
-        heatmap = heatmap.view(batch_size, self.num_classes, -1)
+        heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim)
 
         # top num_proposals among all classes
-        flattened_heatmap = heatmap.view(batch_size, -1)
+        flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim)
         
         # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
         _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True)
diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index 6e98a73ab..cd2ffb50b 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -11,7 +11,7 @@
 from mmdet3d.registry import MODELS
 
 if IS_SPCONV2_AVAILABLE:
-    from spconv.pytorch import SparseConvTensor
+    from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor
 else:
     from mmcv.ops import SparseConvTensor
 
@@ -28,6 +28,7 @@ class BEVFusionSparseEncoder(SparseEncoder):
     Args:
         in_channels (int): The number of input channels.
         sparse_shape (list[int]): The sparse shape of input tensor.
+        dense_output_shapes (list[int]): The final shape of the dense output tensor.
         order (list[str], optional): Order of conv module.
             Defaults to ('conv', 'norm', 'act').
         norm_cfg (dict, optional): Config of normalization layer. Defaults to
@@ -52,6 +53,7 @@ def __init__(
         self,
         in_channels,
         sparse_shape,
+        dense_output_shapes,
         order=("conv", "norm", "act"),
         norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01),
         base_channels=16,
@@ -60,19 +62,17 @@ def __init__(
         encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)),
         block_type="conv_module",
         return_middle_feats=False,
-        encoder_strides=(2, 2, 2, -1),
-        output_stride=2,
     ):
         super(SparseEncoder, self).__init__()
         assert block_type in ["conv_module", "basicblock"]
         self.sparse_shape = sparse_shape
+        self.dense_output_shapes = dense_output_shapes
         self.in_channels = in_channels
         self.order = order
         self.base_channels = base_channels
         self.output_channels = output_channels
         self.encoder_channels = encoder_channels
         self.encoder_paddings = encoder_paddings
-        self.encoder_strides = encoder_strides
         self.stage_num = len(self.encoder_channels)
         self.fp16_enabled = False
         self.return_middle_feats = return_middle_feats
@@ -149,11 +149,20 @@ def forward(self, voxel_features, coors, batch_size):
         # for detection head
         # [200, 176, 5] -> [200, 176, 2]
         out = self.conv_out(encode_features[-1])
-        spatial_features = out.dense()
-
-        N, C, H, W, D = spatial_features.shape
-        spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous()
-        spatial_features = spatial_features.view(N, C * D, H, W)
+        # Return (N, H, W, D, C) instead of (N, C, H, W, D)
+        spatial_features = out.dense(channels_first=False)
+
+        # Reshape to (N, C, D, H, W)        
+        spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous()
+        spatial_features = spatial_features.view(
+            batch_size, 
+            self.output_channels * self.dense_output_shapes[2], 
+            self.dense_output_shapes[0], 
+            self.dense_output_shapes[1], 
+        )
+        # N, C, H, W, D = spatial_features.shape
+        # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous()
+        # spatial_features = spatial_features.view(N, C * D, H, W)
 
         if self.return_middle_feats:
             return spatial_features, encode_features
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 5a4bc00fc..269a0f00e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -35,6 +35,7 @@
     pts_middle_encoder=dict(
         in_channels=50,
         sparse_shape=_base_.grid_size,
+        dense_output_shapes=_base_.sparse_dense_output_shapes,
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
index 44785da25..d3c5154c6 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
@@ -35,6 +35,7 @@
     pts_middle_encoder=dict(
         in_channels=50,
         sparse_shape=_base_.grid_size,
+        dense_output_shapes=_base_.sparse_dense_output_shapes,
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 3499885e5..70c27f0a7 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -35,6 +35,7 @@
     pts_middle_encoder=dict(
         in_channels=32,
         sparse_shape=_base_.grid_size,
+        dense_output_shapes=_base_.sparse_dense_output_shapes,
     ),
     bbox_head=dict(
         class_names=_base_.class_names,  # Use class names to identify the correct class indices
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index bad602cb7..aa275f558 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -24,9 +24,7 @@
         norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01),
         encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),
         encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)),
-        encoder_strides=(2, 2, 2, -1), # No stride for the last stage
         block_type="basicblock",
-        output_stride=2, # downsample stride
     ),
     pts_backbone=dict(
         type="SECOND",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 317c594c1..613ff8d0c 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -7,6 +7,9 @@
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
 voxel_size = [0.15, 0.15, 0.2]
 grid_size = [1632, 1632, 41]
+# Sparse dense output shapes
+sparse_dense_output_shapes = [204, 204, 2]
+
 eval_class_range = {
     "car": 120,
     "truck": 120,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index ecf983c37..e7b78955a 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -7,6 +7,9 @@
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
 voxel_size = [0.15, 0.15, 0.2]
 grid_size = [1632, 1632, 41]
+# Sparse dense output shapes
+sparse_dense_output_shapes = [204, 204, 2]
+
 eval_class_range = {
     "car": 120,
     "truck": 120,

From 94e175279116c6e34cb570b9b003f0b339691f4d Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Sun, 24 May 2026 21:51:41 +0900
Subject: [PATCH 143/183] Add CustomSparseConvTensor and layer_norm passthrough rewriter

---
 .../bevfusion/custom_sparse_conv_tensor.py    | 68 +++++++++++++++++++
 ...y_lidar_only_intensity_tensorrt_dynamic.py |  1 +
 .../BEVFusion/deploy/rewriters/__init__.py    |  3 +
 .../BEVFusion/deploy/rewriters/layer_norm.py  | 15 ++++
 4 files changed, 87 insertions(+)
 create mode 100644 projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
 create mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py
 create mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py

diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
new file mode 100644
index 000000000..c6841f64c
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -0,0 +1,68 @@
+"""
+Custom SparseConvTensor for BEVFusion.
+This customization is used to support cleaner ONNX export of sparse convolutions.
+"""
+
+from typing import Union, List, Optional
+
+import torch
+from spconv.pytorch import SparseConvTensor
+from spconv.core import ConvAlgo
+
+
+class CustomSparseConvTensor(SparseConvTensor):
+    def __init__(self,
+                 features: torch.Tensor,
+                 indices: torch.Tensor,
+                 spatial_shape: Union[List[int], np.ndarray],
+                 batch_size: int,
+                 grid: Optional[torch.Tensor] = None,
+                 voxel_num: Optional[torch.Tensor] = None,
+                 indice_dict: Optional[dict] = None,
+                 benchmark: bool = False,
+                 permanent_thrust_allocator: bool = False,
+                 enable_timer: bool = False,
+                 force_algo: Optional[ConvAlgo] = None):
+      """
+      Check the superclass documentation for more details.
+      """
+      
+      super().__init__(
+        features=features, 
+        indices=indices, 
+        spatial_shape=spatial_shape, 
+        batch_size=batch_size, 
+        grid=grid, 
+        voxel_num=voxel_num, 
+        indice_dict=indice_dict, 
+        benchmark=benchmark, 
+        permanent_thrust_allocator=permanent_thrust_allocator, 
+        enable_timer=enable_timer, 
+        force_algo=force_algo)
+        
+      # Precomputation for dense output shape.
+      self.spatial_shape_list = list(self.spatial_shape)
+      self.spatial_ndim = len(self.spatial_shape_list)
+      self.trans_params = list(range(0, self.spatial_ndim + 1))
+      self.trans_params.insert(1, self.spatial_ndim + 1)
+
+    def dense(self, channels_first: bool = True):
+        """
+        Scatter features into a zero-filled dense tensor: (batch, *spatial, C), or (batch, C, *spatial) if channels_first.
+        """
+        C = self.features.shape[1]
+        out = self.features.new_zeros(  # Tensor has no `.zeros`; new_zeros keeps the dtype/device of features
+            [
+                self.batch_size,
+                *self.spatial_shape_list,
+                C,
+            ]
+        )
+        idx = self.indices.to(self.features.device).long()  # [N, 1+D]
+        out.index_put_(idx.unbind(1), self.features)
+        if not channels_first:
+            return out 
+        
+        out = out.permute(*self.trans_params).contiguous()  # trans_params moves the trailing C axis to dim 1
+        return out
+ 
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
index e22e0f41b..0936cf8a0 100644
--- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
+++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
@@ -5,6 +5,7 @@
         "projects.BEVFusion.deploy",
         "projects.BEVFusion.bevfusion",
         "projects.SparseConvolution",
+        "projects.BEVFusion.deploy.rewriters",
     ],
     allow_failed_imports=False,
 )
diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py
new file mode 100644
index 000000000..1eb59b5c1
--- /dev/null
+++ b/projects/BEVFusion/deploy/rewriters/__init__.py
@@ -0,0 +1,3 @@
+from .layer_norm import layer_norm__passthrough
+
+__all__ = ["layer_norm__passthrough"]
\ No newline at end of file
diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py
new file mode 100644
index 000000000..0b8cc09c7
--- /dev/null
+++ b/projects/BEVFusion/deploy/rewriters/layer_norm.py
@@ -0,0 +1,15 @@
+import torch.nn.functional as F
+from mmdeploy.core import FUNCTION_REWRITER
+
+
+@FUNCTION_REWRITER.register_rewriter(
+    func_name="torch.nn.functional.layer_norm", backend="tensorrt"
+)
+@FUNCTION_REWRITER.register_rewriter(
+    func_name="torch.nn.functional.layer_norm", backend="default"
+)
+def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5):
+    # Call the *original* op so the ONNX exporter sees aten::layer_norm
+    # and emits a single LayerNormalization node at opset >= 17.
+    ctx = FUNCTION_REWRITER.get_context()
+    return ctx.origin_func(input, normalized_shape, weight, bias, eps)
\ No newline at end of file

From 36bfd4ed24898d13deb977a926d36329f8fc9b22 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 25 May 2026 08:15:29 +0900
Subject: [PATCH 144/183] Bump j6gen2 experiment group name to 2_8_3

---
 ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 269a0f00e..238690492 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 

From 26e6bb0d8337d3b12a0fc94cfad66442ced9eca6 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 25 May 2026 09:43:58 +0900
Subject: [PATCH 145/183] Replace layer_norm rewriter with mmdeploy symbolic purge in exporter; bump ONNX opset to 18

---
 .../bevfusion/custom_sparse_conv_tensor.py    |  2 ++
 ...y_lidar_only_intensity_tensorrt_dynamic.py |  7 ++--
 projects/BEVFusion/deploy/exporter.py         | 32 +++++++++++++++++--
 .../BEVFusion/deploy/rewriters/__init__.py    |  3 --
 .../BEVFusion/deploy/rewriters/layer_norm.py  | 15 ---------
 5 files changed, 35 insertions(+), 24 deletions(-)
 delete mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py
 delete mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py

diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
index c6841f64c..8481e4853 100644
--- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -6,6 +6,7 @@
 from typing import Union, List, Optional
 
 import torch
+import numpy as np
 from spconv.pytorch import SparseConvTensor
 from spconv.core import ConvAlgo
 
@@ -58,6 +59,7 @@ def dense(self, channels_first: bool = True):
                 C,
             ]
         )
+        print("out.shape: ", out.shape)
         idx = self.indices.to(self.features.device).long()  # [N, 1+D]
         out.index_put_(idx.unbind(1), self.features)
         if not channels_first:
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
index 0936cf8a0..0863889bb 100644
--- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
+++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
@@ -4,8 +4,7 @@
     imports=[
         "projects.BEVFusion.deploy",
         "projects.BEVFusion.bevfusion",
-        "projects.SparseConvolution",
-        "projects.BEVFusion.deploy.rewriters",
+        "projects.SparseConvolution"
     ],
     allow_failed_imports=False,
 )
@@ -30,7 +29,7 @@
     type="onnx",
     export_params=True,
     keep_initializers_as_inputs=False,
-    opset_version=17,
+    opset_version=18,
     save_file="bevfusion_lidar_intensity.onnx",
     input_names=["voxels", "coors", "num_points_per_voxel"],
     output_names=["bbox_pred", "score", "label_pred"],
@@ -46,5 +45,5 @@
         },
     },
     input_shape=None,
-    verbose=True,
+    verbose=False,
 )
diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py
index b1a430eb9..ab5489a7e 100644
--- a/projects/BEVFusion/deploy/exporter.py
+++ b/projects/BEVFusion/deploy/exporter.py
@@ -2,7 +2,7 @@
 
 import logging
 import os.path as osp
-from typing import Optional
+from typing import Optional, Any
 
 import numpy as np
 import onnx
@@ -11,12 +11,33 @@
 from builder import ExportBuilder
 from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer
 from data_classes import ModelData, SetupConfigs
-from mmdeploy.core import RewriterContext
+from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER
 from mmdeploy.utils import (
     get_root_logger,
 )
 
 
+def purge_mmdeploy_symbolics(op_names: list[str]) -> dict:
+    """Delete mmdeploy's symbolic records for the given op names.
+    Both the op-name key (e.g. `"layer_norm"`) and the function-path
+    bookkeeping key (e.g. `"mmdeploy.pytorch.symbolics.layer_norm.layer_norm__default"`)
+    are removed. Returns a snapshot of what was deleted for optional restore.
+    """
+    records = SYMBOLIC_REWRITER._registry._rewrite_records
+    removed: dict = {}
+    for key in list(records.keys()):
+        # Primary key: the aten op name itself.
+        if key in op_names:
+            removed[key] = records.pop(key)
+            continue
+        # Bookkeeping key: full Python path of an implementer function.
+        # Match by "...symbolics.<op_name>." or "...symbolics.<op_name>__"
+        if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key
+               for op in op_names):
+            removed[key] = records.pop(key)
+    return removed
+
+
 class Torch2OnnxExporter:
 
     def __init__(self, setup_configs: SetupConfigs, log_level: str):
@@ -62,7 +83,14 @@ def _export_model(
           patched_model (torch.nn.Module): Patched Pytorch model.
           ir_configs (dict): Configs for intermediate representations in ONNX.
         """
+        # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported 
+        # in the tensorrt version
+        removed = purge_mmdeploy_symbolics(["layer_norm"])
+        self.logger.info(
+          f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}"
+        )
         with RewriterContext(**context_info), torch.no_grad():
+            list_layer_norm_rewriters()
             image_feats = None
             if "img_backbone" in self.setup_configs.model_cfg.model:
                 image_feats = self._export_image_backbone(model_data, ir_configs, patched_model)
diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py
deleted file mode 100644
index 1eb59b5c1..000000000
--- a/projects/BEVFusion/deploy/rewriters/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .layer_norm import layer_norm__passthrough
-
-__all__ = ["layer_norm__passthrough"]
\ No newline at end of file
diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py
deleted file mode 100644
index 0b8cc09c7..000000000
--- a/projects/BEVFusion/deploy/rewriters/layer_norm.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import torch.nn.functional as F
-from mmdeploy.core import FUNCTION_REWRITER
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    func_name="torch.nn.functional.layer_norm", backend="tensorrt"
-)
-@FUNCTION_REWRITER.register_rewriter(
-    func_name="torch.nn.functional.layer_norm", backend="default"
-)
-def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5):
-    # Call the *original* op so the ONNX exporter sees aten::layer_norm
-    # and emits a single LayerNormalization node at opset >= 17.
-    ctx = FUNCTION_REWRITER.get_context()
-    return ctx.origin_func(input, normalized_shape, weight, bias, eps)
\ No newline at end of file

From b096792a9e87f1307c5fe6b5d9f99ad6bde25438 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 25 May 2026 12:42:26 +0900
Subject: [PATCH 146/183] Replace CustomSparseConvTensor with sparse_to_dense

---
 .../bevfusion/custom_sparse_conv_tensor.py    | 92 +++++++------------
 .../BEVFusion/bevfusion/sparse_encoder.py     |  7 +-
 projects/BEVFusion/deploy/exporter.py         |  1 -
 3 files changed, 35 insertions(+), 65 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
index 8481e4853..b16191ca8 100644
--- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -2,69 +2,39 @@
 Custom SparseConvTensor for BEVFusion.
 This customiztion is used to support cleaner ONNX export of sparse convolutions.
 """
+import torch
 
-from typing import Union, List, Optional
+from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
 
-import torch
-import numpy as np
-from spconv.pytorch import SparseConvTensor
-from spconv.core import ConvAlgo
+if IS_SPCONV2_AVAILABLE:
+    from spconv.pytorch import SparseConvTensor
+else:
+    from mmcv.ops import SparseConvTensor
 
 
-class CustomSparseConvTensor(SparseConvTensor):
-    def __init__(self,
-                 features: torch.Tensor,
-                 indices: torch.Tensor,
-                 spatial_shape: Union[List[int], np.ndarray],
-                 batch_size: int,
-                 grid: Optional[torch.Tensor] = None,
-                 voxel_num: Optional[torch.Tensor] = None,
-                 indice_dict: Optional[dict] = None,
-                 benchmark: bool = False,
-                 permanent_thrust_allocator: bool = False,
-                 enable_timer: bool = False,
-                 force_algo: Optional[ConvAlgo] = None):
-      """
-      Check the superclass documentation for more details.
-      """
-      
-      super().__init__(
-        features=features, 
-        indices=indices, 
-        spatial_shape=spatial_shape, 
-        batch_size=batch_size, 
-        grid=grid, 
-        voxel_num=voxel_num, 
-        indice_dict=indice_dict, 
-        benchmark=benchmark, 
-        permanent_thrust_allocator=permanent_thrust_allocator, 
-        enable_timer=enable_timer, 
-        force_algo=force_algo)
-        
-      # Precomputation for dense output shape.
-      self.spatial_shape_list = list(self.spatial_shape)
-      self.spatial_ndim = len(self.spatial_shape_list)
-      self.trans_params = list(range(0, self.spatial_ndim + 1))
-      self.trans_params.insert(1, self.spatial_ndim + 1)
+def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_shapes: list[int], out_channels: int):
+    """
+    Convert the sparse tensor to a dense tensor.
+    """
+    H, W, D = spatial_shapes
+    num_cells = batch_size * H * W * D
+    idx = sparse_tensor.indices.to(sparse_tensor.features.device).long()  # [N, 1+D]
+    b, h, w, d = idx.unbind(1)
+    # b * (H * W * D) + h*(W*D) + w*D + d
+    # Factor out the common terms D and W
+    # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d
+    linear_idx = ((b * H + h) * W + w) * D + d                     # [N]
 
-    def dense(self, channels_first: bool = True):
-        """
-        Convert the sparse tensor to a dense tensor.
-        """
-        C = self.features.shape[1]
-        out = self.features.zeros(
-            [
-                self.batch_size,
-                *self.spatial_shape_list,
-                C,
-            ]
-        )
-        print("out.shape: ", out.shape)
-        idx = self.indices.to(self.features.device).long()  # [N, 1+D]
-        out.index_put_(idx.unbind(1), self.features)
-        if not channels_first:
-            return out 
-        
-        out = out.permute(*self.trans_params).contiguous()
-        return out
- 
\ No newline at end of file
+    out = torch.zeros(
+        [
+            num_cells,
+            out_channels
+        ], 
+        device=sparse_tensor.features.device,
+        dtype=sparse_tensor.features.dtype,
+    )
+    # out = out.index_copy(0, linear_idx, sparse_tensor.features)
+    # out = out.scatter(0, linear_idx, sparse_tensor.features)
+    scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels)            # [N, C]
+    out = out.scatter(0, scatter_idx, sparse_tensor.features)
+    return out.view(batch_size, H, W, D, out_channels)
diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index cd2ffb50b..f46ca621b 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -11,10 +11,11 @@
 from mmdet3d.registry import MODELS
 
 if IS_SPCONV2_AVAILABLE:
-    from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor
+    from spconv.pytorch import SparseConvTensor
 else:
     from mmcv.ops import SparseConvTensor
 
+from .custom_sparse_conv_tensor import sparse_to_dense
 
 
 @MODELS.register_module()
@@ -150,8 +151,8 @@ def forward(self, voxel_features, coors, batch_size):
         # [200, 176, 5] -> [200, 176, 2]
         out = self.conv_out(encode_features[-1])
         # Return (N, H, W, D, C) instead of (N, C, H, W, D)
-        spatial_features = out.dense(channels_first=False)
-
+        # spatial_features = out.dense(channels_first=False)
+        spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
         # Reshape to (N, C, D, H, W)        
         spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous()
         spatial_features = spatial_features.view(
diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py
index ab5489a7e..4c5e72ac7 100644
--- a/projects/BEVFusion/deploy/exporter.py
+++ b/projects/BEVFusion/deploy/exporter.py
@@ -90,7 +90,6 @@ def _export_model(
           f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}"
         )
         with RewriterContext(**context_info), torch.no_grad():
-            list_layer_norm_rewriters()
             image_feats = None
             if "img_backbone" in self.setup_configs.model_cfg.model:
                 image_feats = self._export_image_backbone(model_data, ir_configs, patched_model)

From 0af40fc3ad622b7dbe1198d77329072512127264 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 25 May 2026 18:59:46 +0900
Subject: [PATCH 147/183] Add commented sparse_to_dense parity check

---
 .../BEVFusion/bevfusion/sparse_encoder.py     | 38 +++++++++++++++----
 ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py |  2 +-
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index f46ca621b..e046d8917 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 
+import os
 from typing import Dict, Optional
 
 import numpy as np
@@ -152,18 +153,39 @@ def forward(self, voxel_features, coors, batch_size):
         out = self.conv_out(encode_features[-1])
         # Return (N, H, W, D, C) instead of (N, C, H, W, D)
         # spatial_features = out.dense(channels_first=False)
-        spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
+        # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
         # Reshape to (N, C, D, H, W)        
+        # spatial_features = out.dense(channels_first=False)
+
+        # with torch.no_grad():
+        #     ref = spatial_features
+        #     cand = sparse_to_dense(
+        #         out, batch_size, self.dense_output_shapes, self.output_channels
+        #     )
+        #     assert ref.shape == cand.shape, (
+        #         f"shape mismatch: dense={tuple(ref.shape)} "
+        #         f"sparse_to_dense={tuple(cand.shape)}"
+        #     )
+        #     max_abs = (ref - cand).abs().max().item()
+        #     num_mismatch = (ref != cand).sum().item()
+        #     allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6)
+        #     print(
+        #         f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: "
+        #         f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} "
+        #         f"num_mismatch={num_mismatch} allclose={allclose}"
+        #     )
+        #     assert allclose, (
+        #         "sparse_to_dense disagrees with out.dense(channels_first=False) "
+        #         "-- index/order mismatch in BEVFusionSparseEncoder."
+        #     )
+        spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
         spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous()
         spatial_features = spatial_features.view(
-            batch_size, 
-            self.output_channels * self.dense_output_shapes[2], 
-            self.dense_output_shapes[0], 
-            self.dense_output_shapes[1], 
+            batch_size,
+            self.output_channels * self.dense_output_shapes[2],
+            self.dense_output_shapes[0],
+            self.dense_output_shapes[1],
         )
-        # N, C, H, W, D = spatial_features.shape
-        # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous()
-        # spatial_features = spatial_features.view(N, C * D, H, W)
 
         if self.return_middle_feats:
             return spatial_features, encode_features
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 238690492..7a8afad3e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base_normal_dense/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 

From f871a14e2cb15b9c9a8a9c0f96f53b612ae6585e Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Mon, 25 May 2026 22:08:46 +0900
Subject: [PATCH 148/183] Set 120m voxel size to 0.17; dump deploy inputs

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  2 +-
 .../bevfusion/custom_sparse_conv_tensor.py    |  5 ++--
 .../BEVFusion/bevfusion/sparse_encoder.py     | 29 ++-----------------
 .../default/pipelines/default_lidar_120m.py   |  6 ++--
 .../pipelines/default_lidar_intensity_120m.py |  6 ++--
 projects/BEVFusion/deploy/containers.py       |  9 ++++--
 6 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index e3f5610c8..d06305db9 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -315,7 +315,7 @@ def forward_single(self, inputs, metas):
         flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim)
         
         # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
-        _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True)
+        _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False)
         
         # 2. Calculate class and spatial indices
         # Use shape[-1] dynamically to handle grid sizes safely.
diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
index b16191ca8..054b0609b 100644
--- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -24,11 +24,12 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh
     # Factor out the common terms D and W
     # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d
     linear_idx = ((b * H + h) * W + w) * D + d                     # [N]
-
+    
+		# out_channels = sparse_tensor.features.shape[1]
     out = torch.zeros(
         [
             num_cells,
-            out_channels
+            sparse_tensor.features.shape[1]
         ], 
         device=sparse_tensor.features.device,
         dtype=sparse_tensor.features.dtype,
diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index e046d8917..0fc20cd19 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -151,34 +151,9 @@ def forward(self, voxel_features, coors, batch_size):
         # for detection head
         # [200, 176, 5] -> [200, 176, 2]
         out = self.conv_out(encode_features[-1])
-        # Return (N, H, W, D, C) instead of (N, C, H, W, D)
-        # spatial_features = out.dense(channels_first=False)
-        # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
-        # Reshape to (N, C, D, H, W)        
-        # spatial_features = out.dense(channels_first=False)
-
-        # with torch.no_grad():
-        #     ref = spatial_features
-        #     cand = sparse_to_dense(
-        #         out, batch_size, self.dense_output_shapes, self.output_channels
-        #     )
-        #     assert ref.shape == cand.shape, (
-        #         f"shape mismatch: dense={tuple(ref.shape)} "
-        #         f"sparse_to_dense={tuple(cand.shape)}"
-        #     )
-        #     max_abs = (ref - cand).abs().max().item()
-        #     num_mismatch = (ref != cand).sum().item()
-        #     allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6)
-        #     print(
-        #         f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: "
-        #         f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} "
-        #         f"num_mismatch={num_mismatch} allclose={allclose}"
-        #     )
-        #     assert allclose, (
-        #         "sparse_to_dense disagrees with out.dense(channels_first=False) "
-        #         "-- index/order mismatch in BEVFusionSparseEncoder."
-        #     )
+        
         spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
+        # spatial_features = out.dense(channels_first=False)
         spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous()
         spatial_features = spatial_features.view(
             batch_size,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 613ff8d0c..d384bb4e0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -5,10 +5,10 @@
 
 # range setting
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.15, 0.15, 0.2]
-grid_size = [1632, 1632, 41]
+voxel_size = [0.17, 0.17, 0.2]
+grid_size = [1440, 1440, 41]
 # Sparse dense output shapes
-sparse_dense_output_shapes = [204, 204, 2]
+sparse_dense_output_shapes = [180, 180, 2]
 
 eval_class_range = {
     "car": 120,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index e7b78955a..394647684 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -5,10 +5,10 @@
 
 # range setting
 point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0]
-voxel_size = [0.15, 0.15, 0.2]
-grid_size = [1632, 1632, 41]
+voxel_size = [0.17, 0.17, 0.2]
+grid_size = [1440, 1440, 41]
 # Sparse dense output shapes
-sparse_dense_output_shapes = [204, 204, 2]
+sparse_dense_output_shapes = [180, 180, 2]
 
 eval_class_range = {
     "car": 120,
diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py
index 51f2316df..018b5db7e 100644
--- a/projects/BEVFusion/deploy/containers.py
+++ b/projects/BEVFusion/deploy/containers.py
@@ -2,7 +2,7 @@
 import torch.nn.functional as F
 
 # Wrapper Classes for onnx conversion
-
+import numpy as np
 
 class TrtBevFusionImageBackboneContainer(torch.nn.Module):
     def __init__(self, mod, mean, std) -> None:
@@ -49,8 +49,13 @@ def forward(
 
         batch_inputs_dict = {
             "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel},
+         
         }
-
+        
+        voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin")
+        coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin")
+        num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin")
+        print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape)
         if points is not None:
             batch_inputs_dict["points"] = [points]
 

From 9a4642a59d8f869cb389d7317adbdc7267e069da Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 26 May 2026 17:12:53 +0900
Subject: [PATCH 149/183] Add TopK op with autoware::Argsort symbolic

---
 .../BEVFusion/bevfusion/bevfusion_head.py     |  4 +-
 .../bevfusion/custom_sparse_conv_tensor.py    |  1 -
 .../BEVFusion/bevfusion/ops/topk/__init__.py  |  0
 projects/BEVFusion/bevfusion/ops/topk/topk.py | 45 +++++++++++++++++++
 ...voxel_second_secfpn_50e_8xb16_base_120m.py |  4 +-
 .../default_camera_lidar_intensity_120m.py    |  2 +
 .../default/pipelines/default_lidar_120m.py   |  2 +
 .../pipelines/default_lidar_intensity_120m.py |  2 +
 8 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 projects/BEVFusion/bevfusion/ops/topk/__init__.py
 create mode 100644 projects/BEVFusion/bevfusion/ops/topk/topk.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index d06305db9..5b6c5ad92 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -17,6 +17,8 @@
 from mmengine.structures import InstanceData
 from torch import nn
 
+from .ops.topk.topk import topk
+
 
 def clip_sigmoid(x, eps=1e-4):
     y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps)
@@ -315,7 +317,7 @@ def forward_single(self, inputs, metas):
         flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim)
         
         # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
-        _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False)
+        top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False)
         
         # 2. Calculate class and spatial indices
         # Use shape[-1] dynamically to handle grid sizes safely.
diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
index 054b0609b..175c08bed 100644
--- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -25,7 +25,6 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh
     # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d
     linear_idx = ((b * H + h) * W + w) * D + d                     # [N]
     
-		# out_channels = sparse_tensor.features.shape[1]
     out = torch.zeros(
         [
             num_cells,
diff --git a/projects/BEVFusion/bevfusion/ops/topk/__init__.py b/projects/BEVFusion/bevfusion/ops/topk/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py
new file mode 100644
index 000000000..020e677bf
--- /dev/null
+++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py
@@ -0,0 +1,45 @@
+"""
+This file is used to write functions to deploy custom plugins to support Autoware, for example, TopK.
+"""
+
+import torch
+from torch.autograd import Function
+from torch.onnx.symbolic_helper import _get_tensor_sizes
+
+
+class TopK(Function):
+
+    @staticmethod
+    def symbolic(
+        g,
+        x: torch.Tensor,
+				k: int,
+				dim: int,
+				sorted: bool = False,
+    ):
+
+        output = g.op(
+            "autoware::Argsort",
+            x,
+            outputs=1,
+        )
+        x_shape = _get_tensor_sizes(x)
+        if x_shape is not None and hasattr(output.type(), "with_sizes"):
+            output_type = x.type().with_sizes(x_shape)
+            output.setType(output_type)
+				# Argsort from Autoware is in ascending order, so we need to return the last k elements.
+        return output[-k:]
+
+    @staticmethod
+    def forward(
+        ctx,
+        x: torch.Tensor,
+				k: int, 
+				dim: int,
+				sorted: bool = False,
+    ):
+        indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted)
+        return indices
+
+def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False):
+    return TopK.apply(x, k, dim, sorted)
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 70c27f0a7..22762bed1 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
@@ -148,5 +148,3 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
-
-resume = True
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index 0b0f44c08..1b0f91c3d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -83,6 +83,8 @@
             "barrier",
         ],
     ),
+		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index d384bb4e0..929c81c81 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -66,6 +66,8 @@
             "barrier",
         ],
     ),
+		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 394647684..2c95445ea 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -66,6 +66,8 @@
             "barrier",
         ],
     ),
+		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",

From 3cfe9870cef009e69e7e860cfb2c105b6f8e0662 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 26 May 2026 17:35:19 +0900
Subject: [PATCH 150/183] Rename top_proposals_indices to top_proposals

---
 projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 5b6c5ad92..2f0a99a9f 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -317,7 +317,7 @@ def forward_single(self, inputs, metas):
         flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim)
         
         # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
-        top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False)
+        top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False)
         
         # 2. Calculate class and spatial indices
         # Use shape[-1] dynamically to handle grid sizes safely.

From 798021d22468eb10d0b89e8b73265807d4871bff Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Tue, 26 May 2026 17:48:40 +0900
Subject: [PATCH 151/183] Unpack indices from torch.topk in TopK.forward

---
 projects/BEVFusion/bevfusion/ops/topk/topk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py
index 020e677bf..a767bb720 100644
--- a/projects/BEVFusion/bevfusion/ops/topk/topk.py
+++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py
@@ -38,7 +38,7 @@ def forward(
 				dim: int,
 				sorted: bool = False,
     ):
-        indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted)
+        _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted)
         return indices
 
 def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False):

From b7a32d0b5d1431c67427e5ee0657b78e4d316cfb Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Wed, 27 May 2026 13:58:22 +0900
Subject: [PATCH 152/183] Added

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py             | 9 ++-------
 ...default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py | 6 +-----
 ...camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py} | 2 +-
 .../models/resnet50/camera_resnet50_fpn_lss_50m.py       | 2 +-
 .../schedulers/default_30e_8xb16_adamw_linear_cosine.py  | 4 ++--
 5 files changed, 7 insertions(+), 16 deletions(-)
 rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/{bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py => bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py} (92%)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 59585cb49..81d1fb3c6 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -135,19 +135,14 @@ def bev_pool_aux(self, geom_feats):
             return None, None, None
 
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
-
+        
+        # Switch x and y to match the order of the BEV grid
         ranks_bev = (
             geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
             + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
             + geom_feats[:, 0] * self.nx[1]
             + geom_feats[:, 1]
         )
-        # ranks_bev = (
-        #     geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
-        #     + geom_feats[:, 2] * (self.nx[1] * self.nx[0])
-        #     + geom_feats[:, 1] * self.nx[0]
-        #     + geom_feats[:, 0]
-        # )
         indices = ranks_bev.argsort()
         ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices]
         return (
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
index ce26b4905..591399a4e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
@@ -11,13 +11,9 @@
 custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
 
 # user setting
-data_root = "data/t4dataset/"
+data_root = "data/t4datasets/"
 info_directory_path = "info/kokseang_2_8/"
 
-experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m"
-work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
-
 # Dataset parameters
 train_dataloader = dict(
     batch_size=_base_.train_batch_size,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
similarity index 92%
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index d559d40b1..c39ca365d 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -4,7 +4,7 @@
 ]
 
 experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_lss_v1_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 4a0770971..2f556a122 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,7 +6,7 @@
 # Image network
 model = dict(
     view_transform=dict(
-        type="LSSTransformV2",
+        type="LSSTransform",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 1430b10ba..f4ec3e0db 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,10 +1,10 @@
 # learning rate
-lr = 2e-4
+lr = 1e-4
 t_max = 3
 max_epochs = 30
 val_interval = 1
 
-train_gpu_size = 8
+train_gpu_size = 2
 test_batch_size = 2
 train_batch_size = 16
 

From 088c1b3fa22c8578983d9933499ff194102154d8 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Wed, 27 May 2026 15:24:57 +0900
Subject: [PATCH 153/183] Revert camera config to LSSTransformV2 and update depthlss 120m model

---
 ...n_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +-
 .../models/resnet50/camera_resnet50_fpn_depthlss_120m.py  | 8 ++++----
 .../models/resnet50/camera_resnet50_fpn_lss_50m.py        | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index c39ca365d..69df79b51 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -4,7 +4,7 @@
 ]
 
 experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_lss_v1_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
index 4a1f33040..1059ce9e8 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
@@ -22,11 +22,11 @@
         type="mmdet.ResNet",
         depth=50,
         num_stages=4,
-        out_indices=(2, 3),
+        out_indices=(1, 2, 3),
         frozen_stages=-1,
         norm_cfg=dict(type="BN2d", requires_grad=True),
         norm_eval=False,
-        with_cp=False,
+        with_cp=True,
         style="pytorch",
         init_cfg=dict(
             type="Pretrained",
@@ -35,7 +35,7 @@
     ),
     img_neck=dict(
         type="GeneralizedLSSFPN",
-        in_channels=[1024, 2048],
+        in_channels=[512, 1024, 2048],
         out_channels=256,
         start_level=0,
         num_outs=2,
@@ -47,7 +47,7 @@
         type="DepthLSSTransform",
         in_channels=256,
         out_channels=80,
-        feature_size=[24, 48],
+        feature_size=[48, 96],
         xbound=[-122.40, 122.40, 0.68],
         ybound=[-122.40, 122.40, 0.68],
         zbound=[-10.0, 10.0, 20.0],
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 2f556a122..4a0770971 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,7 +6,7 @@
 # Image network
 model = dict(
     view_transform=dict(
-        type="LSSTransform",
+        type="LSSTransformV2",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
         zbound=[-10.0, 10.0, 20.0],

From 40f3ea943da2d862c7bf42d0fd54bde90f7e1d9d Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 29 May 2026 16:56:32 +0900
Subject: [PATCH 154/183] Raise min_num_points in lidar 120m ObjectRangeMinPointsFilter

---
 ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 +-
 .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 22762bed1..ea4f75569 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -15,7 +15,7 @@
 data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8_1/"
 
-experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_lidar_2_8_0/base_more_filters/" + _base_.dataset_type
 experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index 929c81c81..6472bb582 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -66,8 +66,8 @@
             "barrier",
         ],
     ),
-		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
+	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(type="PointShuffle"),
     dict(
         type="Pack3DDetInputs",

From 7053c2d6942aa0b2ed306602ac606d7186b9501a Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 00:20:35 +0900
Subject: [PATCH 155/183] Enable resume in lidar 50e 8xb16 base 120m config

---
 .../bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index ea4f75569..171c3076e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -148,3 +148,5 @@
     checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"),
 )
 log_processor = dict(window_size=50)
+
+resume = True
\ No newline at end of file

From 49e214068b46c0339d4a2ce00b36ffab3d9988c6 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 12:14:17 +0900
Subject: [PATCH 156/183] Adjust ImageAug3D resize_lim in camera 50m pipeline

---
 .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 77470a938..a8098ed8e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -29,7 +29,7 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.28, 0.40],
+				resize_lim=[0.29, 0.35],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=True,
@@ -100,7 +100,7 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-        resize_lim=[0.34, 0.34],
+        resize_lim=[0.32, 0.32],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=False,

From bfb3d2e1cbbb37b3865f254c11f18d7c3b8ed4fe Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 22:21:30 +0900
Subject: [PATCH 157/183] Add depth loss and PointsToMultiViewImageDepths transform

---
 projects/BEVFusion/bevfusion/bevfusion.py    |  75 +++++++--
 projects/BEVFusion/bevfusion/depth_lss_v2.py |  18 ++-
 projects/BEVFusion/bevfusion/loading.py      | 161 ++++++++++++++++++-
 3 files changed, 238 insertions(+), 16 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index aeddc09fa..bda80ed0d 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -34,6 +34,8 @@ def __init__(
         bbox_head: Optional[dict] = None,
         init_cfg: OptMultiConfig = None,
         seg_head: Optional[dict] = None,
+        loss_depth_weight: float = 3.0,
+        depth_gt_downsample: int = 1,
         **kwargs,
     ) -> None:
         """Initialize BEVFusion model.
@@ -76,6 +78,8 @@ def __init__(
         self.bbox_head = MODELS.build(bbox_head)
 
         self.init_weights()
+        self.loss_depth_weight = loss_depth_weight
+        self.depth_gt_downsample = depth_gt_downsample
 
     def _forward(
         self, batch_inputs_dict: Tensor, batch_data_samples: OptSampleList = [], using_image_features=False, **kwargs
@@ -174,14 +178,14 @@ def extract_img_feat(
         lidar_aug_matrix_inverse=None,
         geom_feats=None,
         using_image_features=False,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
 
         if not using_image_features:
             x = self.get_image_backbone_features(x)
 
         with torch.amp.autocast("cuda",enabled=False):
             # with torch.autocast(device_type='cuda', dtype=torch.float32):
-            x = self.view_transform(
+            x, pred_depths = self.view_transform(
                 x,
                 points,
                 lidar2image,
@@ -195,7 +199,7 @@ def extract_img_feat(
                 lidar_aug_matrix_inverse,
                 geom_feats,
             )
-        return x
+        return x, pred_depths
 
     def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor:
         if points is not None:
@@ -320,7 +324,7 @@ def extract_feat(
             camera2lidar = imgs.new_tensor(np.asarray(camera2lidar))
             img_aug_matrix = imgs.new_tensor(np.asarray(img_aug_matrix))
             lidar_aug_matrix = imgs.new_tensor(np.asarray(lidar_aug_matrix))
-            img_feature = self.extract_img_feat(
+            img_feature, pred_depths = self.extract_img_feat(
                 imgs,
                 deepcopy(points),
                 lidar2image,
@@ -342,7 +346,7 @@ def extract_feat(
             lidar_aug_matrix = batch_inputs_dict["lidar_aug_matrix"]
             geom_feats = batch_inputs_dict["geom_feats"]
 
-            img_feature = self.extract_img_feat(
+            img_feature, pred_depths = self.extract_img_feat(
                 imgs,
                 points,
                 lidar2image,
@@ -377,7 +381,7 @@ def extract_feat(
         if self.pts_neck is not None:
             x = self.pts_neck(x)
 
-        return x
+        return x, pred_depths
 
     def loss(
         self,
@@ -387,12 +391,65 @@ def loss(
         **kwargs,
     ) -> List[Det3DDataSample]:
         batch_input_metas = [item.metainfo for item in batch_data_samples]
-        feats = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features)
+        feats, pred_depths = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features)
 
         losses = dict()
+        if self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict:
+            with torch.amp.autocast("cuda", enabled=False):
+                gt_depths = batch_inputs_dict["gt_depths"]
+                depth_loss = self.get_depth_loss(gt_depths, pred_depths)
+                losses["loss_depth"] = depth_loss
+        
         if self.with_bbox_head:
             bbox_loss = self.bbox_head.loss(feats, batch_data_samples)
-
-        losses.update(bbox_loss)
+            losses.update(bbox_loss)
 
         return losses
+ 
+    def get_downsampled_gt_depth(self, gt_depths):
+        """
+        Input:
+            gt_depths: [B, N, H, W]
+        Output:
+            gt_depths: [B*N*h*w, d]
+        """
+        B, N, H, W = gt_depths.shape
+        D = self.view_transform.D
+        dbounds = self.view_transform.dbound
+        gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample,
+                                   self.depth_gt_downsample, W // self.depth_gt_downsample,
+                                   self.depth_gt_downsample, 1)
+        gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous()
+        gt_depths = gt_depths.view(-1, self.depth_gt_downsample * self.depth_gt_downsample)
+        gt_depths_tmp = torch.where(gt_depths == 0.0,
+                                    1e5 * torch.ones_like(gt_depths),
+                                    gt_depths)
+        gt_depths = torch.min(gt_depths_tmp, dim=-1).values
+        gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample,
+                                   W // self.downsample)
+
+        gt_depths = torch.log(gt_depths) - torch.log(
+            torch.tensor(dbounds[0]).float())
+        gt_depths = gt_depths * (D - 1) / torch.log(
+            torch.tensor(dbounds[1] - 1.).float() /
+            dbounds[0])
+        gt_depths = gt_depths + 1.
+        gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0),
+                                gt_depths, torch.zeros_like(gt_depths))
+        gt_depths = F.one_hot(
+            gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
+        return gt_depths.float()
+
+    def get_depth_loss(self, depth_labels, depth_preds):
+        depth_labels = self.get_downsampled_gt_depth(depth_labels)
+        depth_preds = depth_preds.permute(0, 2, 3,
+                                          1).contiguous().view(-1, self.D)
+        fg_mask = torch.max(depth_labels, dim=1).values > 0.0
+        depth_labels = depth_labels[fg_mask]
+        depth_preds = depth_preds[fg_mask]
+        depth_loss = F.binary_cross_entropy(
+            depth_preds,
+            depth_labels,
+            reduction='none',
+        ).sum() / max(1.0, fg_mask.sum())
+        return self.loss_depth_weight * depth_loss
\ No newline at end of file
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 81d1fb3c6..66c58add8 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -70,7 +70,10 @@ def forward(
             ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed
             x, depth_softmax = self.get_cam_feats(img)
             x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat)
-
+            
+            # No return depth predictions when precomputed geometry features are used
+            depth_softmax = None
+        
         else:
             intrins = camera_intrinsics[..., :3, :3]
             post_rots = img_aug_matrix[..., :3, :3]
@@ -99,8 +102,8 @@ def forward(
                 depth_softmax,
             ) = self.get_cam_feats(img)
             x = self.bev_pool(view_feats, depth_softmax, geom)
-
-        return x
+         
+        return x, depth_softmax
 
     def bev_pool_aux(self, geom_feats):
         B, N, D, H, W, C = geom_feats.shape
@@ -229,7 +232,7 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth
         bev_feat = self.compute_bev_pool(
             view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths
         )
-        return bev_feat
+        return bev_feat 
 
 
 @MODELS.register_module()
@@ -245,6 +248,7 @@ def __init__(
         ybound: Tuple[float, float, float],
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
+        loss_depth_weight: float = 3.0,
         downsample: int = 1,
     ):
         super().__init__(
@@ -259,6 +263,7 @@ def __init__(
         )
         self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1)
         self.downsample = DownSampleNet(downsample, out_channels, out_channels)
+        self.loss_depth_weight = loss_depth_weight
 
     def get_cam_feats(self, x):
         B, N, C, fH, fW = x.shape
@@ -272,6 +277,7 @@ def get_cam_feats(self, x):
         return view_feats, depth_softmax
     
     def forward(self, *args, **kwargs):
-        x = super().forward(*args, **kwargs)
+        x, depth_softmax = super().forward(*args, **kwargs)
         x = self.downsample(x)
-        return x
\ No newline at end of file
+        return x
+    
\ No newline at end of file
diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 0478d67a3..5cdc499b9 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -1,14 +1,18 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
-import os
+from pathlib import Path
 from typing import List, Optional
 
+import matplotlib.pyplot as plt
+
 import mmcv
 import numpy as np
 from mmdet3d.datasets.transforms import LoadMultiViewImageFromFiles
 from mmdet3d.registry import TRANSFORMS
 from mmengine.fileio import get
 from mmengine.logging import print_log
+from mmcv.transforms import BaseTransform
+
 
 
 @TRANSFORMS.register_module()
@@ -217,3 +221,158 @@ def transform(self, results: dict) -> Optional[dict]:
         results["num_views"] = self.num_views
         results["num_ref_frames"] = self.num_ref_frames
         return results
+
+
+@TRANSFORMS.register_module()
+class PointsToMultiViewImageDepths(BaseTransform):
+    """Convert points to multi-view image depths.
+
+    Args:
+        points (np.ndarray): Points in the world coordinate system.
+        img_shape (tuple): Shape of the image.
+        cam2img (np.ndarray): Camera to image transformation matrix.
+        lidar2cam (np.ndarray): LiDAR to camera transformation matrix.
+        visualize_dir (str, optional): If set, saves a per-sample subplot
+            of `gt_depths` (one panel per camera) to this directory.
+            Useful for debugging the projection. Defaults to None.
+        max_depth (float): Upper clip for the depth color scale (m).
+            Defaults to 80.
+    """
+    def __init__(
+        self,
+        img_shape,
+        num_cameras: int,
+        visualize_dir: Optional[str] = None,
+        max_depth: float = 80.0,
+    ):
+        self.img_shape = img_shape
+        self.num_cameras = num_cameras
+        self.visualize_dir = visualize_dir
+        self.max_depth = max_depth
+        self.visualize_dir = Path(visualize_dir) if visualize_dir is not None else None
+        if self.visualize_dir is not None:
+            self.visualize_dir.mkdir(parents=True, exist_ok=True)
+        self._depth_idx = 0
+    
+    def transform(self, results: dict) -> Optional[dict]:
+        """Call function to load multi-view image from files.
+
+        Args:
+            results (dict): Result dict containing multi-view image filenames.
+
+        Returns:
+            dict: The result dict containing the multi-view image data.
+            Added keys:
+                - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width).
+        """ 
+        lidar2image = results["lidar2img"]
+        img_aug_matrix = results.get("img_aug_matrix", np.eye(4))
+        lidar_aug_matrix = results.get("lidar_aug_matrix", np.eye(4))
+        
+        lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
+        depth = np.zeros((self.num_cameras, self.img_shape[0], self.img_shape[1]), dtype=np.float32)
+        
+        cur_coords = results["points"][:,:3]
+        # inverse aug
+        cur_coords -= lidar_aug_matrix[:3, 3]
+        cur_coords = lidar_aug_matrix_inverse[:3, :3].matmul(cur_coords.transpose(1, 0))
+
+        # lidar2image
+        cur_coords = lidar2image[:, :3, :3].matmul(cur_coords)
+        cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1)
+
+        # get 2d coords
+        dist = cur_coords[:, 2, :]
+        valid_dist_mask = dist > 0
+
+        cur_coords[:, 2, :] = np.clip(cur_coords[:, 2, :], 1e-5, 1e5)
+        cur_coords[:, :2, :] /= cur_coords[:, 2:3, :]
+
+        # imgaug
+        cur_coords = img_aug_matrix[:, :3, :3].matmul(cur_coords)
+        cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1)
+        cur_coords = cur_coords[:, :2, :].transpose(1, 2)
+
+        # normalize coords for grid sample
+        cur_coords = cur_coords[..., [1, 0]]
+        on_img = (
+            (cur_coords[..., 0] < self.img_shape[0])
+            & (cur_coords[..., 0] >= 0)
+            & (cur_coords[..., 1] < self.img_shape[1])
+            & (cur_coords[..., 1] >= 0)
+            & valid_dist_mask
+        )
+        for c in range(self.num_cameras):
+            masked_coords = cur_coords[c, on_img[c]].astype(np.int64)
+            masked_dist = dist[c, on_img[c]]
+            depth[c, masked_coords[:, 0], masked_coords[:, 1]] = masked_dist
+
+        results["gt_depths"] = depth
+
+        if self.visualize_dir is not None:
+            self._save_depth_subplot(depth, results)
+
+        return results
+
+    def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
+        """Save `gt_depths` as a subplot with one panel per camera.
+
+        Each panel shows the camera image (if available) with the projected
+        LiDAR depth points overlaid, color-coded by distance. A standalone
+        depth-only figure is also saved alongside it.
+
+        Args:
+            depth (np.ndarray): (num_cameras, H, W) ground-truth depth map.
+            results (dict): The pipeline result dict; used for the underlay
+                image and to derive a unique filename.
+        """
+        imgs = results.get("img", None)
+
+        # Layout: keep it a single row up to 6 cameras, otherwise wrap to a
+        # roughly-square grid.
+        if self.num_cameras <= 6:
+            rows, cols = 1, self.num_cameras
+        else:
+            cols = int(np.ceil(np.sqrt(self.num_cameras)))
+            rows = int(np.ceil(self.num_cameras / cols))
+
+        fig, axes = plt.subplots(
+            rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False
+        )
+
+        for c in range(self.num_cameras):
+            ax = axes[c // cols, c % cols]
+            d = depth[c]
+            ys, xs = np.nonzero(d)
+            vals = d[ys, xs]
+
+            if imgs is not None and c < len(imgs):
+                ax.imshow(imgs[c].astype(np.uint8))
+                if vals.size > 0:
+                    ax.scatter(
+                        xs, ys, c=vals, cmap="turbo",
+                        vmin=0, vmax=self.max_depth, s=1,
+                    )
+            else:
+                ax.imshow(
+                    d, cmap="turbo", vmin=0, vmax=self.max_depth,
+                    interpolation="nearest",
+                )
+
+            ax.set_title(f"cam {c}  ({vals.size} pts)")
+            ax.set_xticks([])
+            ax.set_yticks([])
+
+        # Hide any unused subplots when n doesn't fill the grid.
+        for c in range(self.num_cameras, rows * cols):
+            axes[c // cols, c % cols].axis("off")
+
+        fig.suptitle(f"gt_depths — {self._depth_idx}")
+        fig.tight_layout()
+        
+        self._depth_idx += 1
+        out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png"
+        fig.savefig(out_path, dpi=120, bbox_inches="tight")
+        plt.close(fig)
+        print(f"Saved gt_depths visualization to {out_path}")
+ 
\ No newline at end of file

From 029dd7655d537c85cfefa061c0c1563f78c07c38 Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 22:50:24 +0900
Subject: [PATCH 158/183] Use linear depth bins in get_downsampled_gt_depth

---
 projects/BEVFusion/bevfusion/bevfusion.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index bda80ed0d..a047c08ca 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -426,14 +426,9 @@ def get_downsampled_gt_depth(self, gt_depths):
                                     gt_depths)
         gt_depths = torch.min(gt_depths_tmp, dim=-1).values
         gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample,
-                                   W // self.downsample)
-
-        gt_depths = torch.log(gt_depths) - torch.log(
-            torch.tensor(dbounds[0]).float())
-        gt_depths = gt_depths * (D - 1) / torch.log(
-            torch.tensor(dbounds[1] - 1.).float() /
-            dbounds[0])
-        gt_depths = gt_depths + 1.
+                                   W // self.depth_gt_downsample)
+
+        gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2]
         gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0),
                                 gt_depths, torch.zeros_like(gt_depths))
         gt_depths = F.one_hot(

From 28e6af83947d610609e3490bd70775be60b9fe4c Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 23:06:14 +0900
Subject: [PATCH 159/183] Added

---
 projects/BEVFusion/bevfusion/__init__.py                 | 3 ++-
 projects/BEVFusion/bevfusion/bevfusion.py                | 2 ++
 projects/BEVFusion/bevfusion/depth_lss_v2.py             | 4 +---
 projects/BEVFusion/bevfusion/loading.py                  | 6 +++---
 ..._camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +-
 .../models/resnet50/camera_resnet50_fpn_lss_50m.py       | 2 ++
 .../default/pipelines/cameras/default_camera_120m.py     | 2 ++
 .../default/pipelines/cameras/default_camera_50m.py      | 9 ++++++++-
 8 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 385ccd89e..9e1ca1cf4 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -4,7 +4,7 @@
 from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder
 from .depth_lss import DepthLSSTransform, LSSTransform
 from .depth_lss_v2 import LSSTransformV2
-from .loading import BEVLoadMultiViewImageFromFiles
+from .loading import BEVLoadMultiViewImageFromFiles, PointsToMultiViewImageDepths
 from .sparse_encoder import BEVFusionSparseEncoder
 from .transformer import TransformerDecoderLayer
 from .transforms_3d import (
@@ -39,4 +39,5 @@
     "HardSimpleVoxelSinCosEncoder",
     "BEVFusionVoxelFeatureNet",
     "LSSTransformV2",
+    "PointsToMultiViewImageDepths"
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index a047c08ca..5c387ddb4 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -397,6 +397,8 @@ def loss(
         if self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict:
             with torch.amp.autocast("cuda", enabled=False):
                 gt_depths = batch_inputs_dict["gt_depths"]
+                print("gt_depths shape: ", gt_depths.shape)
+                print("pred_depths shape: ", pred_depths.shape)
                 depth_loss = self.get_depth_loss(gt_depths, pred_depths)
                 losses["loss_depth"] = depth_loss
         
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 66c58add8..def32ee5e 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -248,7 +248,6 @@ def __init__(
         ybound: Tuple[float, float, float],
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
-        loss_depth_weight: float = 3.0,
         downsample: int = 1,
     ):
         super().__init__(
@@ -263,7 +262,6 @@ def __init__(
         )
         self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1)
         self.downsample = DownSampleNet(downsample, out_channels, out_channels)
-        self.loss_depth_weight = loss_depth_weight
 
     def get_cam_feats(self, x):
         B, N, C, fH, fW = x.shape
@@ -279,5 +277,5 @@ def get_cam_feats(self, x):
     def forward(self, *args, **kwargs):
         x, depth_softmax = super().forward(*args, **kwargs)
         x = self.downsample(x)
-        return x
+        return x, depth_softmax
     
\ No newline at end of file
diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 5cdc499b9..8f9456d19 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -275,10 +275,10 @@ def transform(self, results: dict) -> Optional[dict]:
         cur_coords = results["points"][:,:3]
         # inverse aug
         cur_coords -= lidar_aug_matrix[:3, 3]
-        cur_coords = lidar_aug_matrix_inverse[:3, :3].matmul(cur_coords.transpose(1, 0))
+        cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0)
 
         # lidar2image
-        cur_coords = lidar2image[:, :3, :3].matmul(cur_coords)
+        cur_coords = lidar2image[:, :3, :3] @ cur_coords
         cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1)
 
         # get 2d coords
@@ -289,7 +289,7 @@ def transform(self, results: dict) -> Optional[dict]:
         cur_coords[:, :2, :] /= cur_coords[:, 2:3, :]
 
         # imgaug
-        cur_coords = img_aug_matrix[:, :3, :3].matmul(cur_coords)
+        cur_coords = img_aug_matrix[:, :3, :3] @ cur_coords
         cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1)
         cur_coords = cur_coords[:, :2, :].transpose(1, 2)
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index 69df79b51..6695e397e 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -3,7 +3,7 @@
     "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
 ]
 
-experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_camera/j6gen2_depth_base/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index 4a0770971..c29925243 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -5,6 +5,8 @@
 
 # Image network
 model = dict(
+    depth_gt_downsample=8, 
+    loss_depth_weight=3.0,
     view_transform=dict(
         type="LSSTransformV2",
         xbound=[-54.0, 54.0, 0.3],
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
index fc7338699..78bc0167d 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -89,6 +89,8 @@
             "timestamp",
             "vehicle_type",
             "city",
+            "traffic_cone_barrier_status",
+            "gt_depths",
         ],
     ),
 ]
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index a8098ed8e..05917e6bd 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -29,12 +29,18 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-				resize_lim=[0.29, 0.35],
+		resize_lim=[0.29, 0.35],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=True,
         is_train=True,
     ),
+    dict(
+        type="PointsToMultiViewImageDepths", 
+        img_shape=image_size, 
+        num_cameras=len(camera_order), 
+        visualize_dir="work_dirs/visualize_depths",
+    ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
         scale_ratio_range=[0.95, 1.05],
@@ -85,6 +91,7 @@
             "vehicle_type",
             "city",
             "traffic_cone_barrier_status",
+            "gt_depths",
         ],
     ),
 ]

From 3903988fe91cfad4af6c9b78ee77033427cabc2a Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Sat, 30 May 2026 23:29:51 +0900
Subject: [PATCH 160/183] Keep nearest LiDAR depth per pixel in gt_depths

---
 projects/BEVFusion/bevfusion/loading.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 8f9456d19..79019e0bf 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -265,13 +265,16 @@ def transform(self, results: dict) -> Optional[dict]:
             Added keys:
                 - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width).
         """ 
-        lidar2image = results["lidar2img"]
-        img_aug_matrix = results.get("img_aug_matrix", np.eye(4))
-        lidar_aug_matrix = results.get("lidar_aug_matrix", np.eye(4))
+        lidar2image = np.array(results["lidar2img"])
+        img_aug_matrix = np.array(results.get("img_aug_matrix", np.eye(4)))
+        lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4)))
         
         lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
-        depth = np.zeros((self.num_cameras, self.img_shape[0], self.img_shape[1]), dtype=np.float32)
-        
+        depth = np.full(
+            (self.num_cameras, self.img_shape[0], self.img_shape[1]),
+            np.inf,
+            dtype=np.float32,
+        )        
         cur_coords = results["points"][:,:3]
         # inverse aug
         cur_coords -= lidar_aug_matrix[:3, 3]
@@ -305,8 +308,13 @@ def transform(self, results: dict) -> Optional[dict]:
         for c in range(self.num_cameras):
             masked_coords = cur_coords[c, on_img[c]].astype(np.int64)
             masked_dist = dist[c, on_img[c]]
-            depth[c, masked_coords[:, 0], masked_coords[:, 1]] = masked_dist
+            np.fmin.at(
+                depth[c],
+                (masked_coords[:, 0], masked_coords[:, 1]),
+                masked_dist,
+            )
 
+        depth[np.isinf(depth)] = 0
         results["gt_depths"] = depth
 
         if self.visualize_dir is not None:

From 6e6de1c88f54116595e42bb9f0e5ef497693d521 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:23:40 +0900
Subject: [PATCH 161/183] Read gt_depths from metas and expand depth visualization

---
 projects/BEVFusion/bevfusion/bevfusion.py     | 20 +++--
 projects/BEVFusion/bevfusion/loading.py       | 73 ++++++++++++++-----
 .../pipelines/cameras/default_camera_50m.py   |  2 +-
 3 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 5c387ddb4..c305405cd 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -286,7 +286,7 @@ def predict(
                 contains a tensor with shape (num_instances, 7).
         """
         batch_input_metas = [item.metainfo for item in batch_data_samples]
-        feats = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features)
+        feats, _ = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features)
 
         if self.with_bbox_head:
             outputs = self.bbox_head.predict(feats, batch_input_metas)
@@ -307,6 +307,7 @@ def extract_feat(
         features = []
 
         is_onnx_inference = False
+        pred_depths = None
         if imgs is not None and "lidar2img" not in batch_inputs_dict:
             # NOTE(knzo25): normal training and testing
             imgs = imgs.contiguous()
@@ -394,11 +395,16 @@ def loss(
         feats, pred_depths = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features)
 
         losses = dict()
-        if self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict:
+        if self.loss_depth_weight > 0 and pred_depths is not None:
             with torch.amp.autocast("cuda", enabled=False):
-                gt_depths = batch_inputs_dict["gt_depths"]
-                print("gt_depths shape: ", gt_depths.shape)
-                print("pred_depths shape: ", pred_depths.shape)
+                gt_depths = torch.stack(
+                    [
+                        meta["gt_depths"]
+                        if isinstance(meta["gt_depths"], torch.Tensor)
+                        else torch.as_tensor(meta["gt_depths"])
+                        for meta in batch_input_metas
+                    ]
+                ).to(device=pred_depths.device, dtype=torch.float32)
                 depth_loss = self.get_depth_loss(gt_depths, pred_depths)
                 losses["loss_depth"] = depth_loss
         
@@ -439,8 +445,8 @@ def get_downsampled_gt_depth(self, gt_depths):
 
     def get_depth_loss(self, depth_labels, depth_preds):
         depth_labels = self.get_downsampled_gt_depth(depth_labels)
-        depth_preds = depth_preds.permute(0, 2, 3,
-                                          1).contiguous().view(-1, self.D)
+        # (B, N, D, H, W) -> (B*N*H*W, D)
+        depth_preds = depth_preds.permute(0, 1, 3, 4, 2).contiguous().view(-1, self.view_transform.D)
         fg_mask = torch.max(depth_labels, dim=1).values > 0.0
         depth_labels = depth_labels[fg_mask]
         depth_preds = depth_preds[fg_mask]
diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 79019e0bf..fc1d9b985 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -294,7 +294,7 @@ def transform(self, results: dict) -> Optional[dict]:
         # imgaug
         cur_coords = img_aug_matrix[:, :3, :3] @ cur_coords
         cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1)
-        cur_coords = cur_coords[:, :2, :].transpose(1, 2)
+        cur_coords = cur_coords[:, :2, :].transpose(0, 2, 1)
 
         # normalize coords for grid sample
         cur_coords = cur_coords[..., [1, 0]]
@@ -325,9 +325,10 @@ def transform(self, results: dict) -> Optional[dict]:
     def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
         """Save `gt_depths` as a subplot with one panel per camera.
 
-        Each panel shows the camera image (if available) with the projected
-        LiDAR depth points overlaid, color-coded by distance. A standalone
-        depth-only figure is also saved alongside it.
+        The figure contains three row blocks per camera:
+        - image underlay (if available) + projected LiDAR depth points
+        - image pixels only
+        - depth-only heatmap (no image pixel values)
 
         Args:
             depth (np.ndarray): (num_cameras, H, W) ground-truth depth map.
@@ -336,47 +337,85 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
         """
         imgs = results.get("img", None)
 
-        # Layout: keep it a single row up to 6 cameras, otherwise wrap to a
-        # roughly-square grid.
+        # Layout:
+        # - Top block: image underlay + projected depth points.
+        # - Middle block: image pixels only.
+        # - Bottom block: depth-only heatmap (no image pixel values).
         if self.num_cameras <= 6:
-            rows, cols = 1, self.num_cameras
+            base_rows, cols = 1, self.num_cameras
         else:
             cols = int(np.ceil(np.sqrt(self.num_cameras)))
-            rows = int(np.ceil(self.num_cameras / cols))
+            base_rows = int(np.ceil(self.num_cameras / cols))
+        rows = base_rows * 3
 
         fig, axes = plt.subplots(
             rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False
         )
 
         for c in range(self.num_cameras):
-            ax = axes[c // cols, c % cols]
             d = depth[c]
             ys, xs = np.nonzero(d)
             vals = d[ys, xs]
 
+            # Row block 1: image + depth scatter.
+            ax_overlay = axes[c // cols, c % cols]
             if imgs is not None and c < len(imgs):
-                ax.imshow(imgs[c].astype(np.uint8))
+                ax_overlay.imshow(imgs[c].astype(np.uint8))
                 if vals.size > 0:
-                    ax.scatter(
+                    ax_overlay.scatter(
                         xs, ys, c=vals, cmap="turbo",
                         vmin=0, vmax=self.max_depth, s=1,
                     )
             else:
-                ax.imshow(
+                ax_overlay.imshow(
                     d, cmap="turbo", vmin=0, vmax=self.max_depth,
                     interpolation="nearest",
                 )
+            ax_overlay.set_title(f"cam {c} overlay  ({vals.size} pts)")
+            ax_overlay.set_xticks([])
+            ax_overlay.set_yticks([])
 
-            ax.set_title(f"cam {c}  ({vals.size} pts)")
-            ax.set_xticks([])
-            ax.set_yticks([])
+            # Row block 2: image-only visualization.
+            ax_img = axes[base_rows + (c // cols), c % cols]
+            if imgs is not None and c < len(imgs):
+                ax_img.imshow(imgs[c].astype(np.uint8))
+            else:
+                ax_img.imshow(
+                    d, cmap="gray", vmin=0, vmax=self.max_depth,
+                    interpolation="nearest",
+                )
+            ax_img.set_title(f"cam {c} image-only")
+            ax_img.set_xticks([])
+            ax_img.set_yticks([])
+
+            # Row block 3: depth-only visualization.
+            ax_depth = axes[(base_rows * 2) + (c // cols), c % cols]
+            ax_depth.imshow(
+                d, cmap="turbo", vmin=0, vmax=self.max_depth,
+                interpolation="nearest",
+            )
+            ax_depth.set_title(f"cam {c} depth-only")
+            ax_depth.set_xticks([])
+            ax_depth.set_yticks([])
 
         # Hide any unused subplots when n doesn't fill the grid.
-        for c in range(self.num_cameras, rows * cols):
+        for c in range(self.num_cameras, base_rows * cols):
             axes[c // cols, c % cols].axis("off")
+            axes[base_rows + (c // cols), c % cols].axis("off")
+            axes[(base_rows * 2) + (c // cols), c % cols].axis("off")
+
+        # Shared depth colorbar with numeric values.
+        depth_mappable = plt.cm.ScalarMappable(
+            cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth)
+        )
+        depth_mappable.set_array([])
+        cbar = fig.colorbar(
+            depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02
+        )
+        cbar.set_label("Depth (m)")
 
         fig.suptitle(f"gt_depths — {self._depth_idx}")
-        fig.tight_layout()
+        fig.tight_layout(rect=[0, 0, 0.96, 0.97])
         
         self._depth_idx += 1
         out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png"
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 05917e6bd..556c9ef06 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -39,7 +39,7 @@
         type="PointsToMultiViewImageDepths", 
         img_shape=image_size, 
         num_cameras=len(camera_order), 
-        visualize_dir="work_dirs/visualize_depths",
+        # visualize_dir="work_dirs/visualize_depths",
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",

From b87c3a379e9d02831af18cd28ff7d8741b6ebc4a Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:47:36 +0900
Subject: [PATCH 162/183] Add PointsRangeFilter before depth projection (50m)

---
 .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 556c9ef06..77ff07e29 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -35,6 +35,10 @@
         rand_flip=True,
         is_train=True,
     ),
+    dict(
+        type="PointsRangeFilter", 
+        point_cloud_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
+    ),
     dict(
         type="PointsToMultiViewImageDepths", 
         img_shape=image_size, 

From 3b7966093baa211e2528c231d8ace1cabee1fb75 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:50:54 +0900
Subject: [PATCH 163/183] Add ObjectRangeMinPointsFilter to camera pipelines

---
 .../t4dataset/default/pipelines/cameras/default_camera_120m.py  | 2 ++
 .../t4dataset/default/pipelines/cameras/default_camera_50m.py   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
index 78bc0167d..f31a604b0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -53,6 +53,8 @@
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(
         type="ObjectNameFilter",
         classes=[
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 77ff07e29..741524a34 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -53,6 +53,8 @@
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
+	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     # Remove LiDAR points from the data
     dict(type="BEVFusionRemoveLiDARPoints"),
     dict(

From d5cbf2b9e5c84c85a33de9650be917883f241531 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:52:05 +0900
Subject: [PATCH 164/183] Remove 60-130 m min-points filter from camera pipelines

---
 .../t4dataset/default/pipelines/cameras/default_camera_120m.py   | 1 -
 .../t4dataset/default/pipelines/cameras/default_camera_50m.py    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
index f31a604b0..9496cda0c 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -54,7 +54,6 @@
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
 	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(
         type="ObjectNameFilter",
         classes=[
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 741524a34..15b63829b 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -54,7 +54,6 @@
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
 	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
-    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     # Remove LiDAR points from the data
     dict(type="BEVFusionRemoveLiDARPoints"),
     dict(

From 5df6adf593603ba234b6edc16b7768a180687838 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:52:25 +0900
Subject: [PATCH 165/183] Restore 60-130 m min-points filter in 120m pipeline

---
 .../t4dataset/default/pipelines/cameras/default_camera_120m.py   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
index 9496cda0c..f31a604b0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -54,6 +54,7 @@
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
 	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(
         type="ObjectNameFilter",
         classes=[

From 8da5b1699646b92195cd4f7faa3f7f45fa823d51 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 00:57:33 +0900
Subject: [PATCH 166/183] Widen PointsRangeFilter to +/-80 m in 50m pipeline

---
 .../t4dataset/default/pipelines/cameras/default_camera_50m.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 15b63829b..11b0869d3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -37,7 +37,7 @@
     ),
     dict(
         type="PointsRangeFilter", 
-        point_cloud_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
+        point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
     ),
     dict(
         type="PointsToMultiViewImageDepths", 

From 216e5577e26c608b1c0f95e9a76bf0dda52a63f9 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Sun, 31 May 2026 01:09:18 +0900
Subject: [PATCH 167/183] Clamp far gt depths to last bin instead of zeroing

---
 projects/BEVFusion/bevfusion/bevfusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index c305405cd..1c1a159d7 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -437,8 +437,8 @@ def get_downsampled_gt_depth(self, gt_depths):
                                    W // self.depth_gt_downsample)
 
         gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2]
-        gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0),
-                                gt_depths, torch.zeros_like(gt_depths))
+        gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths))
+        gt_depths = torch.clamp(gt_depths, max=float(D))
         gt_depths = F.one_hot(
             gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
         return gt_depths.float()

From d899a6209b7d0239e136b8a5d79dfa028f0e9b02 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Mon, 1 Jun 2026 17:10:09 +0900
Subject: [PATCH 168/183] Add depth_bounds to depth GT transform and tune configs

---
 .../configs/detection3d/default_runtime.py      | 12 ++++++++++--
 projects/BEVFusion/bevfusion/bevfusion.py       | 12 ++++++++----
 projects/BEVFusion/bevfusion/loading.py         |  6 ++++--
 ...esnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py |  2 +-
 .../resnet50/camera_resnet50_fpn_lss_50m.py     |  2 +-
 .../pipelines/cameras/default_camera_50m.py     | 11 ++++++-----
 .../default/pipelines/default_lidar_50m.py      | 16 ++++++++--------
 .../default_30e_8xb16_adamw_cosine.py           |  6 +++---
 .../default_30e_8xb16_adamw_linear_cosine.py    | 17 ++++++++++++-----
 9 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/autoware_ml/configs/detection3d/default_runtime.py b/autoware_ml/configs/detection3d/default_runtime.py
index cc2b896f7..6da761425 100644
--- a/autoware_ml/configs/detection3d/default_runtime.py
+++ b/autoware_ml/configs/detection3d/default_runtime.py
@@ -2,9 +2,17 @@
 
 default_hooks = dict(
     timer=dict(type="IterTimerHook"),
-    logger=dict(type="LoggerHook", interval=50),
+    logger=dict(
+        type="LoggerHook",
+        interval=50,
+        backend_args=dict(backend="local"),
+    ),
     param_scheduler=dict(type="ParamSchedulerHook"),
-    checkpoint=dict(type="CheckpointHook", interval=-1),
+    checkpoint=dict(
+        type="CheckpointHook",
+        interval=-1,
+        backend_args=dict(backend="local"),
+    ),
     sampler_seed=dict(type="DistSamplerSeedHook"),
     visualization=dict(type="Det3DVisualizationHook"),
 )
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 1c1a159d7..17d924ff9 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -76,8 +76,7 @@ def __init__(
         self.pts_neck = MODELS.build(pts_neck) if pts_neck is not None else None
 
         self.bbox_head = MODELS.build(bbox_head)
-
-        self.init_weights()
+        self._weights_initialized = False
         self.loss_depth_weight = loss_depth_weight
         self.depth_gt_downsample = depth_gt_downsample
 
@@ -135,8 +134,11 @@ def parse_losses(self, losses: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, D
         return loss, log_vars  # type: ignore
 
     def init_weights(self) -> None:
+        if self._weights_initialized:
+            return
         if self.img_backbone is not None:
             self.img_backbone.init_weights()
+        self._weights_initialized = True
 
     @property
     def with_bbox_head(self):
@@ -437,8 +439,10 @@ def get_downsampled_gt_depth(self, gt_depths):
                                    W // self.depth_gt_downsample)
 
         gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2]
-        gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths))
-        gt_depths = torch.clamp(gt_depths, max=float(D))
+        # gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths))
+        # gt_depths = torch.clamp(gt_depths, max=float(D))
+        gt_depths = torch.where((gt_depths >= 0.0) & (gt_depths < D + 1), gt_depths, torch.zeros_like(gt_depths))
+        # gt_depths = torch.clamp(gt_depths, max=float(D))
         gt_depths = F.one_hot(
             gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
         return gt_depths.float()
diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index fc1d9b985..40999f562 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import matplotlib.pyplot as plt
 
@@ -242,6 +242,7 @@ def __init__(
         self,
         img_shape,
         num_cameras: int,
+        depth_bounds: Tuple[float, float],
         visualize_dir: Optional[str] = None,
         max_depth: float = 80.0,
     ):
@@ -249,6 +250,7 @@ def __init__(
         self.num_cameras = num_cameras
         self.visualize_dir = visualize_dir
         self.max_depth = max_depth
+        self.depth_bounds = depth_bounds
         self.visualize_dir = Path(visualize_dir) if visualize_dir is not None else None
         if self.visualize_dir is not None:
             self.visualize_dir.mkdir(parents=True, exist_ok=True)
@@ -286,7 +288,7 @@ def transform(self, results: dict) -> Optional[dict]:
 
         # get 2d coords
         dist = cur_coords[:, 2, :]
-        valid_dist_mask = dist > 0
+        valid_dist_mask = (dist >= self.depth_bounds[0]) & (dist < self.depth_bounds[1])
 
         cur_coords[:, 2, :] = np.clip(cur_coords[:, 2, :], 1e-5, 1e5)
         cur_coords[:, :2, :] /= cur_coords[:, 2:3, :]
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index 6695e397e..10a8924d2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -3,7 +3,7 @@
     "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
 ]
 
-experiment_group_name = "bevfusion_camera/j6gen2_depth_base/" + _base_.dataset_type
+experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index c29925243..d0920ccf1 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -6,7 +6,7 @@
 # Image network
 model = dict(
     depth_gt_downsample=8, 
-    loss_depth_weight=3.0,
+    loss_depth_weight=2.0,
     view_transform=dict(
         type="LSSTransformV2",
         xbound=[-54.0, 54.0, 0.3],
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 11b0869d3..0a2452de3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -35,15 +35,16 @@
         rand_flip=True,
         is_train=True,
     ),
-    dict(
-        type="PointsRangeFilter", 
-        point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
-    ),
+    # dict(
+    #     type="PointsRangeFilter", 
+    #     point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
+    # ),
     dict(
         type="PointsToMultiViewImageDepths", 
         img_shape=image_size, 
         num_cameras=len(camera_order), 
-        # visualize_dir="work_dirs/visualize_depths",
+        depth_bounds=[1.0, 60.0],
+        # visualize_dir="work_dirs/visualize_depths_2",
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index 68c3e3688..eb1737e27 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 16
+num_workers = 8
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting
@@ -8,13 +8,13 @@
 voxel_size = [0.075, 0.075, 0.2]
 grid_size = [1440, 1440, 41]
 eval_class_range = {
-    "car": 54.0,
-    "truck": 54.0,
-    "bus": 54.0,
-    "bicycle": 54.0,
-    "pedestrian": 54.0,
-    "traffic_cone": 54.0,
-    "barrier": 54.0,
+    "car": 51.2,
+    "truck": 51.2,
+    "bus": 51.2,
+    "bicycle": 51.2,
+    "pedestrian": 51.2,
+    "traffic_cone": 51.2,
+    "barrier": 51.2,
 }
 
 # LiDAR parameters
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
index d28468f71..2893b2e74 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py
@@ -1,12 +1,12 @@
 # learning rate
 lr = 2.0e-4
-t_max = 3
+t_max = 1
 max_epochs = 30
 val_interval = 5
 
-train_gpu_size = 8
+train_gpu_size = 2
 test_batch_size = 4
-train_batch_size = 16
+train_batch_size = 32
 
 param_scheduler = [
     # learning rate scheduler
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index f4ec3e0db..5b018de19 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
-lr = 1e-4
-t_max = 3
+lr = 2e-4
+t_max = 1
 max_epochs = 30
 val_interval = 1
 
@@ -10,7 +10,14 @@
 
 param_scheduler = [
     # learning rate scheduler
-    dict(type="LinearLR", start_factor=1.0 / 3, begin=0, end=t_max, by_epoch=True),
+    dict(
+        type="LinearLR",
+        start_factor=1.0 / 3,
+        begin=0,
+        end=t_max,
+        by_epoch=True,
+        convert_to_iter_based=True,
+    ),
     dict(
         type="CosineAnnealingLR",
         T_max=(max_epochs - t_max),
@@ -51,8 +58,8 @@
 
 optim_wrapper = dict(
     type="OptimWrapper",
-    optimizer=dict(type="AdamW", lr=lr, weight_decay=0.01),
-    clip_grad=dict(max_norm=0.1, norm_type=2),
+    optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-3),
+    clip_grad=dict(max_norm=5.0, norm_type=2),
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)

From 8b6c1577e0726aaf20e5626c5745bae8c67e1ffe Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Tue, 2 Jun 2026 14:28:54 +0900
Subject: [PATCH 169/183] Vectorize gt_depths scatter, replacing per-camera np.fmin.at loop

---
 projects/BEVFusion/bevfusion/loading.py       | 28 +++++++++----------
 .../pipelines/cameras/default_camera_50m.py   |  2 +-
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 40999f562..6bf3e1b38 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -272,11 +272,6 @@ def transform(self, results: dict) -> Optional[dict]:
         lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4)))
         
         lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
-        depth = np.full(
-            (self.num_cameras, self.img_shape[0], self.img_shape[1]),
-            np.inf,
-            dtype=np.float32,
-        )        
         cur_coords = results["points"][:,:3]
         # inverse aug
         cur_coords -= lidar_aug_matrix[:3, 3]
@@ -307,16 +302,19 @@ def transform(self, results: dict) -> Optional[dict]:
             & (cur_coords[..., 1] >= 0)
             & valid_dist_mask
         )
-        for c in range(self.num_cameras):
-            masked_coords = cur_coords[c, on_img[c]].astype(np.int64)
-            masked_dist = dist[c, on_img[c]]
-            np.fmin.at(
-                depth[c],
-                (masked_coords[:, 0], masked_coords[:, 1]),
-                masked_dist,
-            )
-
-        depth[np.isinf(depth)] = 0
+        
+        # Avoid loops since it's slow 
+        indices = np.nonzero(on_img)
+        camera_indices = indices[0]
+        point_indices = indices[1]
+        masked_coords = cur_coords[camera_indices, point_indices].astype(np.int64)
+        masked_dist = dist[camera_indices, point_indices]
+
+        # Possibly to have duplicates and the last one will be used, however, the chance is small	
+        flatten_indices = camera_indices * self.img_shape[0] * self.img_shape[1] + masked_coords[:, 0] * self.img_shape[1] + masked_coords[:, 1]
+        depth_flat = np.zeros(self.num_cameras * self.img_shape[0] * self.img_shape[1], dtype=np.float32)
+        depth_flat[flatten_indices] = masked_dist
+        depth = depth_flat.reshape(self.num_cameras, self.img_shape[0], self.img_shape[1])
         results["gt_depths"] = depth
 
         if self.visualize_dir is not None:
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 0a2452de3..b846f3624 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -44,7 +44,7 @@
         img_shape=image_size, 
         num_cameras=len(camera_order), 
         depth_bounds=[1.0, 60.0],
-        # visualize_dir="work_dirs/visualize_depths_2",
+        # visualize_dir="work_dirs/visualize_depths_3",
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",

From 5002fb2a265042a0ee75016fe7ae1430d6dbaaec Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Tue, 2 Jun 2026 15:12:36 +0900
Subject: [PATCH 170/183] Time depth transform, skip absent lidar aug, re-enable PointsRangeFilter

---
 projects/BEVFusion/bevfusion/loading.py       | 31 ++++++++++++-------
 .../pipelines/cameras/default_camera_50m.py   | 10 +++---
 .../default/pipelines/default_lidar_50m.py    |  2 +-
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 6bf3e1b38..2ef6e718c 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -1,6 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
 from pathlib import Path
+import time
 from typing import List, Optional, Tuple
 
 import matplotlib.pyplot as plt
@@ -266,17 +267,22 @@ def transform(self, results: dict) -> Optional[dict]:
             dict: The result dict containing the multi-view image data.
             Added keys:
                 - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width).
-        """ 
-        lidar2image = np.array(results["lidar2img"])
-        img_aug_matrix = np.array(results.get("img_aug_matrix", np.eye(4)))
-        lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4)))
+        """
+        start_time = time.perf_counter()
         
-        lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
-        cur_coords = results["points"][:,:3]
-        # inverse aug
-        cur_coords -= lidar_aug_matrix[:3, 3]
-        cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0)
-
+        lidar2image = np.asarray(results["lidar2img"])
+        img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4)
+        cur_coords = results["points"].numpy()[:,:3]
+
+        # inverse lidar aug
+        if "lidar_aug_matrix" in results:
+          lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"])
+          lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
+          cur_coords -= lidar_aug_matrix[:3, 3]
+          cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0)
+        else:
+          cur_coords = cur_coords.transpose(1, 0)
+          
         # lidar2image
         cur_coords = lidar2image[:, :3, :3] @ cur_coords
         cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1)
@@ -302,7 +308,7 @@ def transform(self, results: dict) -> Optional[dict]:
             & (cur_coords[..., 1] >= 0)
             & valid_dist_mask
         )
-        
+
         # Avoid loops since it's slow 
         indices = np.nonzero(on_img)
         camera_indices = indices[0]
@@ -319,7 +325,8 @@ def transform(self, results: dict) -> Optional[dict]:
 
         if self.visualize_dir is not None:
             self._save_depth_subplot(depth, results)
-
+        end_time = time.perf_counter()
+        print(f"Time taken: {end_time - start_time} seconds")
         return results
 
     def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index b846f3624..00e7ac896 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -35,16 +35,16 @@
         rand_flip=True,
         is_train=True,
     ),
-    # dict(
-    #     type="PointsRangeFilter", 
-    #     point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
-    # ),
+    dict(
+        type="PointsRangeFilter", 
+        point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
+    ),
     dict(
         type="PointsToMultiViewImageDepths", 
         img_shape=image_size, 
         num_cameras=len(camera_order), 
         depth_bounds=[1.0, 60.0],
-        # visualize_dir="work_dirs/visualize_depths_3",
+        # visualize_dir="work_dirs/visualize_depths_6",
     ),
     dict(
         type="BEVFusionGlobalRotScaleTrans",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index eb1737e27..f49e2dbb4 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 8
+num_workers = 4
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting

From cbe6f3bcd0bd93efdc144fea48722e57610f4a0f Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Tue, 2 Jun 2026 15:12:57 +0900
Subject: [PATCH 171/183] Remove timing instrumentation from loading.py transform

---
 projects/BEVFusion/bevfusion/loading.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 2ef6e718c..5d5c8d13a 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -1,7 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
 from pathlib import Path
-import time
 from typing import List, Optional, Tuple
 
 import matplotlib.pyplot as plt
@@ -268,8 +267,6 @@ def transform(self, results: dict) -> Optional[dict]:
             Added keys:
                 - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width).
         """
-        start_time = time.perf_counter()
-        
         lidar2image = np.asarray(results["lidar2img"])
         img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4)
         cur_coords = results["points"].numpy()[:,:3]
@@ -325,8 +322,6 @@ def transform(self, results: dict) -> Optional[dict]:
 
         if self.visualize_dir is not None:
             self._save_depth_subplot(depth, results)
-        end_time = time.perf_counter()
-        print(f"Time taken: {end_time - start_time} seconds")
         return results
 
     def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:

From 4547bb3011b80300d5201769ca3c9d8d446ec147 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Tue, 2 Jun 2026 15:13:43 +0900
Subject: [PATCH 172/183] Restore num_workers to 8 in default_lidar_50m.py

---
 .../configs/t4dataset/default/pipelines/default_lidar_50m.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index f49e2dbb4..eb1737e27 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 4
+num_workers = 8
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting

From a33fc4c96a563acaffa6e9e8eef86a1e60fd10cc Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Tue, 2 Jun 2026 15:21:23 +0900
Subject: [PATCH 173/183] Set num_workers to 16 and train_batch_size to 32

---
 .../configs/t4dataset/default/pipelines/default_lidar_50m.py    | 2 +-
 .../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index eb1737e27..51688a7aa 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 8
+num_workers = 16
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 5b018de19..2c00474d6 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -6,7 +6,7 @@
 
 train_gpu_size = 2
 test_batch_size = 2
-train_batch_size = 16
+train_batch_size = 32
 
 param_scheduler = [
     # learning rate scheduler

From 44cd0896ac7f58da22b2798a0e5a0b1f8172bf89 Mon Sep 17 00:00:00 2001
From: KSeangTan <kseangtan@gmail.com>
Date: Wed, 3 Jun 2026 20:05:06 +0900
Subject: [PATCH 174/183] Add camera-depth-aware LSSTransformV2 variant and supporting modules

---
 projects/BEVFusion/bevfusion/__init__.py      |   5 +-
 projects/BEVFusion/bevfusion/bevfusion.py     |  51 +++
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 290 +++++++++++++++++-
 ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py |   4 +-
 ...era_resnet50_fpn_lss_50e_8xb16_base_50m.py |   4 +-
 ...snet50_fpn_camera_depth_aware_lssv2_50m.py |  28 ++
 6 files changed, 365 insertions(+), 17 deletions(-)
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py

diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py
index 9e1ca1cf4..c4afb6a86 100644
--- a/projects/BEVFusion/bevfusion/__init__.py
+++ b/projects/BEVFusion/bevfusion/__init__.py
@@ -3,7 +3,7 @@
 from .bevfusion_necks import GeneralizedLSSFPN
 from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder
 from .depth_lss import DepthLSSTransform, LSSTransform
-from .depth_lss_v2 import LSSTransformV2
+from .depth_lss_v2 import LSSTransformV2, LSSTransformV2DepthAware
 from .loading import BEVLoadMultiViewImageFromFiles, PointsToMultiViewImageDepths
 from .sparse_encoder import BEVFusionSparseEncoder
 from .transformer import TransformerDecoderLayer
@@ -39,5 +39,6 @@
     "HardSimpleVoxelSinCosEncoder",
     "BEVFusionVoxelFeatureNet",
     "LSSTransformV2",
-    "PointsToMultiViewImageDepths"
+    "PointsToMultiViewImageDepths",
+    "LSSTransformV2DepthAware",
 ]
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 17d924ff9..1cdff32ad 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -149,6 +149,47 @@ def with_bbox_head(self):
     def with_seg_head(self):
         """bool: Whether the detector has a segmentation head."""
         return hasattr(self, "seg_head") and self.seg_head is not None
+    
+    def prepare_camera_depth_aware_parameters(
+        self, 
+        camera_intrinsics: torch.Tensor, 
+        img_aug_matrix: torch.Tensor,
+        lidar_aug_matrix: torch.Tensor,
+        camera2lidar: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Args:
+            camera_intrinsics: torch.Tensor, the camera intrinsics of shape (B, N, 3, 3).
+            img_aug_matrix: torch.Tensor, the image augmentation matrix of shape (B, N, 4, 4).
+            lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, N, 4, 4).
+            camera2lidar: torch.Tensor, the camera to lidar matrix of shape (B, N, 4, 4).
+        Returns:
+            torch.Tensor, the camera depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS).
+        """
+        # (B*N, 15)
+        mlp_input = torch.stack([
+            camera_intrinsics[:, :, 0, 0],   # fx
+            camera_intrinsics[:, :, 1, 1],   # fy
+            camera_intrinsics[:, :, 0, 2],   # cx
+            camera_intrinsics[:, :, 1, 2],   # cy
+            img_aug_matrix[:, :, 0, 0],   # r11
+            img_aug_matrix[:, :, 0, 1],   # r12
+            img_aug_matrix[:, :, 0, 3],   # t1
+            img_aug_matrix[:, :, 1, 0],   # r21
+            img_aug_matrix[:, :, 1, 1],   # r22
+            img_aug_matrix[:, :, 1, 3],   # t2
+            lidar_aug_matrix[:, :, 0, 0],   # r11
+            lidar_aug_matrix[:, :, 0, 1],   # r12
+            lidar_aug_matrix[:, :, 1, 0],   # r21
+            lidar_aug_matrix[:, :, 1, 1],   # r22
+            lidar_aug_matrix[:, :, 2, 2],   # r33
+        ], dim=-1)
+        # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12)
+        camera2lidar_flatten = camera2lidar[:,:,:3,:].view(-1, 12)
+
+        # (B*N, 15+12)
+        mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1)
+        return mlp_input
 
     def get_image_backbone_features(self, x: torch.Tensor) -> torch.Tensor:
         B, N, C, H, W = x.size()
@@ -327,6 +368,12 @@ def extract_feat(
             camera2lidar = imgs.new_tensor(np.asarray(camera2lidar))
             img_aug_matrix = imgs.new_tensor(np.asarray(img_aug_matrix))
             lidar_aug_matrix = imgs.new_tensor(np.asarray(lidar_aug_matrix))
+            camera_depth_aware_parameters = self.prepare_camera_depth_aware_parameters(
+                camera_intrinsics=camera_intrinsics,
+                img_aug_matrix=img_aug_matrix,
+                lidar_aug_matrix=lidar_aug_matrix,
+                camera2lidar=camera2lidar,
+            )
             img_feature, pred_depths = self.extract_img_feat(
                 imgs,
                 deepcopy(points),
@@ -337,6 +384,7 @@ def extract_feat(
                 lidar_aug_matrix,
                 batch_input_metas,
                 using_image_features=using_image_features,
+                camera_depth_aware_parameters=camera_depth_aware_parameters,
             )
             features.append(img_feature)
         elif imgs is not None:
@@ -348,6 +396,8 @@ def extract_feat(
             img_aug_matrix = batch_inputs_dict["img_aug_matrix"]
             lidar_aug_matrix = batch_inputs_dict["lidar_aug_matrix"]
             geom_feats = batch_inputs_dict["geom_feats"]
+            # Retrieve the parameters from deployment code directly
+            camera_depth_aware_parameters = batch_inputs_dict["camera_depth_aware_parameters"]
 
             img_feature, pred_depths = self.extract_img_feat(
                 imgs,
@@ -360,6 +410,7 @@ def extract_feat(
                 batch_input_metas,
                 geom_feats=geom_feats,
                 using_image_features=using_image_features,
+                camera_depth_aware_parameters=camera_depth_aware_parameters,
             )
             features.append(img_feature)
 
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index def32ee5e..251fc61fc 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -7,12 +7,215 @@
 import torch
 from mmdet3d.registry import MODELS
 from mmengine.logging import print_log
+from mmdet.models.backbones.resnet import BasicBlock
 from torch import nn
+from torch.utils.checkpoint import checkpoint
 
 from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet
 from .ops import bev_pool_v2
 
 
+class SELayer(nn.Module):
+    """
+    Squeeze-and-Excitation (SE) layer. 
+    This is used to modulate features with camera-depth aware parameters.
+    The code is taken from BEVDET (https://github.com/hustvl/BEVDET).
+    """
+
+    def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):
+        super().__init__()
+        # Dont need global pooling because inputs are (B*N, C, 1, 1).
+        self.sequeeze_net = nn.Sequential(
+            [
+                # Squeeze with 1x1 convolution
+                nn.Conv2d(channels, channels, 1, bias=True), 
+                # Activation
+                act_layer(),
+                # Expand with 1x1 convolution
+                nn.Conv2d(channels, channels, 1, bias=True),
+                # Gate with sigmoid activation
+                gate_layer(),
+            ]
+        )
+
+    def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: Tuple[torch.Tensor, torch.Tensor], the input tuple containing the image features and camera-depth aware parameters.
+        Returns:
+            torch.Tensor, the output tensor of shape (B, N, C).
+        """
+        feature_attentions = self.sequeeze_net(depth_aware_features)
+        return x * feature_attentions
+
+
+class CameraDepthLinearProjectionMLP(nn.Module):
+    """
+    Linear projection module by MLP. This is used to project image (context) features and camera-depth 
+    aware parameters (for example, intrinsics) to embedding space.
+    The code is taken from BEVDET (https://github.com/hustvl/BEVDET).
+    """
+
+    def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop_out: float = 0.0):
+        """
+        Args:
+            in_channels: int, the number of input channels.
+            hidden_channels: int, the number of hidden channels.
+            out_channels: int, the number of output channels.
+            drop_out: float, the dropout rate.
+        """
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.drop_out = drop_out
+
+        self.sequential_mlp = nn.Sequential(
+            nn.Linear(in_channels, hidden_channels),
+            nn.ReLU(inplace=True),
+            nn.Dropout(drop_out),
+            nn.Linear(hidden_channels, out_channels),
+            nn.Dropout(drop_out),
+        )
+    
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: torch.Tensor, the input tensor of shape (B, N, C).
+        Returns:
+            torch.Tensor, the output tensor of shape (B, N, C).
+        """
+        return self.sequential_mlp(x)
+
+
+class CameraDepthAwareNet(nn.Module):
+    """
+    Camera-depth aware depth net. This is used to predict the depth of the scene.
+    The code is taken from BEVDET (https://github.com/hustvl/BEVDET).
+    """
+
+    def __init__(
+        self, 
+        in_channels: int, 
+        hidden_channels: int,
+        out_channels: int,
+        mlp_drop_out: float, 
+        downsample: int,
+        depth_channels: int,
+        with_cp: bool = False,
+        num_camera_depth_parameters: int = 27) -> None:
+        """
+        Args:
+            in_channels: int, the number of input channels.
+            out_channels: int, the number of output channels.
+            mlp_drop_out: float, the dropout rate of the MLP.
+            mlp_hidden_channels: int, the number of hidden channels of the MLP.
+            mlp_out_channels: int, the number of output channels of the MLP.
+        """
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.mlp_drop_out = mlp_drop_out
+        self.num_camera_depth_parameters = num_camera_depth_parameters
+        self.downsample = downsample
+        self.depth_channels = depth_channels
+        self.with_cp = with_cp
+
+        # Input convolution for context/image features
+        # Camera depth aware parameters branch
+        self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
+            self.num_camera_depth_parameters
+        )
+        
+        # Context/image feature branch
+        self.context_input_conv = nn.Sequential(
+            nn.Conv2d(
+                in_channels, hidden_channels, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(hidden_channels),
+            nn.ReLU(inplace=True),
+        )
+        self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+            in_channels=self.num_camera_depth_parameters, 
+            hidden_channels=hidden_channels, 
+            out_channels=hidden_channels, 
+            drop_out=self.mlp_drop_out
+        )
+        self.context_se = SELayer(channels=hidden_channels)
+        self.context_conv = nn.Conv2d(
+            hidden_channels, 
+            out_channels, 
+            stride=1, padding=1)
+
+        # Depth branch 
+        self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+            in_channels=self.num_camera_depth_parameters, 
+            hidden_channels=hidden_channels, 
+            out_channels=hidden_channels, 
+            drop_out=self.mlp_drop_out
+        )
+        self.depth_se = SELayer(channels=hidden_channels)
+        self.depth_conv = nn.Sequantial(
+            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
+            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
+            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
+            nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0)
+        ) 
+    
+    def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: torch.Tensor, the input tensor of shape (B*N, C, H, W).
+            camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS).
+        Returns:
+            torch.Tensor, the output tensor of shape (B*N, C, H, W).
+        """
+        context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features)
+        # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
+        context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        context_features = self.context_se(context_features, context_camera_depth_aware_features)
+        context_features = self.context_conv(context_features)
+        return context_features
+    
+    def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            depth_features: torch.Tensor, the input tensor of shape (B*N, C, H, W).
+            camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B, N, D).
+        Returns:
+            torch.Tensor, the output tensor of shape (B*N, C, H, W).
+        """
+        depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features)
+        # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
+        depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        # (B*N, C, H, W)
+        depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features)
+        if self.with_cp:
+            depth_features = checkpoint(self.depth_conv, depth_features)
+        else:
+            depth_features = self.depth_conv(depth_features)
+        return depth_features
+
+    def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: torch.Tensor, the input tensor of shape (B, N, C, H, W).
+            camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B, N, N_CAMERA_DEPTH_PARAMETERS).
+        Returns:
+            torch.Tensor, the output tensor of shape (B*N, C, H, W).
+        """
+        # (B, N, N_CAMERA_DEPTH_PARAMETERS) -> (B*N, N_CAMERA_DEPTH_PARAMETERS)
+        camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters)
+        
+        # (B*N, N_CAMERA_DEPTH_PARAMETERS)
+        camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
+        context_features = self.input_conv(x) 
+
+        context_features = self.context_forward(context_features, camera_depth_aware_features)
+        depth_features = self.depth_forward(context_features, camera_depth_aware_features)
+        return torch.cat([depth_features, context_features], dim=1)
+
+
 class BaseViewTransformV2(BaseViewTransform):
 
     def __init__(
@@ -48,7 +251,7 @@ def __init__(
         self.collapse_z = collapse_z
         self.expand_batch_axis = expand_batch_axis
 
-    def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
+    def get_cam_feats(self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]:
         raise NotImplementedError
 
     def forward(
@@ -65,6 +268,7 @@ def forward(
         img_aug_matrix_inverse,
         lidar_aug_matrix_inverse,
         geom_feats_precomputed,
+        camera_depth_aware_parameters: Optional[torch.Tensor] = None,
     ):
         if geom_feats_precomputed is not None:
             ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed
@@ -100,7 +304,7 @@ def forward(
             (
                 view_feats,
                 depth_softmax,
-            ) = self.get_cam_feats(img)
+            ) = self.get_cam_feats(img, camera_depth_aware_parameters)
             x = self.bev_pool(view_feats, depth_softmax, geom)
          
         return x, depth_softmax
@@ -232,9 +436,23 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth
         bev_feat = self.compute_bev_pool(
             view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths
         )
-        return bev_feat 
-
+        return bev_feat
 
+    def get_depth_softmax(self, x: torch.Tensor, B, N, fH, fW) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Args:
+            x: torch.Tensor, the input tensor of shape (B*N, D+C, H, W).
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor], the tuple containing the view features and depth softmax.
+            view_feats: torch.Tensor, the view features of shape (B, N, C, H, W).
+            depth_softmax: torch.Tensor, the depth softmax of shape (B, N, D, H, W).
+        """
+        depth_softmax = x[:, : self.D].softmax(dim=1)
+        depth_softmax = depth_softmax.view(B, N, self.D, fH, fW)
+        view_feats = x[:, self.D : (self.D + self.C)]
+        view_feats = view_feats.view(B, N, self.C, fH, fW)
+        return view_feats, depth_softmax
+    
 @MODELS.register_module()
 class LSSTransformV2(BaseViewTransformV2):
 
@@ -263,19 +481,69 @@ def __init__(
         self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1)
         self.downsample = DownSampleNet(downsample, out_channels, out_channels)
 
-    def get_cam_feats(self, x):
+    def get_cam_feats(
+        self, 
+        x: torch.Tensor, 
+        camera_depth_aware_parameters: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         B, N, C, fH, fW = x.shape
         x = x.view(B * N, C, fH, fW)
         x = self.depthnet(x)
+        return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW)
+    
+    def forward(self, *args, **kwargs):
+        x, depth_softmax = super().forward(*args, **kwargs)
+        x = self.downsample(x)
+        return x, depth_softmax
 
-        depth_softmax = x[:, : self.D].softmax(dim=1)
-        depth_softmax = depth_softmax.view(B, N, self.D, fH, fW)
-        view_feats = x[:, self.D : (self.D + self.C)]
-        view_feats = view_feats.view(B, N, self.C, fH, fW)
-        return view_feats, depth_softmax
+
+@MODELS.register_module()
+class LSSTransformV2DepthAware(BaseViewTransformV2):
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        image_size: Tuple[int, int],
+        feature_size: Tuple[int, int],
+        xbound: Tuple[float, float, float],
+        ybound: Tuple[float, float, float],
+        zbound: Tuple[float, float, float],
+        dbound: Tuple[float, float, float],
+        camera_depth_aware_configs: dict, 
+        downsample: int = 1,
+    ):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            image_size=image_size,
+            feature_size=feature_size,
+            xbound=xbound,
+            ybound=ybound,
+            zbound=zbound,
+            dbound=dbound,
+        )
+        self.downsample = DownSampleNet(downsample, out_channels, out_channels)
+        self.camera_depth_aware_net = CameraDepthAwareNet(
+            in_channels=in_channels,
+            hidden_channels=in_channels,
+            mlp_drop_out=camera_depth_aware_configs["mlp_drop_out"],
+            downsample=camera_depth_aware_configs["downsample"],
+            depth_channels=self.D,
+            out_channels=self.C,
+        )
+    
+    def get_cam_feats(
+        self, 
+        x: torch.Tensor, 
+        camera_depth_aware_parameters: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        B, N, C, fH, fW = x.shape
+        x = x.view(B * N, C, fH, fW)
+        x = self.camera_depth_aware_net(x, camera_depth_aware_parameters)
+        return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW)
     
     def forward(self, *args, **kwargs):
         x, depth_softmax = super().forward(*args, **kwargs)
         x = self.downsample(x)
         return x, depth_softmax
-    
\ No newline at end of file
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index 10a8924d2..5de72725c 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -1,10 +1,10 @@
 _base_ = [
     "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py",
-    "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py",
 ]
 
 experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
index 5215dc9f3..40f008b34 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py
@@ -1,10 +1,10 @@
 _base_ = [
     "../default_bevfusion_camera_50e_8xb16_base_50m.py",
-    "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py",
 ]
 
 experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_50e_8xb16_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py
new file mode 100644
index 000000000..37b98f82d
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py
@@ -0,0 +1,28 @@
+_base_ = [
+    "./camera_resnet50_fpn_depthlss_120m.py",
+]
+num_proposals = 200 
+
+# Image network
+model = dict(
+    depth_gt_downsample=8, 
+    loss_depth_weight=2.0,
+    view_transform=dict(
+        type="LSSTransformV2DepthAware",
+        xbound=[-54.0, 54.0, 0.3],
+        ybound=[-54.0, 54.0, 0.3],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 60, 0.5],
+        downsample=2,
+        camera_depth_aware_configs=dict(
+            mlp_drop_out=0.0,
+            downsample=8,
+        ),
+    ),
+    bbox_head=dict(
+        num_proposals=num_proposals,
+        bbox_coder=dict(
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+        ),
+    ),
+)

From b7eae63e5a90c85a74cd772895d9f7f5cad55df5 Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Thu, 4 Jun 2026 11:09:33 +0900
Subject: [PATCH 175/183] Bypass depth-aware SE branches; retune 30e scheduler

---
 projects/BEVFusion/bevfusion/bevfusion.py     | 13 ++-
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 92 +++++++++----------
 ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py |  4 +-
 ...snet50_fpn_camera_depth_aware_lssv2_50m.py | 28 ------
 .../default_30e_8xb16_adamw_linear_cosine.py  |  8 +-
 5 files changed, 59 insertions(+), 86 deletions(-)
 delete mode 100644 projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 1cdff32ad..75fbf181f 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -161,11 +161,14 @@ def prepare_camera_depth_aware_parameters(
         Args:
             camera_intrinsics: torch.Tensor, the camera intrinsics of shape (B, N, 3, 3).
             img_aug_matrix: torch.Tensor, the image augmentation matrix of shape (B, N, 4, 4).
-            lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, N, 4, 4).
+            lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, 4, 4).
             camera2lidar: torch.Tensor, the camera to lidar matrix of shape (B, N, 4, 4).
         Returns:
             torch.Tensor, the camera depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS).
         """
+        B, N, _, _ = camera_intrinsics.shape
+        lidar_aug_matrix = lidar_aug_matrix.view(B, 1, 4, 4).repeat(1, N, 1, 1)
+        
         # (B*N, 15)
         mlp_input = torch.stack([
             camera_intrinsics[:, :, 0, 0],   # fx
@@ -185,9 +188,9 @@ def prepare_camera_depth_aware_parameters(
             lidar_aug_matrix[:, :, 2, 2],   # r33
         ], dim=-1)
         # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12)
-        camera2lidar_flatten = camera2lidar[:,:,:3,:].view(-1, 12)
-
-        # (B*N, 15+12)
+        camera2lidar_flatten = camera2lidar[:,:,:3,:].view(B, N, -1)
+        
+        # (B, N, 15+12)
         mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1)
         return mlp_input
 
@@ -221,6 +224,7 @@ def extract_img_feat(
         lidar_aug_matrix_inverse=None,
         geom_feats=None,
         using_image_features=False,
+        camera_depth_aware_parameters=None
     ) -> Tuple[torch.Tensor, torch.Tensor]:
 
         if not using_image_features:
@@ -241,6 +245,7 @@ def extract_img_feat(
                 img_aug_matrix_inverse,
                 lidar_aug_matrix_inverse,
                 geom_feats,
+                camera_depth_aware_parameters=camera_depth_aware_parameters
             )
         return x, pred_depths
 
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 251fc61fc..4190cc582 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -26,16 +26,14 @@ def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):
         super().__init__()
         # Dont need global pooling because inputs are (B*N, C, 1, 1).
         self.sequeeze_net = nn.Sequential(
-            [
-                # Squeeze with 1x1 convolution
-                nn.Conv2d(channels, channels, 1, bias=True), 
-                # Activation
-                act_layer(),
-                # Expand with 1x1 convolution
-                nn.Conv2d(channels, channels, 1, bias=True),
-                # Gate with sigmoid activation
-                gate_layer(),
-            ]
+            # Squeeze with 1x1 convolution
+            nn.Conv2d(channels, channels, 1, bias=True), 
+            # Activation
+            act_layer(),
+            # Expand with 1x1 convolution
+            nn.Conv2d(channels, channels, 1, bias=True),
+            # Gate with sigmoid activation
+            gate_layer(),
         )
 
     def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch.Tensor:
@@ -100,7 +98,6 @@ def __init__(
         hidden_channels: int,
         out_channels: int,
         mlp_drop_out: float, 
-        downsample: int,
         depth_channels: int,
         with_cp: bool = False,
         num_camera_depth_parameters: int = 27) -> None:
@@ -118,15 +115,14 @@ def __init__(
         self.out_channels = out_channels
         self.mlp_drop_out = mlp_drop_out
         self.num_camera_depth_parameters = num_camera_depth_parameters
-        self.downsample = downsample
         self.depth_channels = depth_channels
         self.with_cp = with_cp
 
         # Input convolution for context/image features
         # Camera depth aware parameters branch
-        self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
-            self.num_camera_depth_parameters
-        )
+        # self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
+        #     self.num_camera_depth_parameters
+        # )
         
         # Context/image feature branch
         self.context_input_conv = nn.Sequential(
@@ -135,30 +131,31 @@ def __init__(
             nn.BatchNorm2d(hidden_channels),
             nn.ReLU(inplace=True),
         )
-        self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-            in_channels=self.num_camera_depth_parameters, 
-            hidden_channels=hidden_channels, 
-            out_channels=hidden_channels, 
-            drop_out=self.mlp_drop_out
-        )
-        self.context_se = SELayer(channels=hidden_channels)
+        # self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+        #     in_channels=self.num_camera_depth_parameters, 
+        #     hidden_channels=hidden_channels, 
+        #     out_channels=hidden_channels, 
+        #     drop_out=self.mlp_drop_out
+        # )
+        # self.context_se = SELayer(channels=hidden_channels)
         self.context_conv = nn.Conv2d(
             hidden_channels, 
             out_channels, 
-            stride=1, padding=1)
+            kernel_size=1,
+            stride=1, padding=0)
 
         # Depth branch 
-        self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-            in_channels=self.num_camera_depth_parameters, 
-            hidden_channels=hidden_channels, 
-            out_channels=hidden_channels, 
-            drop_out=self.mlp_drop_out
-        )
-        self.depth_se = SELayer(channels=hidden_channels)
-        self.depth_conv = nn.Sequantial(
-            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
-            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
-            BasicBlock(hidden_channels, hidden_channels, downsample=downsample),
+        # self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+        #     in_channels=self.num_camera_depth_parameters, 
+        #     hidden_channels=hidden_channels, 
+        #     out_channels=hidden_channels, 
+        #     drop_out=self.mlp_drop_out
+        # )
+        # self.depth_se = SELayer(channels=hidden_channels)
+        self.depth_conv = nn.Sequential(
+            BasicBlock(hidden_channels, hidden_channels, downsample=None),
+            BasicBlock(hidden_channels, hidden_channels),
+            BasicBlock(hidden_channels, hidden_channels),
             nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0)
         ) 
     
@@ -170,10 +167,10 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea
         Returns:
             torch.Tensor, the output tensor of shape (B*N, C, H, W).
         """
-        context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features)
-        # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
-        context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
-        context_features = self.context_se(context_features, context_camera_depth_aware_features)
+        # context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features)
+        # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
+        # context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        # context_features = self.context_se(context_features, context_camera_depth_aware_features)
         context_features = self.context_conv(context_features)
         return context_features
     
@@ -185,11 +182,11 @@ def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_feature
         Returns:
             torch.Tensor, the output tensor of shape (B*N, C, H, W).
         """
-        depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features)
-        # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
-        depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
-        # (B*N, C, H, W)
-        depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features)
+        # depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features)
+        # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
+        # depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        # # (B*N, C, H, W)
+        # depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features)
         if self.with_cp:
             depth_features = checkpoint(self.depth_conv, depth_features)
         else:
@@ -208,11 +205,11 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor)
         camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters)
         
         # (B*N, N_CAMERA_DEPTH_PARAMETERS)
-        camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
-        context_features = self.input_conv(x) 
+        # camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
+        context_input_features = self.context_input_conv(x) 
 
-        context_features = self.context_forward(context_features, camera_depth_aware_features)
-        depth_features = self.depth_forward(context_features, camera_depth_aware_features)
+        context_features = self.context_forward(context_input_features, None)
+        depth_features = self.depth_forward(context_input_features, None)
         return torch.cat([depth_features, context_features], dim=1)
 
 
@@ -528,7 +525,6 @@ def __init__(
             in_channels=in_channels,
             hidden_channels=in_channels,
             mlp_drop_out=camera_depth_aware_configs["mlp_drop_out"],
-            downsample=camera_depth_aware_configs["downsample"],
             depth_channels=self.D,
             out_channels=self.C,
         )
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index 5de72725c..c9afb963b 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -1,10 +1,10 @@
 _base_ = [
     "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py",
-    "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py",
+    "../../default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py",
 ]
 
 experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type
-experiment_name = "bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_30e_8xb16_j6gen2_base_50m"
+experiment_name = "bevfusion_camera_resnet50_fpn_lss_depth_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
 # model parameter
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py
deleted file mode 100644
index 37b98f82d..000000000
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py
+++ /dev/null
@@ -1,28 +0,0 @@
-_base_ = [
-    "./camera_resnet50_fpn_depthlss_120m.py",
-]
-num_proposals = 200 
-
-# Image network
-model = dict(
-    depth_gt_downsample=8, 
-    loss_depth_weight=2.0,
-    view_transform=dict(
-        type="LSSTransformV2DepthAware",
-        xbound=[-54.0, 54.0, 0.3],
-        ybound=[-54.0, 54.0, 0.3],
-        zbound=[-10.0, 10.0, 20.0],
-        dbound=[1.0, 60, 0.5],
-        downsample=2,
-        camera_depth_aware_configs=dict(
-            mlp_drop_out=0.0,
-            downsample=8,
-        ),
-    ),
-    bbox_head=dict(
-        num_proposals=num_proposals,
-        bbox_coder=dict(
-            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
-        ),
-    ),
-)
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 2c00474d6..4965e981f 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,6 +1,6 @@
 # learning rate
-lr = 2e-4
-t_max = 1
+lr = 1e-4
+t_max = 3
 max_epochs = 30
 val_interval = 1
 
@@ -58,8 +58,8 @@
 
 optim_wrapper = dict(
     type="OptimWrapper",
-    optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-3),
-    clip_grad=dict(max_norm=5.0, norm_type=2),
+    optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-2),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)

From 6c88b6196b81a77d8e40ce5f5325fbe0a30176fd Mon Sep 17 00:00:00 2001
From: KSeabgTan <kseangtan@gmail.com>
Date: Thu, 4 Jun 2026 11:35:35 +0900
Subject: [PATCH 176/183] Add camera_resnet50_fpn_lss_depth_50m model config

---
 .../camera_resnet50_fpn_lss_depth_50m.py      | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py

diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
new file mode 100644
index 000000000..50e8098a5
--- /dev/null
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
@@ -0,0 +1,29 @@
+_base_ = [
+    "./camera_resnet50_fpn_depthlss_120m.py",
+]
+num_proposals = 200 
+
+# Image network
+model = dict(
+    depth_gt_downsample=8, 
+    loss_depth_weight=2.0,
+    view_transform=dict(
+        type="LSSTransformV2DepthAware",
+        xbound=[-54.0, 54.0, 0.3],
+        ybound=[-54.0, 54.0, 0.3],
+        zbound=[-10.0, 10.0, 20.0],
+        dbound=[1.0, 60, 0.5],
+        downsample=2,
+        camera_depth_aware_configs=dict(
+            mlp_drop_out=0.0,
+            downsample=8,
+            num_camera_depth_parameters=27
+        ),
+    ),
+    bbox_head=dict(
+        num_proposals=num_proposals,
+        bbox_coder=dict(
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+        ),
+    ),
+)

From f05e742ceced32b4893312f937142b83e3a2e1d8 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 4 Jun 2026 20:05:18 +0900
Subject: [PATCH 177/183] Updated

---
 projects/BEVFusion/bevfusion/bevfusion.py     |  92 +++++++++++-
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 134 ++++++++++++++----
 ...fusion_camera_30e_8xb16_j6gen2_base_50m.py |   2 +-
 ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py |   2 +-
 .../camera_resnet50_fpn_lss_depth_50m.py      |   3 +-
 .../default/pipelines/default_lidar_50m.py    |   2 +-
 .../default_30e_8xb16_adamw_linear_cosine.py  |   4 +-
 7 files changed, 203 insertions(+), 36 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 75fbf181f..242ffb658 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -1,7 +1,10 @@
+import math
 from collections import OrderedDict
 from copy import deepcopy
+from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 
+import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.distributed as dist
@@ -9,6 +12,7 @@
 from mmdet3d.registry import MODELS
 from mmdet3d.structures import Det3DDataSample
 from mmdet3d.utils import OptConfigType, OptMultiConfig, OptSampleList
+from mmengine.logging import print_log
 from mmengine.utils import is_list_of
 from torch import Tensor
 from torch.nn import functional as F
@@ -36,6 +40,7 @@ def __init__(
         seg_head: Optional[dict] = None,
         loss_depth_weight: float = 3.0,
         depth_gt_downsample: int = 1,
+        visualize_gt_depth_dir: Optional[str] = None,
         **kwargs,
     ) -> None:
         """Initialize BEVFusion model.
@@ -79,6 +84,11 @@ def __init__(
         self._weights_initialized = False
         self.loss_depth_weight = loss_depth_weight
         self.depth_gt_downsample = depth_gt_downsample
+        self.visualize_gt_depth_dir = (
+            Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None
+        )
+        if self.visualize_gt_depth_dir is not None:
+            self.visualize_gt_depth_dir.mkdir(parents=True, exist_ok=True)
 
     def _forward(
         self, batch_inputs_dict: Tensor, batch_data_samples: OptSampleList = [], using_image_features=False, **kwargs
@@ -471,7 +481,86 @@ def loss(
             losses.update(bbox_loss)
 
         return losses
- 
+
+    def _visualize_one_hot_gt_depth(
+        self,
+        gt_depths_one_hot: Tensor,
+        batch_size: int,
+        num_cameras: int,
+        height: int,
+        width: int,
+        batch_idx: int = 0,
+        num_channels: int = 6,
+    ) -> None:
+        """Save one-hot depth GT maps for the first batch and first few depth channels.
+
+        Args:
+            gt_depths_one_hot (Tensor): One-hot depth GT of shape [B*N*H*W, D].
+            batch_size (int): Batch size B from the original input.
+            num_cameras (int): Number of camera views N from the original input.
+            height (int): Original input height H before downsampling.
+            width (int): Original input width W before downsampling.
+            batch_idx (int): Batch index to visualize.
+            num_channels (int): Number of depth-bin channels to visualize.
+        """
+        if self.visualize_gt_depth_dir is None:
+            return
+
+        if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0:
+            return
+
+        if batch_size <= batch_idx or num_cameras == 0:
+            return
+
+        downsample = self.depth_gt_downsample
+        height_down = height // downsample
+        width_down = width // downsample
+        num_depth_bins = gt_depths_one_hot.shape[1]
+
+        num_channels = min(num_channels, num_depth_bins)
+        if num_channels == 0 or height_down == 0 or width_down == 0:
+            return
+
+        with torch.no_grad():
+            one_hot = gt_depths_one_hot.view(
+                batch_size, num_cameras, height_down, width_down, num_depth_bins
+            )
+            depth_channels = one_hot[batch_idx, 0, :, :, :num_channels].detach().float().cpu().numpy()
+
+        ncols = min(3, num_channels)
+        nrows = math.ceil(num_channels / ncols)
+        fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows), squeeze=False)
+
+        dbounds = self.view_transform.dbound
+        for ch_idx in range(num_channels):
+            ax = axes[ch_idx // ncols, ch_idx % ncols]
+            channel_map = depth_channels[:, :, ch_idx]
+            depth_m = dbounds[0] + (ch_idx + 0.5) * dbounds[2]
+            im = ax.imshow(channel_map, cmap="viridis", vmin=0, vmax=1, interpolation="nearest")
+            ax.set_title(f"batch {batch_idx}, depth bin {ch_idx} (~{depth_m:.1f}m)")
+            ax.set_xticks([])
+            ax.set_yticks([])
+            fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+
+        for ch_idx in range(num_channels, nrows * ncols):
+            axes[ch_idx // ncols, ch_idx % ncols].axis("off")
+
+        fig.suptitle(
+            f"one-hot gt_depth (batch={batch_idx}, cam=0, bins=0-{num_channels - 1})"
+        )
+        fig.tight_layout()
+
+        if not hasattr(self, "_gt_depth_one_hot_vis_count"):
+            self._gt_depth_one_hot_vis_count = 0
+        self._gt_depth_one_hot_vis_count += 1
+        save_path = (
+            self.visualize_gt_depth_dir
+            / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png"
+        )
+        fig.savefig(save_path, dpi=150, bbox_inches="tight")
+        plt.close(fig)
+        print_log(f"Saved one-hot gt_depth visualization to {save_path.resolve()}")
+
     def get_downsampled_gt_depth(self, gt_depths):
         """
         Input:
@@ -501,6 +590,7 @@ def get_downsampled_gt_depth(self, gt_depths):
         # gt_depths = torch.clamp(gt_depths, max=float(D))
         gt_depths = F.one_hot(
             gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
+        self._visualize_one_hot_gt_depth(gt_depths, B, N, H, W)
         return gt_depths.float()
 
     def get_depth_loss(self, depth_labels, depth_preds):
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 4190cc582..23c74d82d 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -7,7 +7,10 @@
 import torch
 from mmdet3d.registry import MODELS
 from mmengine.logging import print_log
+from mmcv.runner import BaseModule
+from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer
 from mmdet.models.backbones.resnet import BasicBlock
+
 from torch import nn
 from torch.utils.checkpoint import checkpoint
 
@@ -15,6 +18,56 @@
 from .ops import bev_pool_v2
 
 
+class CustomDepthBasicBlock(BaseModule):
+    def __init__(
+      self, 
+      in_channels: int, 
+      out_channel: int, 
+      kernel_size: int = 3,
+      stride: int = 1, 
+      dilation: int = 1,
+      with_cp: bool = False, 
+      downsample: Optional[nn.Module] = None, 
+      init_cfg: OptMultiConfig = None):
+        super().__init__(init_cfg)
+
+        self.norm1_name, norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1)
+        self.norm2_name, norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2)
+        self.conv1 = build_conv_layer(
+          conv_cfg, 
+          in_channels, 
+          out_channel, 
+          kernel_size, 
+          stride=stride, 
+          padding=dilation, 
+          dilation=dilation, bias=False
+        )
+        self.add_module(self.norm1_name, norm1)
+        self.conv2 = build_conv_layer(
+            conv_cfg, planes, planes, 3, padding=1, bias=False)
+        self.add_module(self.norm2_name, norm2)
+
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+        self.dilation = dilation
+        self.with_cp = with_cp
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        identity = x
+        out = self.conv1(x)
+        out = self.norm1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.norm2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        return out
+
 class SELayer(nn.Module):
     """
     Squeeze-and-Excitation (SE) layer. 
@@ -120,45 +173,67 @@ def __init__(
 
         # Input convolution for context/image features
         # Camera depth aware parameters branch
-        # self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
-        #     self.num_camera_depth_parameters
-        # )
+        self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
+            self.num_camera_depth_parameters
+        )
         
         # Context/image feature branch
+        # self.context_input_conv = nn.Sequential(
+            # nn.Conv2d(
+                # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False),
+        #     nn.BatchNorm2d(hidden_channels),
+        #     nn.ReLU(inplace=True),
+        # )
         self.context_input_conv = nn.Sequential(
             nn.Conv2d(
-                in_channels, hidden_channels, kernel_size=3, stride=1, padding=1),
+                in_channels, hidden_channels, kernel_size=1, stride=1, bias=False),
             nn.BatchNorm2d(hidden_channels),
             nn.ReLU(inplace=True),
         )
-        # self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-        #     in_channels=self.num_camera_depth_parameters, 
-        #     hidden_channels=hidden_channels, 
-        #     out_channels=hidden_channels, 
-        #     drop_out=self.mlp_drop_out
-        # )
-        # self.context_se = SELayer(channels=hidden_channels)
+        self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+            in_channels=self.num_camera_depth_parameters, 
+            hidden_channels=hidden_channels, 
+            out_channels=hidden_channels, 
+            drop_out=self.mlp_drop_out
+        )
+        self.context_se = SELayer(channels=hidden_channels)
         self.context_conv = nn.Conv2d(
             hidden_channels, 
-            out_channels, 
+            depth_channels + out_channels, 
             kernel_size=1,
-            stride=1, padding=0)
+            stride=1, padding=0, bias=True)
 
         # Depth branch 
-        # self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-        #     in_channels=self.num_camera_depth_parameters, 
-        #     hidden_channels=hidden_channels, 
-        #     out_channels=hidden_channels, 
-        #     drop_out=self.mlp_drop_out
-        # )
-        # self.depth_se = SELayer(channels=hidden_channels)
+        self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
+            in_channels=self.num_camera_depth_parameters, 
+            hidden_channels=hidden_channels, 
+            out_channels=hidden_channels, 
+            drop_out=self.mlp_drop_out
+        )
+        self.depth_se = SELayer(channels=hidden_channels)
         self.depth_conv = nn.Sequential(
             BasicBlock(hidden_channels, hidden_channels, downsample=None),
             BasicBlock(hidden_channels, hidden_channels),
             BasicBlock(hidden_channels, hidden_channels),
-            nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0)
+            nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
         ) 
-    
+        # self._init_weight()
+
+    def _init_weight(self):
+        print_log("Initializing depth weights...", logger="current")
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                torch.nn.init.kaiming_normal_(m.weight)
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm1d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            
+
     def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
         """
         Args:
@@ -167,10 +242,10 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea
         Returns:
             torch.Tensor, the output tensor of shape (B*N, C, H, W).
         """
-        # context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features)
+        context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features)
         # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
-        # context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
-        # context_features = self.context_se(context_features, context_camera_depth_aware_features)
+        context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        context_features = self.context_se(context_features, context_camera_depth_aware_features)
         context_features = self.context_conv(context_features)
         return context_features
     
@@ -205,11 +280,12 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor)
         camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters)
         
         # (B*N, N_CAMERA_DEPTH_PARAMETERS)
-        # camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
+        camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
         context_input_features = self.context_input_conv(x) 
-
-        context_features = self.context_forward(context_input_features, None)
-        depth_features = self.depth_forward(context_input_features, None)
+        context_features = self.context_forward(context_input_features, camera_depth_aware_features)
+        # return context_features
+        # context_features = self.context_forward(context_input_features, None)
+        depth_features = self.depth_forward(context_input_features, camera_depth_aware_features)
         return torch.cat([depth_features, context_features], dim=1)
 
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
index 591399a4e..ffe9f1363 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py
@@ -11,7 +11,7 @@
 custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"]
 
 # user setting
-data_root = "data/t4datasets/"
+data_root = "data/t4dataset/"
 info_directory_path = "info/kokseang_2_8/"
 
 # Dataset parameters
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
index c9afb963b..9074f14d2 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py
@@ -3,7 +3,7 @@
     "../../default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py",
 ]
 
-experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type
+experiment_group_name = "bevfusion_camera/j6gen2_base_depth_adjust_v2/" + _base_.dataset_type
 experiment_name = "bevfusion_camera_resnet50_fpn_lss_depth_30e_8xb16_j6gen2_base_50m"
 work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name
 
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
index 50e8098a5..997fa1e76 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
@@ -6,8 +6,9 @@
 # Image network
 model = dict(
     depth_gt_downsample=8, 
-    loss_depth_weight=2.0,
+    loss_depth_weight=1.0,
     view_transform=dict(
+        # type="LSSTransformV2",
         type="LSSTransformV2DepthAware",
         xbound=[-54.0, 54.0, 0.3],
         ybound=[-54.0, 54.0, 0.3],
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index 51688a7aa..f49e2dbb4 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -1,6 +1,6 @@
 # Dataset parameters
 backend_args = None
-num_workers = 16
+num_workers = 4
 input_modality = dict(use_lidar=True, use_camera=False)
 
 # range setting
diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
index 4965e981f..95f5f96bd 100644
--- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
+++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py
@@ -1,5 +1,5 @@
 # learning rate
-lr = 1e-4
+lr = 2e-4
 t_max = 3
 max_epochs = 30
 val_interval = 1
@@ -59,7 +59,7 @@
 optim_wrapper = dict(
     type="OptimWrapper",
     optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-2),
-    clip_grad=dict(max_norm=0.1, norm_type=2),
+    clip_grad=dict(max_norm=5.0, norm_type=2),
 )
 
 auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size)

From 69300aae2b77ab70b42309ba342f56899f0951ba Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Thu, 4 Jun 2026 21:33:00 +0900
Subject: [PATCH 178/183] depth_lss_v2: fix CustomDepthBasicBlock args and enable depth SE

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 48 ++++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 23c74d82d..c3cef18ae 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -7,9 +7,10 @@
 import torch
 from mmdet3d.registry import MODELS
 from mmengine.logging import print_log
-from mmcv.runner import BaseModule
-from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer
-from mmdet.models.backbones.resnet import BasicBlock
+from mmengine.model import BaseModule
+from mmcv.cnn import build_conv_layer, build_norm_layer 
+# from mmdet.models.backbones.resnet import BasicBlock
+from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
 
 from torch import nn
 from torch.utils.checkpoint import checkpoint
@@ -23,29 +24,32 @@ def __init__(
       self, 
       in_channels: int, 
       out_channel: int, 
-      kernel_size: int = 3,
+      padding: int = 0,
+      kernel_size: int = 1,
       stride: int = 1, 
       dilation: int = 1,
-      with_cp: bool = False, 
+      with_cp: bool = False,
+      norm_cfg=dict(type='BN'), 
+      conv_cfg=None,
       downsample: Optional[nn.Module] = None, 
       init_cfg: OptMultiConfig = None):
         super().__init__(init_cfg)
 
-        self.norm1_name, norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1)
-        self.norm2_name, norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2)
+        self.norm1_name, self.norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1)
+        self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2)
         self.conv1 = build_conv_layer(
           conv_cfg, 
           in_channels, 
           out_channel, 
           kernel_size, 
           stride=stride, 
-          padding=dilation, 
+          padding=padding, 
           dilation=dilation, bias=False
         )
-        self.add_module(self.norm1_name, norm1)
+        self.add_module(self.norm1_name, self.norm1)
         self.conv2 = build_conv_layer(
-            conv_cfg, planes, planes, 3, padding=1, bias=False)
-        self.add_module(self.norm2_name, norm2)
+            conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False)
+        self.add_module(self.norm2_name, self.norm2)
 
         self.relu = nn.ReLU(inplace=True)
         self.downsample = downsample
@@ -199,7 +203,7 @@ def __init__(
         self.context_se = SELayer(channels=hidden_channels)
         self.context_conv = nn.Conv2d(
             hidden_channels, 
-            depth_channels + out_channels, 
+            out_channels, 
             kernel_size=1,
             stride=1, padding=0, bias=True)
 
@@ -211,12 +215,18 @@ def __init__(
             drop_out=self.mlp_drop_out
         )
         self.depth_se = SELayer(channels=hidden_channels)
+        # self.depth_conv = nn.Sequential(
+        #     BasicBlock(hidden_channels, hidden_channels, downsample=None),
+        #     BasicBlock(hidden_channels, hidden_channels),
+        #     BasicBlock(hidden_channels, hidden_channels),
+        #     nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
+        # )
         self.depth_conv = nn.Sequential(
-            BasicBlock(hidden_channels, hidden_channels, downsample=None),
-            BasicBlock(hidden_channels, hidden_channels),
-            BasicBlock(hidden_channels, hidden_channels),
+            CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1),
+            CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
+            CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
             nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
-        ) 
+        )
         # self._init_weight()
 
     def _init_weight(self):
@@ -257,11 +267,11 @@ def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_feature
         Returns:
             torch.Tensor, the output tensor of shape (B*N, C, H, W).
         """
-        # depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features)
+        depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features)
         # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1)
-        # depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
+        depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1)
         # # (B*N, C, H, W)
-        # depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features)
+        depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features)
         if self.with_cp:
             depth_features = checkpoint(self.depth_conv, depth_features)
         else:

From e15de56e9adb9029e6886a3b65909bfee90fdf89 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 5 Jun 2026 10:04:22 +0900
Subject: [PATCH 179/183] depth_lss_v2: reduce depth_conv to 1x1 conv, drop _init_weight

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 23 +++-----------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index c3cef18ae..3c25402e4 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -222,28 +222,13 @@ def __init__(
         #     nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
         # )
         self.depth_conv = nn.Sequential(
-            CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1),
-            CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
-            CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
+            # CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1, padding=0),
+            # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
+            # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
             nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
         )
         # self._init_weight()
 
-    def _init_weight(self):
-        print_log("Initializing depth weights...", logger="current")
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                torch.nn.init.kaiming_normal_(m.weight)
-                if m.bias is not None:
-                    m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm1d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            
-
     def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
         """
         Args:
@@ -293,8 +278,6 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor)
         camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
         context_input_features = self.context_input_conv(x) 
         context_features = self.context_forward(context_input_features, camera_depth_aware_features)
-        # return context_features
-        # context_features = self.context_forward(context_input_features, None)
         depth_features = self.depth_forward(context_input_features, camera_depth_aware_features)
         return torch.cat([depth_features, context_features], dim=1)
 

From aa3818d467bcfb04aed529cf69949235f0c6c6aa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 5 Jun 2026 03:13:05 +0000
Subject: [PATCH 180/183] ci(pre-commit): autofix

---
 autoware_ml/detection3d/datasets/t4dataset.py |   4 +-
 .../datasets/transforms/__init__.py           |   2 +-
 .../datasets/transforms/local_3d_bbox.py      |  19 +-
 projects/BEVFusion/bevfusion/bevfusion.py     | 111 +++++-----
 .../BEVFusion/bevfusion/bevfusion_head.py     |  55 ++---
 .../bevfusion/bevfusion_voxel_encoder.py      | 195 ++++++++----------
 .../bevfusion/custom_sparse_conv_tensor.py    |  13 +-
 projects/BEVFusion/bevfusion/depth_lss.py     |   2 +-
 projects/BEVFusion/bevfusion/depth_lss_v2.py  | 151 +++++++-------
 projects/BEVFusion/bevfusion/loading.py       |  70 ++++---
 .../bevfusion/ops/bev_pool_v2/__init__.py     |   2 +-
 projects/BEVFusion/bevfusion/ops/topk/topk.py |  17 +-
 .../BEVFusion/bevfusion/sparse_encoder.py     |   7 +-
 projects/BEVFusion/bevfusion/transforms_3d.py |   1 +
 projects/BEVFusion/bevfusion/utils.py         |   2 +-
 ...y_lidar_only_intensity_tensorrt_dynamic.py |   6 +-
 ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py |  16 +-
 ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py |  20 +-
 ...voxel_second_secfpn_50e_8xb16_base_120m.py |   2 +-
 .../default_lidar_second_secfpn_120m.py       |   6 +-
 .../camera_resnet50_fpn_depthlss_120m.py      |   2 +-
 .../resnet50/camera_resnet50_fpn_lss_50m.py   |   4 +-
 .../camera_resnet50_fpn_lss_depth_50m.py      |  10 +-
 .../pipelines/cameras/default_camera_120m.py  |   2 +-
 .../pipelines/cameras/default_camera_50m.py   |  15 +-
 .../default_camera_lidar_intensity_120m.py    |   4 +-
 .../default/pipelines/default_lidar_120m.py   |   2 +-
 .../default/pipelines/default_lidar_50m.py    |   2 +-
 .../pipelines/default_lidar_intensity_120m.py |   2 +-
 projects/BEVFusion/deploy/containers.py       |  14 +-
 projects/BEVFusion/deploy/exporter.py         |  13 +-
 31 files changed, 390 insertions(+), 381 deletions(-)

diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py
index 384cc13b4..38c5f69dd 100644
--- a/autoware_ml/detection3d/datasets/t4dataset.py
+++ b/autoware_ml/detection3d/datasets/t4dataset.py
@@ -2,10 +2,10 @@
 from typing import List
 
 import numpy as np
+import tqdm
 from mmdet3d.datasets import NuScenesDataset
 from mmengine.logging import print_log
 from mmengine.registry import DATASETS
-import tqdm
 
 
 @DATASETS.register_module()
@@ -63,7 +63,7 @@ def filter_data(self) -> List[dict]:
                 if camera_order not in entry["images"]:
                     filtered = True
                     break
-                
+
                 if entry["images"][camera_order]["img_path"] is None:
                     filtered = True
                     break
diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py
index b20961db6..a63ff1eea 100644
--- a/autoware_ml/detection3d/datasets/transforms/__init__.py
+++ b/autoware_ml/detection3d/datasets/transforms/__init__.py
@@ -1,4 +1,4 @@
-from .object_min_points_filter import ObjectMinPointsFilter
 from .local_3d_bbox import Local3DBBoxExpand
+from .object_min_points_filter import ObjectMinPointsFilter
 
 __all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"]
diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
index 96772cf44..ae06d4005 100644
--- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
+++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py
@@ -1,7 +1,6 @@
 from typing import List
 
-import numpy as np 
-
+import numpy as np
 from mmcv.transforms import BaseTransform
 from mmdet3d.structures.ops import box_np_ops
 from mmengine.registry import TRANSFORMS
@@ -13,11 +12,11 @@ class Local3DBBoxExpand(BaseTransform):
 
     Args:
         expand_widths: (List[float]): Uniformly sampled expand width.
-        width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D 
+        width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D
                   bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the
                     4th dimension.
             label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded.
-        """
+    """
 
     def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None:
         assert isinstance(expand_widths, list)
@@ -26,7 +25,7 @@ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: Li
         self.expand_widths = expand_widths
         self.width_dim = width_dim
         self.label_ids = label_ids
-    
+
     def transform(self, input_dict: dict) -> dict:
         """Call function to locally augment the 3D bounding boxes by scaling the width.
 
@@ -37,19 +36,19 @@ def transform(self, input_dict: dict) -> dict:
             dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \
                 key is updated in the result dict.
         """
-        # Label mask 
+        # Label mask
         if self.label_ids is not None:
-            label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] 
+            label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]]
         else:
             label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool)
 
         for i in range(len(input_dict["gt_bboxes_3d"])):
             if not label_masks[i]:
-                continue 
-              
+                continue
+
             expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1])
             input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width
-        
+
         return input_dict
 
     def __repr__(self) -> str:
diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py
index 242ffb658..2eb3ef3d3 100644
--- a/projects/BEVFusion/bevfusion/bevfusion.py
+++ b/projects/BEVFusion/bevfusion/bevfusion.py
@@ -84,9 +84,7 @@ def __init__(
         self._weights_initialized = False
         self.loss_depth_weight = loss_depth_weight
         self.depth_gt_downsample = depth_gt_downsample
-        self.visualize_gt_depth_dir = (
-            Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None
-        )
+        self.visualize_gt_depth_dir = Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None
         if self.visualize_gt_depth_dir is not None:
             self.visualize_gt_depth_dir.mkdir(parents=True, exist_ok=True)
 
@@ -159,10 +157,10 @@ def with_bbox_head(self):
     def with_seg_head(self):
         """bool: Whether the detector has a segmentation head."""
         return hasattr(self, "seg_head") and self.seg_head is not None
-    
+
     def prepare_camera_depth_aware_parameters(
-        self, 
-        camera_intrinsics: torch.Tensor, 
+        self,
+        camera_intrinsics: torch.Tensor,
         img_aug_matrix: torch.Tensor,
         lidar_aug_matrix: torch.Tensor,
         camera2lidar: torch.Tensor,
@@ -178,28 +176,31 @@ def prepare_camera_depth_aware_parameters(
         """
         B, N, _, _ = camera_intrinsics.shape
         lidar_aug_matrix = lidar_aug_matrix.view(B, 1, 4, 4).repeat(1, N, 1, 1)
-        
+
         # (B*N, 15)
-        mlp_input = torch.stack([
-            camera_intrinsics[:, :, 0, 0],   # fx
-            camera_intrinsics[:, :, 1, 1],   # fy
-            camera_intrinsics[:, :, 0, 2],   # cx
-            camera_intrinsics[:, :, 1, 2],   # cy
-            img_aug_matrix[:, :, 0, 0],   # r11
-            img_aug_matrix[:, :, 0, 1],   # r12
-            img_aug_matrix[:, :, 0, 3],   # t1
-            img_aug_matrix[:, :, 1, 0],   # r21
-            img_aug_matrix[:, :, 1, 1],   # r22
-            img_aug_matrix[:, :, 1, 3],   # t2
-            lidar_aug_matrix[:, :, 0, 0],   # r11
-            lidar_aug_matrix[:, :, 0, 1],   # r12
-            lidar_aug_matrix[:, :, 1, 0],   # r21
-            lidar_aug_matrix[:, :, 1, 1],   # r22
-            lidar_aug_matrix[:, :, 2, 2],   # r33
-        ], dim=-1)
+        mlp_input = torch.stack(
+            [
+                camera_intrinsics[:, :, 0, 0],  # fx
+                camera_intrinsics[:, :, 1, 1],  # fy
+                camera_intrinsics[:, :, 0, 2],  # cx
+                camera_intrinsics[:, :, 1, 2],  # cy
+                img_aug_matrix[:, :, 0, 0],  # r11
+                img_aug_matrix[:, :, 0, 1],  # r12
+                img_aug_matrix[:, :, 0, 3],  # t1
+                img_aug_matrix[:, :, 1, 0],  # r21
+                img_aug_matrix[:, :, 1, 1],  # r22
+                img_aug_matrix[:, :, 1, 3],  # t2
+                lidar_aug_matrix[:, :, 0, 0],  # r11
+                lidar_aug_matrix[:, :, 0, 1],  # r12
+                lidar_aug_matrix[:, :, 1, 0],  # r21
+                lidar_aug_matrix[:, :, 1, 1],  # r22
+                lidar_aug_matrix[:, :, 2, 2],  # r33
+            ],
+            dim=-1,
+        )
         # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12)
-        camera2lidar_flatten = camera2lidar[:,:,:3,:].view(B, N, -1)
-        
+        camera2lidar_flatten = camera2lidar[:, :, :3, :].view(B, N, -1)
+
         # (B, N, 15+12)
         mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1)
         return mlp_input
@@ -234,13 +235,13 @@ def extract_img_feat(
         lidar_aug_matrix_inverse=None,
         geom_feats=None,
         using_image_features=False,
-        camera_depth_aware_parameters=None
+        camera_depth_aware_parameters=None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
 
         if not using_image_features:
             x = self.get_image_backbone_features(x)
 
-        with torch.amp.autocast("cuda",enabled=False):
+        with torch.amp.autocast("cuda", enabled=False):
             # with torch.autocast(device_type='cuda', dtype=torch.float32):
             x, pred_depths = self.view_transform(
                 x,
@@ -255,7 +256,7 @@ def extract_img_feat(
                 img_aug_matrix_inverse,
                 lidar_aug_matrix_inverse,
                 geom_feats,
-                camera_depth_aware_parameters=camera_depth_aware_parameters
+                camera_depth_aware_parameters=camera_depth_aware_parameters,
             )
         return x, pred_depths
 
@@ -305,11 +306,11 @@ def voxelize(self, points):
         coords = torch.cat(coords, dim=0)
         assert len(sizes) > 0, "No points in the voxel"
         sizes = torch.cat(sizes, dim=0)
-        
+
         # if self.voxelize_reduce:
         #     feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1)
         #     feats = feats.contiguous()
-        
+
         return feats, coords, sizes
 
     def predict(
@@ -467,15 +468,17 @@ def loss(
             with torch.amp.autocast("cuda", enabled=False):
                 gt_depths = torch.stack(
                     [
-                        meta["gt_depths"]
-                        if isinstance(meta["gt_depths"], torch.Tensor)
-                        else torch.as_tensor(meta["gt_depths"])
+                        (
+                            meta["gt_depths"]
+                            if isinstance(meta["gt_depths"], torch.Tensor)
+                            else torch.as_tensor(meta["gt_depths"])
+                        )
                         for meta in batch_input_metas
                     ]
                 ).to(device=pred_depths.device, dtype=torch.float32)
                 depth_loss = self.get_depth_loss(gt_depths, pred_depths)
                 losses["loss_depth"] = depth_loss
-        
+
         if self.with_bbox_head:
             bbox_loss = self.bbox_head.loss(feats, batch_data_samples)
             losses.update(bbox_loss)
@@ -522,9 +525,7 @@ def _visualize_one_hot_gt_depth(
             return
 
         with torch.no_grad():
-            one_hot = gt_depths_one_hot.view(
-                batch_size, num_cameras, height_down, width_down, num_depth_bins
-            )
+            one_hot = gt_depths_one_hot.view(batch_size, num_cameras, height_down, width_down, num_depth_bins)
             depth_channels = one_hot[batch_idx, 0, :, :, :num_channels].detach().float().cpu().numpy()
 
         ncols = min(3, num_channels)
@@ -545,18 +546,13 @@ def _visualize_one_hot_gt_depth(
         for ch_idx in range(num_channels, nrows * ncols):
             axes[ch_idx // ncols, ch_idx % ncols].axis("off")
 
-        fig.suptitle(
-            f"one-hot gt_depth (batch={batch_idx}, cam=0, bins=0-{num_channels - 1})"
-        )
+        fig.suptitle(f"one-hot gt_depth (batch={batch_idx}, cam=0, bins=0-{num_channels - 1})")
         fig.tight_layout()
 
         if not hasattr(self, "_gt_depth_one_hot_vis_count"):
             self._gt_depth_one_hot_vis_count = 0
         self._gt_depth_one_hot_vis_count += 1
-        save_path = (
-            self.visualize_gt_depth_dir
-            / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png"
-        )
+        save_path = self.visualize_gt_depth_dir / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png"
         fig.savefig(save_path, dpi=150, bbox_inches="tight")
         plt.close(fig)
         print_log(f"Saved one-hot gt_depth visualization to {save_path.resolve()}")
@@ -571,25 +567,26 @@ def get_downsampled_gt_depth(self, gt_depths):
         B, N, H, W = gt_depths.shape
         D = self.view_transform.D
         dbounds = self.view_transform.dbound
-        gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample,
-                                   self.depth_gt_downsample, W // self.depth_gt_downsample,
-                                   self.depth_gt_downsample, 1)
+        gt_depths = gt_depths.view(
+            B * N,
+            H // self.depth_gt_downsample,
+            self.depth_gt_downsample,
+            W // self.depth_gt_downsample,
+            self.depth_gt_downsample,
+            1,
+        )
         gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous()
         gt_depths = gt_depths.view(-1, self.depth_gt_downsample * self.depth_gt_downsample)
-        gt_depths_tmp = torch.where(gt_depths == 0.0,
-                                    1e5 * torch.ones_like(gt_depths),
-                                    gt_depths)
+        gt_depths_tmp = torch.where(gt_depths == 0.0, 1e5 * torch.ones_like(gt_depths), gt_depths)
         gt_depths = torch.min(gt_depths_tmp, dim=-1).values
-        gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample,
-                                   W // self.depth_gt_downsample)
+        gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, W // self.depth_gt_downsample)
 
         gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2]
         # gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths))
         # gt_depths = torch.clamp(gt_depths, max=float(D))
         gt_depths = torch.where((gt_depths >= 0.0) & (gt_depths < D + 1), gt_depths, torch.zeros_like(gt_depths))
         # gt_depths = torch.clamp(gt_depths, max=float(D))
-        gt_depths = F.one_hot(
-            gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
+        gt_depths = F.one_hot(gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:]
         self._visualize_one_hot_gt_depth(gt_depths, B, N, H, W)
         return gt_depths.float()
 
@@ -603,6 +600,6 @@ def get_depth_loss(self, depth_labels, depth_preds):
         depth_loss = F.binary_cross_entropy(
             depth_preds,
             depth_labels,
-            reduction='none',
+            reduction="none",
         ).sum() / max(1.0, fg_mask.sum())
-        return self.loss_depth_weight * depth_loss
\ No newline at end of file
+        return self.loss_depth_weight * depth_loss
diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py
index 3e464ebc4..8dc4bce45 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_head.py
@@ -169,9 +169,9 @@ def __init__(
         y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"]
         self.spatial_dim = x_size * y_size
         bev_pos = self.create_2D_grid(x_size, y_size)
-        
+
         # Register the bev_pos as a buffer so it moves to the GPU automatically.
-        self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2)
+        self.register_buffer("bev_pos", bev_pos, persistent=False)  # (1, H * W, 2)
 
         self.img_feat_pos = None
         self.img_feat_collapsed_pos = None
@@ -189,22 +189,23 @@ def __init__(
 
             self.dense_heatmap_exclude_pooling_classes = sorted(
                 list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices))
-            ) 
+            )
             # Pre-compute the correct order of the classes for the final local_max
-            heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes
-            local_concat_class_remapping = [
-                heatmap_concat_order.index(i)
-                for i in range(self.num_classes)
-            ]
+            heatmap_concat_order = (
+                self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes
+            )
+            local_concat_class_remapping = [heatmap_concat_order.index(i) for i in range(self.num_classes)]
         else:
             self.dense_heatmap_pooling_class_indices = None
             self.dense_heatmap_exclude_pooling_classes = None
             local_concat_class_remapping = [i for i in range(self.num_classes)]
-        
+
         # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict.
-        self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False)
+        self.register_buffer(
+            "local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False
+        )
         self.local_heatmap_padding = self.nms_kernel_size // 2
-        
+
         # NMS clusters
         self.nms_clusters = self.test_cfg.get("nms_clusters", [])
         # Add class indices for nms
@@ -290,7 +291,7 @@ def forward_single(self, inputs, metas):
             dense_heatmap = self.heatmap_head(fusion_feat.float())
         heatmap = dense_heatmap.detach().sigmoid()
         if self.dense_heatmap_pooling_class_indices is not None:
-            # Pooling 
+            # Pooling
             selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :]
             local_max_inner = F.max_pool2d(
                 selected_heatmap,
@@ -301,31 +302,35 @@ def forward_single(self, inputs, metas):
 
             # 2. Restore spatial size using F.pad instead of slice mutation
             local_max = F.pad(
-                local_max_inner, 
-                (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, 
-                self.local_heatmap_padding), 
-                mode="constant", 
-                value=0.0
+                local_max_inner,
+                (
+                    self.local_heatmap_padding,
+                    self.local_heatmap_padding,
+                    self.local_heatmap_padding,
+                    self.local_heatmap_padding,
+                ),
+                mode="constant",
+                value=0.0,
             )
-            
+
             # 3. Any non-pooling classes
             if self.dense_heatmap_exclude_pooling_classes:
                 excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :]
                 local_max = torch.cat([local_max, excluded_local_max], dim=1)
                 local_max = local_max[:, self.local_concat_class_remapping, :, :]
         else:
-            local_max = heatmap 
+            local_max = heatmap
 
         heatmap = heatmap * (heatmap == local_max)
         # (BS, num_classes, H*W)
         heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim)
 
         # top num_proposals among all classes
-        flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim)
-        
+        flattened_heatmap = heatmap.view(-1, self.num_classes * self.spatial_dim)
+
         # Use topk instead of argsort to avoid sorting the entire flattened heatmap.
         top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False)
-        
+
         # 2. Calculate class and spatial indices
         # Use shape[-1] dynamically to handle grid sizes safely.
         top_proposals_class = top_proposals // self.spatial_dim
@@ -340,7 +345,7 @@ def forward_single(self, inputs, metas):
         one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1)
         query_cat_encoding = self.class_encoding(one_hot.float())
         query_feat += query_cat_encoding
-        
+
         # (B, N, 2)
         query_pos = self.bev_pos.squeeze(0)[top_proposals_index]
         #################################
@@ -350,7 +355,9 @@ def forward_single(self, inputs, metas):
         for i in range(self.num_decoder_layers):
             # Transformer Decoder Layer
             # :param query: B C Pq    :param query_pos: B Pq 3/6
-            query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos)
+            query_feat = self.decoder[i](
+                query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos
+            )
 
             # Prediction
             res_layer = self.prediction_heads[i](query_feat)
diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
index 30afdc41d..d7801482a 100644
--- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
+++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py
@@ -1,21 +1,19 @@
 from typing import Optional, Tuple
 
-import torch
 import numpy as np
-from torch import Tensor, nn
-
+import torch
+from mmdet3d.models.voxel_encoders.utils import PFNLayer, get_paddings_indicator
 from mmdet3d.registry import MODELS
-from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer
+from torch import Tensor, nn
 
 
 @MODELS.register_module()
 class HardSimpleVoxelSinCosEncoder(nn.Module):
-    def __init__(self, 
-                 min_norm_values: Tuple[float],
-                 max_norm_values: Tuple[float],
-                 in_channels: Optional[int] = 4) -> None:
+    def __init__(
+        self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], in_channels: Optional[int] = 4
+    ) -> None:
         """
-        Simple voxel encoder that only performs mean pooling on the normalize features, and then 
+        Simple voxel encoder that only performs mean pooling on the normalized features, and then
         performs sin-cos (fourier encoding) on each voxel channels.
 
         The output shape of each voxel is (N, feature_channels*2).
@@ -25,30 +23,29 @@ def __init__(self,
             in_channels (int): Number of input channels.
         """
         super().__init__()
-      
+
         # Create PillarFeatureNet layers
         self.in_channels = in_channels
-        
-        # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP 
+
+        # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP
         min_norm_values = torch.tensor(min_norm_values)
         max_norm_values = torch.tensor(max_norm_values)
         # Let alpha = pi * exponents, beta = max - min
-        # y = ((x - min) / beta) * alpha 
+        # y = ((x - min) / beta) * alpha
         # y = alpha / beta * (x - min)
-        # y = (alpha / beta) * x - (alpha / beta) * min 
-        # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta 
+        # y = (alpha / beta) * x - (alpha / beta) * min
+        # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta
         # y = scale * x + bias
         exponents = (2 ** torch.arange(0, self.in_channels)).float()
-        alpha = (torch.pi * exponents).unsqueeze(0) # (1, C)
-        beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1)
+        alpha = (torch.pi * exponents).unsqueeze(0)  # (1, C)
+        beta = (max_norm_values - min_norm_values).unsqueeze(1)  # (C, 1)
         scale = alpha / beta
-        bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C)
-        
-        self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C)
-        self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C)
+        bias = -(alpha * min_norm_values.unsqueeze(1)) / beta  # (C, C)
 
-    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
-                *args, **kwargs) -> Tensor:
+        self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False)  # (1, C, C)
+        self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False)  # (1, C, C)
+
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor:
         """Forward function.
 
         Args:
@@ -63,26 +60,28 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         """
         # Mean in the voxel
         # (N, M, C) -> (N, C)
-        voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous()
+        voxel_mean_features = (
+            features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
+        ).contiguous()
 
         # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C)
         # FMA (fused multiply-add): y = bias + scale * voxel_mean_features
         y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1))
         # SinCos encoding
         # (N*C, C) -> (N, C*C)
-        y = y.reshape(-1, self.in_channels*self.in_channels)
+        y = y.reshape(-1, self.in_channels * self.in_channels)
         # (N, C*C) -> (N, C*C*2)
         voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
-        
+
         return voxel_fourier_features
 
 
 @MODELS.register_module()
 class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder):
     """BEVFusion Voxel Encoder Feature Net.
-    
-    The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the 
-    offset features, for example, distances. After that, it concatenates the fourier features and the PFN features 
+
+    The network is the same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the
+    offset features, for example, distances. After that, it concatenates the fourier features and the PFN features
     along the channel dimension for each voxel.
 
     Args:
@@ -93,25 +92,24 @@ class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder):
             N PFNLayers. Defaults to (64, ).
     """
 
-    def __init__(self,
-                 min_norm_values: Optional[Tuple[float]] = None,
-                 max_norm_values: Optional[Tuple[float]] = None,
-                 in_channels: Optional[int] = 4,
-                 feat_channels: Optional[tuple] = (64, ),
-                 with_distance: Optional[bool] = False,
-                 with_cluster_center: Optional[bool] = True,
-                 with_voxel_center: Optional[bool] = True,
-                 voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
-                 point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4,
-                                                              40, 1),
-                 norm_cfg: Optional[dict] = dict(
-                     type='BN1d', eps=1e-3, momentum=0.01),
-                 mode: Optional[str] = 'max',
-                 legacy: Optional[bool] = True):
-        
+    def __init__(
+        self,
+        min_norm_values: Optional[Tuple[float]] = None,
+        max_norm_values: Optional[Tuple[float]] = None,
+        in_channels: Optional[int] = 4,
+        feat_channels: Optional[tuple] = (64,),
+        with_distance: Optional[bool] = False,
+        with_cluster_center: Optional[bool] = True,
+        with_voxel_center: Optional[bool] = True,
+        voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4),
+        point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1),
+        norm_cfg: Optional[dict] = dict(type="BN1d", eps=1e-3, momentum=0.01),
+        mode: Optional[str] = "max",
+        legacy: Optional[bool] = True,
+    ):
+
         super(BEVFusionVoxelFeatureNet, self).__init__(
-            min_norm_values=min_norm_values, 
-            max_norm_values=max_norm_values, in_channels=in_channels
+            min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels
         )
         assert len(feat_channels) > 0
         self.legacy = legacy
@@ -122,12 +120,12 @@ def __init__(self,
             pfn_in_channels += 3
         if with_distance:
             pfn_in_channels += 1
-        
+
         assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0"
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
-        
+
         # Create VoxelFeatureNet layers
         feat_channels = [pfn_in_channels] + list(feat_channels)
         pfn_layers = []
@@ -138,13 +136,7 @@ def __init__(self,
                 last_layer = False
             else:
                 last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    norm_cfg=norm_cfg,
-                    last_layer=last_layer,
-                    mode=mode))
+            pfn_layers.append(PFNLayer(in_filters, out_filters, norm_cfg=norm_cfg, last_layer=last_layer, mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)
 
         # Need pillar (voxel) size and x/y offset in order to calculate offset
@@ -156,8 +148,7 @@ def __init__(self,
         self.z_offset = self.vz / 2 + point_cloud_range[2]
         self.point_cloud_range = point_cloud_range
 
-    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
-                *args, **kwargs) -> Tensor:
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor:
         """Forward function.
 
         Args:
@@ -173,17 +164,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         voxel_fourier_features = super().forward(features, num_points, coors)
 
         # Normalize the features
-        norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1))
-        
+        norm_features = (features - self.min_norm_values.view(1, -1)) / (
+            (self.max_norm_values - self.min_norm_values).view(1, -1)
+        )
+
         # Offset features
-        max_points_per_voxel = features.shape[1] 
-        
+        max_points_per_voxel = features.shape[1]
+
         features_ls = [norm_features]
         # Find distance of x, y, and z from cluster center, mapped to [-1,   1] if available
         if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
+            points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1)
             f_cluster = features[:, :, :3] - points_mean
             # Map to [0, 1] if available
             # if self.min_norm_values is not None and self.max_norm_values is not None:
@@ -196,27 +187,21 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         if self._with_voxel_center:
             if not self.legacy:
                 f_center = torch.zeros_like(features[:, :, :3])
-                f_center[:, :, 0] = features[:, :, 0] - (
-                    coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
-                    self.x_offset)
-                f_center[:, :, 1] = features[:, :, 1] - (
-                    coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
-                    self.y_offset)
-                f_center[:, :, 2] = features[:, :, 2] - (
-                    coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
-                    self.z_offset)
+                f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset)
+                f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset)
+                f_center[:, :, 2] = features[:, :, 2] - (coors[:, 1].to(dtype).unsqueeze(1) * self.vz + self.z_offset)
             else:
                 f_center = features[:, :, :3]
                 f_center[:, :, 0] = f_center[:, :, 0] - (
-                    coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-                    self.x_offset)
+                    coors[:, 3].type_as(features).unsqueeze(1) * self.vx + self.x_offset
+                )
                 f_center[:, :, 1] = f_center[:, :, 1] - (
-                    coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-                    self.y_offset)
+                    coors[:, 2].type_as(features).unsqueeze(1) * self.vy + self.y_offset
+                )
                 f_center[:, :, 2] = f_center[:, :, 2] - (
-                    coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
-                    self.z_offset)
-            
+                    coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset
+                )
+
             # if self.min_norm_values is not None and self.max_norm_values is not None:
             #     f_center = f_center / (voxel_size * 0.5)
             features_ls.append(f_center)
@@ -234,12 +219,12 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
         mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
         mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
         voxel_feature_offsets *= mask
-        
+
         # PFN
         for pfn in self.pfn_layers:
             voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
-        
-        # Concat 
+
+        # Concat
         features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
 
         return features
@@ -247,7 +232,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
 # @MODELS.register_module()
 # class BEVFusionVoxelSinCosEncoder(nn.Module):
-#     def __init__(self, 
+#     def __init__(self,
 #                  min_norm_values: Tuple[float],
 #                  max_norm_values: Tuple[float],
 #                  time_lag_channel_index: int = 3,
@@ -279,7 +264,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #         self.y_offset = self.vy / 2 + point_cloud_range[1]
 #         self.z_offset = self.vz / 2 + point_cloud_range[2]
 #         self.point_cloud_range = point_cloud_range
-        
+
 #         self.xyz_channels = 3
 #         feat_offset_channels = in_channels - self.xyz_channels
 #         if with_cluster_center:
@@ -310,7 +295,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
 #         self.time_lag_channel_index = time_lag_channel_index
 #         self.time_exp_factor = time_exp_factor
-        
+
 #         self.register_buffer("min_norm_values", torch.tensor(min_norm_values))
 #         self.register_buffer("max_norm_values", torch.tensor(max_norm_values))
 #         self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz]))
@@ -328,9 +313,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 
 #         Returns:
 #             torch.Tensor: Features of pillars in shape (M, C).
-#         """ 
+#         """
 #         num_voxels, max_points_per_voxel = features.shape[0], features.shape[1]
-        
+
 #         # Mean in the voxel
 #         # (N, M, 3) -> (N, 3)
 #         voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view(
@@ -339,7 +324,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #         # min-max normalization, (N, 3) -> (N, 3)
 #         voxel_features_norm = (voxel_features - \
 #          self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1))
-        
+
 #         # SinCos encoding
 #         # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3)
 #         y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1)
@@ -348,22 +333,22 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #         # (N, 3*3) -> (N, 3*3*2)
 #         voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1)
 
-#         # PFN 
-#         # Other features, for example, intensity or time_lag 
+#         # PFN
+#         # Other features, for example, intensity or time_lag
 #         other_features = features[:, :, self.xyz_channels:]
-        
-#         # Normalization 
-#         other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:])    
+
+#         # Normalization
+#         other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:])
 
 #         time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels
-#         # exponentiate time_lag features, it's higher when the normlized time lag is lower 
+#         # exponentiate time_lag features, it's higher when the normalized time lag is lower
 #         # (1.0 when time_lag_features is 0.0)
 #         if self.time_exp_factor is not None:
 #             other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor)
 #         else:
-#             # Inverse the time_lag feature 
+#             # Inverse the time_lag feature
 #             other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index]
-            
+
 #         # Offsets
 #         voxel_feature_offsets = [other_features_norm]
 #         # Find distance of x, y, and z from cluster center
@@ -371,7 +356,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #             points_mean = features[:, :, :3].sum(
 #                 dim=1, keepdim=True) / num_points.type_as(features).view(
 #                     -1, 1, 1)
-            
+
 #             # f_cluster = (features[:, :, :3] - points_mean)
 #             f_cluster = features[:, :, :3] - points_mean
 #             voxel_feature_offsets.append(f_cluster)
@@ -389,7 +374,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #             f_center[:, :, 2] = features[:, :, 2] - (
 #                 coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
 #                 self.z_offset)
-            
+
 #             # Map to [-1, 1]
 #             # f_center = f_center / (self.voxel_size * 0.5)
 #             voxel_feature_offsets.append(f_center)
@@ -397,7 +382,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #         if self._with_distance:
 #             points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
 #             voxel_feature_offsets.append(points_dist)
-        
+
 #         voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1)
 #         # The feature decorations were calculated without regard to whether
 #         # pillar was empty. Need to ensure that
@@ -405,13 +390,11 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
 #         mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0)
 #         mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets)
 #         voxel_feature_offsets *= mask
-        
+
 #         # PFN
 #         for pfn in self.pfn_layers:
 #             voxel_feature_offsets = pfn(voxel_feature_offsets, num_points)
-        
-#         # Concat 
+
+#         # Concat
 #         features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1)
 #         return features
-
-
diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
index 175c08bed..888d2b1c0 100644
--- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
+++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py
@@ -2,8 +2,8 @@
 Custom SparseConvTensor for BEVFusion.
 This customiztion is used to support cleaner ONNX export of sparse convolutions.
 """
-import torch
 
+import torch
 from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
 
 if IS_SPCONV2_AVAILABLE:
@@ -23,18 +23,15 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh
     # b * (H * W * D) + h*(W*D) + w*D + d
     # Factor out the common terms D and W
     # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d
-    linear_idx = ((b * H + h) * W + w) * D + d                     # [N]
-    
+    linear_idx = ((b * H + h) * W + w) * D + d  # [N]
+
     out = torch.zeros(
-        [
-            num_cells,
-            sparse_tensor.features.shape[1]
-        ], 
+        [num_cells, sparse_tensor.features.shape[1]],
         device=sparse_tensor.features.device,
         dtype=sparse_tensor.features.dtype,
     )
     # out = out.index_copy(0, linear_idx, sparse_tensor.features)
     # out = out.scatter(0, linear_idx, sparse_tensor.features)
-    scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels)            # [N, C]
+    scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels)  # [N, C]
     out = out.scatter(0, scatter_idx, sparse_tensor.features)
     return out.view(batch_size, H, W, D, out_channels)
diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py
index 2e68a3772..d0a547258 100644
--- a/projects/BEVFusion/bevfusion/depth_lss.py
+++ b/projects/BEVFusion/bevfusion/depth_lss.py
@@ -328,7 +328,7 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices):
         final = torch.cat(x.unbind(dim=2), 1)
         if self.visualize_bev_feat:
             self.plot_bev_feat(final)
-        
+
         return final
 
     def plot_bev_feat(self, bev_feat):
diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 3c25402e4..46b74b969 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -5,13 +5,13 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
+from mmcv.cnn import build_conv_layer, build_norm_layer
 from mmdet3d.registry import MODELS
-from mmengine.logging import print_log
-from mmengine.model import BaseModule
-from mmcv.cnn import build_conv_layer, build_norm_layer 
+
 # from mmdet.models.backbones.resnet import BasicBlock
 from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
-
+from mmengine.logging import print_log
+from mmengine.model import BaseModule
 from torch import nn
 from torch.utils.checkpoint import checkpoint
 
@@ -21,34 +21,35 @@
 
 class CustomDepthBasicBlock(BaseModule):
     def __init__(
-      self, 
-      in_channels: int, 
-      out_channel: int, 
-      padding: int = 0,
-      kernel_size: int = 1,
-      stride: int = 1, 
-      dilation: int = 1,
-      with_cp: bool = False,
-      norm_cfg=dict(type='BN'), 
-      conv_cfg=None,
-      downsample: Optional[nn.Module] = None, 
-      init_cfg: OptMultiConfig = None):
+        self,
+        in_channels: int,
+        out_channel: int,
+        padding: int = 0,
+        kernel_size: int = 1,
+        stride: int = 1,
+        dilation: int = 1,
+        with_cp: bool = False,
+        norm_cfg=dict(type="BN"),
+        conv_cfg=None,
+        downsample: Optional[nn.Module] = None,
+        init_cfg: OptMultiConfig = None,
+    ):
         super().__init__(init_cfg)
 
         self.norm1_name, self.norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1)
         self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2)
         self.conv1 = build_conv_layer(
-          conv_cfg, 
-          in_channels, 
-          out_channel, 
-          kernel_size, 
-          stride=stride, 
-          padding=padding, 
-          dilation=dilation, bias=False
+            conv_cfg,
+            in_channels,
+            out_channel,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=False,
         )
         self.add_module(self.norm1_name, self.norm1)
-        self.conv2 = build_conv_layer(
-            conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False)
+        self.conv2 = build_conv_layer(conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False)
         self.add_module(self.norm2_name, self.norm2)
 
         self.relu = nn.ReLU(inplace=True)
@@ -72,9 +73,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         out += identity
         return out
 
+
 class SELayer(nn.Module):
     """
-    Squeeze-and-Excitation (SE) layer. 
+    Squeeze-and-Excitation (SE) layer.
     This is used to modulate features with camera-depth aware parameters.
     The code is taken from BEVDET (https://github.com/hustvl/BEVDET).
     """
@@ -84,7 +86,7 @@ def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid):
         # Dont need global pooling because inputs are (B*N, C, 1, 1).
         self.sequeeze_net = nn.Sequential(
             # Squeeze with 1x1 convolution
-            nn.Conv2d(channels, channels, 1, bias=True), 
+            nn.Conv2d(channels, channels, 1, bias=True),
             # Activation
             act_layer(),
             # Expand with 1x1 convolution
@@ -106,12 +108,12 @@ def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch.
 
 class CameraDepthLinearProjectionMLP(nn.Module):
     """
-    Linear projection module by MLP. This is used to project image (context) features and camera-depth 
+    Linear projection module by MLP. This is used to project image (context) features and camera-depth
     aware parameters (for example, intrinsics) to embedding space.
     The code is taken from BEVDET (https://github.com/hustvl/BEVDET).
     """
 
-    def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop_out: float = 0.0):
+    def __init__(self, in_channels: int, hidden_channels: int, out_channels: int, drop_out: float = 0.0):
         """
         Args:
             in_channels: int, the number of input channels.
@@ -132,7 +134,7 @@ def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop
             nn.Linear(hidden_channels, out_channels),
             nn.Dropout(drop_out),
         )
-    
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Args:
@@ -150,14 +152,15 @@ class CameraDepthAwareNet(nn.Module):
     """
 
     def __init__(
-        self, 
-        in_channels: int, 
+        self,
+        in_channels: int,
         hidden_channels: int,
         out_channels: int,
-        mlp_drop_out: float, 
+        mlp_drop_out: float,
         depth_channels: int,
         with_cp: bool = False,
-        num_camera_depth_parameters: int = 27) -> None:
+        num_camera_depth_parameters: int = 27,
+    ) -> None:
         """
         Args:
             in_channels: int, the number of input channels.
@@ -177,42 +180,35 @@ def __init__(
 
         # Input convolution for context/image features
         # Camera depth aware parameters branch
-        self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(
-            self.num_camera_depth_parameters
-        )
-        
+        self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(self.num_camera_depth_parameters)
+
         # Context/image feature branch
         # self.context_input_conv = nn.Sequential(
-            # nn.Conv2d(
-                # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False),
+        # nn.Conv2d(
+        # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False),
         #     nn.BatchNorm2d(hidden_channels),
         #     nn.ReLU(inplace=True),
         # )
         self.context_input_conv = nn.Sequential(
-            nn.Conv2d(
-                in_channels, hidden_channels, kernel_size=1, stride=1, bias=False),
+            nn.Conv2d(in_channels, hidden_channels, kernel_size=1, stride=1, bias=False),
             nn.BatchNorm2d(hidden_channels),
             nn.ReLU(inplace=True),
         )
         self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-            in_channels=self.num_camera_depth_parameters, 
-            hidden_channels=hidden_channels, 
-            out_channels=hidden_channels, 
-            drop_out=self.mlp_drop_out
+            in_channels=self.num_camera_depth_parameters,
+            hidden_channels=hidden_channels,
+            out_channels=hidden_channels,
+            drop_out=self.mlp_drop_out,
         )
         self.context_se = SELayer(channels=hidden_channels)
-        self.context_conv = nn.Conv2d(
-            hidden_channels, 
-            out_channels, 
-            kernel_size=1,
-            stride=1, padding=0, bias=True)
+        self.context_conv = nn.Conv2d(hidden_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True)
 
-        # Depth branch 
+        # Depth branch
         self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP(
-            in_channels=self.num_camera_depth_parameters, 
-            hidden_channels=hidden_channels, 
-            out_channels=hidden_channels, 
-            drop_out=self.mlp_drop_out
+            in_channels=self.num_camera_depth_parameters,
+            hidden_channels=hidden_channels,
+            out_channels=hidden_channels,
+            drop_out=self.mlp_drop_out,
         )
         self.depth_se = SELayer(channels=hidden_channels)
         # self.depth_conv = nn.Sequential(
@@ -229,7 +225,9 @@ def __init__(
         )
         # self._init_weight()
 
-    def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
+    def context_forward(
+        self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor
+    ) -> torch.Tensor:
         """
         Args:
             x: torch.Tensor, the input tensor of shape (B*N, C, H, W).
@@ -243,7 +241,7 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea
         context_features = self.context_se(context_features, context_camera_depth_aware_features)
         context_features = self.context_conv(context_features)
         return context_features
-    
+
     def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor:
         """
         Args:
@@ -273,10 +271,10 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor)
         """
         # (B, N, N_CAMERA_DEPTH_PARAMETERS) -> (B*N, N_CAMERA_DEPTH_PARAMETERS)
         camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters)
-        
+
         # (B*N, N_CAMERA_DEPTH_PARAMETERS)
         camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters)
-        context_input_features = self.context_input_conv(x) 
+        context_input_features = self.context_input_conv(x)
         context_features = self.context_forward(context_input_features, camera_depth_aware_features)
         depth_features = self.depth_forward(context_input_features, camera_depth_aware_features)
         return torch.cat([depth_features, context_features], dim=1)
@@ -317,7 +315,9 @@ def __init__(
         self.collapse_z = collapse_z
         self.expand_batch_axis = expand_batch_axis
 
-    def get_cam_feats(self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]:
+    def get_cam_feats(
+        self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         raise NotImplementedError
 
     def forward(
@@ -340,10 +340,10 @@ def forward(
             ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed
             x, depth_softmax = self.get_cam_feats(img)
             x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat)
-            
+
             # No return depth predictions when precomputed geometry features are used
             depth_softmax = None
-        
+
         else:
             intrins = camera_intrinsics[..., :3, :3]
             post_rots = img_aug_matrix[..., :3, :3]
@@ -372,7 +372,7 @@ def forward(
                 depth_softmax,
             ) = self.get_cam_feats(img, camera_depth_aware_parameters)
             x = self.bev_pool(view_feats, depth_softmax, geom)
-         
+
         return x, depth_softmax
 
     def bev_pool_aux(self, geom_feats):
@@ -408,7 +408,7 @@ def bev_pool_aux(self, geom_feats):
             return None, None, None
 
         geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept]
-        
+
         # Switch x and y to match the order of the BEV grid
         ranks_bev = (
             geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0])
@@ -496,7 +496,7 @@ def compute_bev_pool(
             self.plot_bev_feat(bev_feat)
 
         return bev_feat
-     
+
     def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat):
         interval_starts, interval_lengths = self.compute_intervals(ranks_bev)
         bev_feat = self.compute_bev_pool(
@@ -518,7 +518,8 @@ def get_depth_softmax(self, x: torch.Tensor, B, N, fH, fW) -> Tuple[torch.Tensor
         view_feats = x[:, self.D : (self.D + self.C)]
         view_feats = view_feats.view(B, N, self.C, fH, fW)
         return view_feats, depth_softmax
-    
+
+
 @MODELS.register_module()
 class LSSTransformV2(BaseViewTransformV2):
 
@@ -548,15 +549,13 @@ def __init__(
         self.downsample = DownSampleNet(downsample, out_channels, out_channels)
 
     def get_cam_feats(
-        self, 
-        x: torch.Tensor, 
-        camera_depth_aware_parameters: Optional[torch.Tensor] = None
+        self, x: torch.Tensor, camera_depth_aware_parameters: Optional[torch.Tensor] = None
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         B, N, C, fH, fW = x.shape
         x = x.view(B * N, C, fH, fW)
         x = self.depthnet(x)
         return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW)
-    
+
     def forward(self, *args, **kwargs):
         x, depth_softmax = super().forward(*args, **kwargs)
         x = self.downsample(x)
@@ -576,7 +575,7 @@ def __init__(
         ybound: Tuple[float, float, float],
         zbound: Tuple[float, float, float],
         dbound: Tuple[float, float, float],
-        camera_depth_aware_configs: dict, 
+        camera_depth_aware_configs: dict,
         downsample: int = 1,
     ):
         super().__init__(
@@ -597,17 +596,15 @@ def __init__(
             depth_channels=self.D,
             out_channels=self.C,
         )
-    
+
     def get_cam_feats(
-        self, 
-        x: torch.Tensor, 
-        camera_depth_aware_parameters: Optional[torch.Tensor] = None
+        self, x: torch.Tensor, camera_depth_aware_parameters: Optional[torch.Tensor] = None
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         B, N, C, fH, fW = x.shape
         x = x.view(B * N, C, fH, fW)
         x = self.camera_depth_aware_net(x, camera_depth_aware_parameters)
         return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW)
-    
+
     def forward(self, *args, **kwargs):
         x, depth_softmax = super().forward(*args, **kwargs)
         x = self.downsample(x)
diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py
index 5d5c8d13a..7f3322fda 100644
--- a/projects/BEVFusion/bevfusion/loading.py
+++ b/projects/BEVFusion/bevfusion/loading.py
@@ -4,15 +4,13 @@
 from typing import List, Optional, Tuple
 
 import matplotlib.pyplot as plt
-
 import mmcv
 import numpy as np
+from mmcv.transforms import BaseTransform
 from mmdet3d.datasets.transforms import LoadMultiViewImageFromFiles
 from mmdet3d.registry import TRANSFORMS
 from mmengine.fileio import get
 from mmengine.logging import print_log
-from mmcv.transforms import BaseTransform
-
 
 
 @TRANSFORMS.register_module()
@@ -238,6 +236,7 @@ class PointsToMultiViewImageDepths(BaseTransform):
         max_depth (float): Upper clip for the depth color scale (m).
             Defaults to 80.
     """
+
     def __init__(
         self,
         img_shape,
@@ -255,7 +254,7 @@ def __init__(
         if self.visualize_dir is not None:
             self.visualize_dir.mkdir(parents=True, exist_ok=True)
         self._depth_idx = 0
-    
+
     def transform(self, results: dict) -> Optional[dict]:
         """Call function to load multi-view image from files.
 
@@ -269,17 +268,17 @@ def transform(self, results: dict) -> Optional[dict]:
         """
         lidar2image = np.asarray(results["lidar2img"])
         img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4)
-        cur_coords = results["points"].numpy()[:,:3]
+        cur_coords = results["points"].numpy()[:, :3]
 
         # inverse lidar aug
         if "lidar_aug_matrix" in results:
-          lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"])
-          lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
-          cur_coords -= lidar_aug_matrix[:3, 3]
-          cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0)
+            lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"])
+            lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix)
+            cur_coords = cur_coords - lidar_aug_matrix[:3, 3]  # not in-place: cur_coords may alias results["points"]
+            cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0)
         else:
-          cur_coords = cur_coords.transpose(1, 0)
-          
+            cur_coords = cur_coords.transpose(1, 0)
+
         # lidar2image
         cur_coords = lidar2image[:, :3, :3] @ cur_coords
         cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1)
@@ -306,15 +305,19 @@ def transform(self, results: dict) -> Optional[dict]:
             & valid_dist_mask
         )
 
-        # Avoid loops since it's slow 
+        # Avoid Python loops since they are slow
         indices = np.nonzero(on_img)
         camera_indices = indices[0]
         point_indices = indices[1]
         masked_coords = cur_coords[camera_indices, point_indices].astype(np.int64)
         masked_dist = dist[camera_indices, point_indices]
 
-        # Possibly to have duplicates and the last one will be used, however, the chance is small	
-        flatten_indices = camera_indices * self.img_shape[0] * self.img_shape[1] + masked_coords[:, 0] * self.img_shape[1] + masked_coords[:, 1]
+        # Duplicates are possible and the last one will be used; however, the chance is small
+        flatten_indices = (
+            camera_indices * self.img_shape[0] * self.img_shape[1]
+            + masked_coords[:, 0] * self.img_shape[1]
+            + masked_coords[:, 1]
+        )
         depth_flat = np.zeros(self.num_cameras * self.img_shape[0] * self.img_shape[1], dtype=np.float32)
         depth_flat[flatten_indices] = masked_dist
         depth = depth_flat.reshape(self.num_cameras, self.img_shape[0], self.img_shape[1])
@@ -350,9 +353,7 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
             base_rows = int(np.ceil(self.num_cameras / cols))
         rows = base_rows * 3
 
-        fig, axes = plt.subplots(
-            rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False
-        )
+        fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)
 
         for c in range(self.num_cameras):
             d = depth[c]
@@ -365,12 +366,20 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
                 ax_overlay.imshow(imgs[c].astype(np.uint8))
                 if vals.size > 0:
                     ax_overlay.scatter(
-                        xs, ys, c=vals, cmap="turbo",
-                        vmin=0, vmax=self.max_depth, s=1,
+                        xs,
+                        ys,
+                        c=vals,
+                        cmap="turbo",
+                        vmin=0,
+                        vmax=self.max_depth,
+                        s=1,
                     )
             else:
                 ax_overlay.imshow(
-                    d, cmap="turbo", vmin=0, vmax=self.max_depth,
+                    d,
+                    cmap="turbo",
+                    vmin=0,
+                    vmax=self.max_depth,
                     interpolation="nearest",
                 )
             ax_overlay.set_title(f"cam {c} overlay  ({vals.size} pts)")
@@ -383,7 +392,10 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
                 ax_img.imshow(imgs[c].astype(np.uint8))
             else:
                 ax_img.imshow(
-                    d, cmap="gray", vmin=0, vmax=self.max_depth,
+                    d,
+                    cmap="gray",
+                    vmin=0,
+                    vmax=self.max_depth,
                     interpolation="nearest",
                 )
             ax_img.set_title(f"cam {c} image-only")
@@ -393,7 +405,10 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
             # Row block 3: depth-only visualization.
             ax_depth = axes[(base_rows * 2) + (c // cols), c % cols]
             ax_depth.imshow(
-                d, cmap="turbo", vmin=0, vmax=self.max_depth,
+                d,
+                cmap="turbo",
+                vmin=0,
+                vmax=self.max_depth,
                 interpolation="nearest",
             )
             ax_depth.set_title(f"cam {c} depth-only")
@@ -407,21 +422,16 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None:
             axes[(base_rows * 2) + (c // cols), c % cols].axis("off")
 
         # Shared depth colorbar with numeric values.
-        depth_mappable = plt.cm.ScalarMappable(
-            cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth)
-        )
+        depth_mappable = plt.cm.ScalarMappable(cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth))
         depth_mappable.set_array([])
-        cbar = fig.colorbar(
-            depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02
-        )
+        cbar = fig.colorbar(depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02)
         cbar.set_label("Depth (m)")
 
         fig.suptitle(f"gt_depths — {self._depth_idx}")
         fig.tight_layout(rect=[0, 0, 0.96, 0.97])
-        
+
         self._depth_idx += 1
         out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png"
         fig.savefig(out_path, dpi=120, bbox_inches="tight")
         plt.close(fig)
         print(f"Saved gt_depths visualization to {out_path}")
- 
\ No newline at end of file
diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
index 549a97e81..ff2fdfff7 100644
--- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
+++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py
@@ -1,3 +1,3 @@
 from .bev_pool_v2 import bev_pool_v2
 
-__all__ = ["bev_pool_v2"]
\ No newline at end of file
+__all__ = ["bev_pool_v2"]
diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py
index a767bb720..f0f9a8779 100644
--- a/projects/BEVFusion/bevfusion/ops/topk/topk.py
+++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py
@@ -13,9 +13,9 @@ class TopK(Function):
     def symbolic(
         g,
         x: torch.Tensor,
-				k: int,
-				dim: int,
-				sorted: bool = False,
+        k: int,
+        dim: int,
+        sorted: bool = False,
     ):
 
         output = g.op(
@@ -27,19 +27,20 @@ def symbolic(
         if x_shape is not None and hasattr(output.type(), "with_sizes"):
             output_type = x.type().with_sizes(x_shape)
             output.setType(output_type)
-				# Argsort from Autoware is in ascending order, so we need to return the last k elements.
+        # Argsort from Autoware is in ascending order, so we need to return the last k elements.
         return output[-k:]
 
     @staticmethod
     def forward(
         ctx,
         x: torch.Tensor,
-				k: int, 
-				dim: int,
-				sorted: bool = False,
+        k: int,
+        dim: int,
+        sorted: bool = False,
     ):
         _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted)
         return indices
 
+
 def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False):
-    return TopK.apply(x, k, dim, sorted)
\ No newline at end of file
+    return TopK.apply(x, k, dim, sorted)
diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py
index 0fc20cd19..6bf0592b6 100644
--- a/projects/BEVFusion/bevfusion/sparse_encoder.py
+++ b/projects/BEVFusion/bevfusion/sparse_encoder.py
@@ -5,7 +5,6 @@
 
 import numpy as np
 import torch
-
 from mmdet3d.models.layers import make_sparse_convmodule
 from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
 from mmdet3d.models.middle_encoders import SparseEncoder
@@ -119,7 +118,7 @@ def __init__(
             indice_key="spconv_down2",
             conv_type="SparseConv3d",
         )
-    
+
     def forward(self, voxel_features, coors, batch_size):
         """Forward of SparseEncoder.
 
@@ -147,11 +146,11 @@ def forward(self, voxel_features, coors, batch_size):
         for encoder_layer in self.encoder_layers:
             x = encoder_layer(x)
             encode_features.append(x)
-        
+
         # for detection head
         # [200, 176, 5] -> [200, 176, 2]
         out = self.conv_out(encode_features[-1])
-        
+
         spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels)
         # spatial_features = out.dense(channels_first=False)
         spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous()
diff --git a/projects/BEVFusion/bevfusion/transforms_3d.py b/projects/BEVFusion/bevfusion/transforms_3d.py
index c311f9254..31d0cc417 100644
--- a/projects/BEVFusion/bevfusion/transforms_3d.py
+++ b/projects/BEVFusion/bevfusion/transforms_3d.py
@@ -191,6 +191,7 @@ def transform(self, input_dict: dict) -> dict:
 @TRANSFORMS.register_module()
 class BEVFusionRemoveLiDARPoints(BaseTransform):
     """Remove LiDAR points from the data."""
+
     def __init__(self):
         super().__init__()
 
diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py
index 84797cc51..39c6a0ded 100644
--- a/projects/BEVFusion/bevfusion/utils.py
+++ b/projects/BEVFusion/bevfusion/utils.py
@@ -93,7 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
                 predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels}
                 predictions_dicts.append(predictions_dict)
             return predictions_dicts
-        
+
         # use score threshold
         if self.score_threshold is not None:
             if isinstance(self.score_threshold, float):
diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
index 0863889bb..2652b3965 100644
--- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
+++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py
@@ -1,11 +1,7 @@
 codebase_config = dict(type="mmdet3d", task="VoxelDetection", model_type="end2end")
 
 custom_imports = dict(
-    imports=[
-        "projects.BEVFusion.deploy",
-        "projects.BEVFusion.bevfusion",
-        "projects.SparseConvolution"
-    ],
+    imports=["projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", "projects.SparseConvolution"],
     allow_failed_imports=False,
 )
 
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
index 7a8afad3e..bcf9870c6 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py
@@ -29,8 +29,20 @@
     pts_voxel_encoder=dict(
         in_channels=len(_base_.lidar_sweep_dims),
         # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
+        min_norm_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        max_norm_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
     ),
     pts_middle_encoder=dict(
         in_channels=50,
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
index d3c5154c6..1f2acd6ab 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py
@@ -29,8 +29,20 @@
     pts_voxel_encoder=dict(
         in_channels=len(_base_.lidar_sweep_dims),
         # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here
-        min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0],
-        max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2],
+        min_norm_values=[
+            _base_.point_cloud_range[0],
+            _base_.point_cloud_range[1],
+            _base_.point_cloud_range[2],
+            0.0,
+            0.0,
+        ],
+        max_norm_values=[
+            _base_.point_cloud_range[3],
+            _base_.point_cloud_range[4],
+            _base_.point_cloud_range[5],
+            255.0,
+            0.2,
+        ],
     ),
     pts_middle_encoder=dict(
         in_channels=50,
@@ -153,4 +165,6 @@
 )
 log_processor = dict(window_size=50)
 
-load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth"
+load_from = (
+    "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth"
+)
diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
index 171c3076e..19394ab16 100644
--- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py
@@ -149,4 +149,4 @@
 )
 log_processor = dict(window_size=50)
 
-resume = True
\ No newline at end of file
+resume = True
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
index aa275f558..f11431814 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py
@@ -14,7 +14,7 @@
         pad_size_divisor=32,
     ),
     pts_voxel_encoder=dict(
-        type="HardSimpleVoxelSinCosEncoder", 
+        type="HardSimpleVoxelSinCosEncoder",
         in_channels=4,
     ),
     pts_middle_encoder=dict(
@@ -89,7 +89,9 @@
             # Set NMS for different clusters
             nms_clusters=[
                 # Sqrt(0.25) = 0.50
-                dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300),  # It's radius if using circle_nms
+                dict(
+                    class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300
+                ),  # It's radius if using circle_nms
                 dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50),
                 dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100),
                 dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100),
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
index 1059ce9e8..c807668a3 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py
@@ -56,5 +56,5 @@
     ),
     bbox_head=dict(
         in_channels=80,
-    )
+    ),
 )
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
index d0920ccf1..1457207c8 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py
@@ -1,11 +1,11 @@
 _base_ = [
     "./camera_resnet50_fpn_depthlss_120m.py",
 ]
-num_proposals = 200 
+num_proposals = 200
 
 # Image network
 model = dict(
-    depth_gt_downsample=8, 
+    depth_gt_downsample=8,
     loss_depth_weight=2.0,
     view_transform=dict(
         type="LSSTransformV2",
diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
index 997fa1e76..dd90ccb6e 100644
--- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py
@@ -1,11 +1,11 @@
 _base_ = [
     "./camera_resnet50_fpn_depthlss_120m.py",
 ]
-num_proposals = 200 
+num_proposals = 200
 
 # Image network
 model = dict(
-    depth_gt_downsample=8, 
+    depth_gt_downsample=8,
     loss_depth_weight=1.0,
     view_transform=dict(
         # type="LSSTransformV2",
@@ -15,11 +15,7 @@
         zbound=[-10.0, 10.0, 20.0],
         dbound=[1.0, 60, 0.5],
         downsample=2,
-        camera_depth_aware_configs=dict(
-            mlp_drop_out=0.0,
-            downsample=8,
-            num_camera_depth_parameters=27
-        ),
+        camera_depth_aware_configs=dict(mlp_drop_out=0.0, downsample=8, num_camera_depth_parameters=27),
     ),
     bbox_head=dict(
         num_proposals=num_proposals,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
index f31a604b0..3a7a428b7 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py
@@ -53,7 +53,7 @@
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
-	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
     dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(
         type="ObjectNameFilter",
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
index 00e7ac896..011e460f0 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py
@@ -29,20 +29,17 @@
     dict(
         type="ImageAug3D",
         final_dim=image_size,
-		resize_lim=[0.29, 0.35],
+        resize_lim=[0.29, 0.35],
         bot_pct_lim=[0.0, 0.0],
         rot_lim=[0.0, 0.0],
         rand_flip=True,
         is_train=True,
     ),
+    dict(type="PointsRangeFilter", point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]),
     dict(
-        type="PointsRangeFilter", 
-        point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]
-    ),
-    dict(
-        type="PointsToMultiViewImageDepths", 
-        img_shape=image_size, 
-        num_cameras=len(camera_order), 
+        type="PointsToMultiViewImageDepths",
+        img_shape=image_size,
+        num_cameras=len(camera_order),
         depth_bounds=[1.0, 60.0],
         # visualize_dir="work_dirs/visualize_depths_6",
     ),
@@ -54,7 +51,7 @@
     ),
     dict(type="BEVFusionRandomFlip3D"),
     dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range),
-	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
     # Remove LiDAR points from the data
     dict(type="BEVFusionRemoveLiDARPoints"),
     dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
index e13597aec..11e297c09 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py
@@ -84,7 +84,7 @@
             "barrier",
         ],
     ),
-		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
     dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
@@ -124,7 +124,7 @@
         backend_args=backend_args,
         camera_order=camera_order,
     ),
-		dict(
+    dict(
         type="LoadPointsFromFile",
         coord_type="LIDAR",
         load_dim=point_load_dim,
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
index ed0e35fbf..d797779de 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py
@@ -66,7 +66,7 @@
             "barrier",
         ],
     ),
-	dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5),
     dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3),
     dict(type="PointShuffle"),
     dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
index f49e2dbb4..ca75d799f 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py
@@ -63,7 +63,7 @@
             "barrier",
         ],
     ),
-		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
     dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
index 23dd78687..d5e426f58 100644
--- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py
@@ -66,7 +66,7 @@
             "barrier",
         ],
     ),
-		dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
+    dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2),
     dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1),
     dict(type="PointShuffle"),
     dict(
diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py
index 018b5db7e..55586e0f5 100644
--- a/projects/BEVFusion/deploy/containers.py
+++ b/projects/BEVFusion/deploy/containers.py
@@ -1,8 +1,8 @@
+# Wrapper Classes for onnx conversion
+import numpy as np
 import torch
 import torch.nn.functional as F
 
-# Wrapper Classes for onnx conversion
-import numpy as np
 
 class TrtBevFusionImageBackboneContainer(torch.nn.Module):
     def __init__(self, mod, mean, std) -> None:
@@ -49,13 +49,17 @@ def forward(
 
         batch_inputs_dict = {
             "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel},
-         
         }
-        
+
         voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin")
         coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin")
         num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin")
-        print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape)
+        print(
+            "voxels.shape, coors.shape, num_points_per_voxel.shape:",
+            voxels.shape,
+            coors.shape,
+            num_points_per_voxel.shape,
+        )
         if points is not None:
             batch_inputs_dict["points"] = [points]
 
diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py
index 4c5e72ac7..c7cd9e6b0 100644
--- a/projects/BEVFusion/deploy/exporter.py
+++ b/projects/BEVFusion/deploy/exporter.py
@@ -2,7 +2,7 @@
 
 import logging
 import os.path as osp
-from typing import Optional, Any
+from typing import Any, Optional
 
 import numpy as np
 import onnx
@@ -11,7 +11,7 @@
 from builder import ExportBuilder
 from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer
 from data_classes import ModelData, SetupConfigs
-from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER
+from mmdeploy.core import SYMBOLIC_REWRITER, RewriterContext
 from mmdeploy.utils import (
     get_root_logger,
 )
@@ -32,8 +32,7 @@ def purge_mmdeploy_symbolics(op_names: list[str]) -> dict:
             continue
         # Bookkeeping key: full Python path of an implementer function.
         # Match by "...symbolics.<op_name>." or "...symbolics.<op_name>__"
-        if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key
-               for op in op_names):
+        if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key for op in op_names):
             removed[key] = records.pop(key)
     return removed
 
@@ -83,12 +82,10 @@ def _export_model(
           patched_model (torch.nn.Module): Patched Pytorch model.
           ir_configs (dict): Configs for intermediate representations in ONNX.
         """
-        # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported 
+        # Purge the mmdeploy symbolic records for the layer_norm op; remove this if the LayerNorm op is not supported
         # in the tensorrt version
         removed = purge_mmdeploy_symbolics(["layer_norm"])
-        self.logger.info(
-          f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}"
-        )
+        self.logger.info(f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}")
         with RewriterContext(**context_info), torch.no_grad():
             image_feats = None
             if "img_backbone" in self.setup_configs.model_cfg.model:

From a4bd66e2a4749b15f9295e99e8f14017c5cea79f Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 5 Jun 2026 12:29:57 +0900
Subject: [PATCH 181/183] Remove debug tensor dumps from deploy containers

---
 projects/BEVFusion/deploy/containers.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py
index 55586e0f5..ad9243412 100644
--- a/projects/BEVFusion/deploy/containers.py
+++ b/projects/BEVFusion/deploy/containers.py
@@ -51,15 +51,6 @@ def forward(
             "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel},
         }
 
-        voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin")
-        coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin")
-        num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin")
-        print(
-            "voxels.shape, coors.shape, num_points_per_voxel.shape:",
-            voxels.shape,
-            coors.shape,
-            num_points_per_voxel.shape,
-        )
         if points is not None:
             batch_inputs_dict["points"] = [points]
 

From 1d15699a6aa6a10e50c20e9d3c6c99f214a68fa5 Mon Sep 17 00:00:00 2001
From: Kok Seang Tan <kseangtan@gmail.com>
Date: Fri, 5 Jun 2026 14:03:25 +0900
Subject: [PATCH 182/183] Remove unused CustomDepthBasicBlock and dead code from depth_lss_v2

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 76 --------------------
 1 file changed, 76 deletions(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index 46b74b969..f843d1d94 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -2,16 +2,11 @@
 from pathlib import Path
 from typing import Optional, Tuple
 
-import matplotlib.pyplot as plt
 import numpy as np
 import torch
-from mmcv.cnn import build_conv_layer, build_norm_layer
 from mmdet3d.registry import MODELS
 
-# from mmdet.models.backbones.resnet import BasicBlock
-from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
 from mmengine.logging import print_log
-from mmengine.model import BaseModule
 from torch import nn
 from torch.utils.checkpoint import checkpoint
 
@@ -19,61 +14,6 @@
 from .ops import bev_pool_v2
 
 
-class CustomDepthBasicBlock(BaseModule):
-    def __init__(
-        self,
-        in_channels: int,
-        out_channel: int,
-        padding: int = 0,
-        kernel_size: int = 1,
-        stride: int = 1,
-        dilation: int = 1,
-        with_cp: bool = False,
-        norm_cfg=dict(type="BN"),
-        conv_cfg=None,
-        downsample: Optional[nn.Module] = None,
-        init_cfg: OptMultiConfig = None,
-    ):
-        super().__init__(init_cfg)
-
-        self.norm1_name, self.norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1)
-        self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2)
-        self.conv1 = build_conv_layer(
-            conv_cfg,
-            in_channels,
-            out_channel,
-            kernel_size,
-            stride=stride,
-            padding=padding,
-            dilation=dilation,
-            bias=False,
-        )
-        self.add_module(self.norm1_name, self.norm1)
-        self.conv2 = build_conv_layer(conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False)
-        self.add_module(self.norm2_name, self.norm2)
-
-        self.relu = nn.ReLU(inplace=True)
-        self.downsample = downsample
-        self.stride = stride
-        self.dilation = dilation
-        self.with_cp = with_cp
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        identity = x
-        out = self.conv1(x)
-        out = self.norm1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.norm2(out)
-
-        if self.downsample is not None:
-            identity = self.downsample(x)
-
-        out += identity
-        return out
-
-
 class SELayer(nn.Module):
     """
     Squeeze-and-Excitation (SE) layer.
@@ -183,12 +123,6 @@ def __init__(
         self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(self.num_camera_depth_parameters)
 
         # Context/image feature branch
-        # self.context_input_conv = nn.Sequential(
-        # nn.Conv2d(
-        # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False),
-        #     nn.BatchNorm2d(hidden_channels),
-        #     nn.ReLU(inplace=True),
-        # )
         self.context_input_conv = nn.Sequential(
             nn.Conv2d(in_channels, hidden_channels, kernel_size=1, stride=1, bias=False),
             nn.BatchNorm2d(hidden_channels),
@@ -211,19 +145,9 @@ def __init__(
             drop_out=self.mlp_drop_out,
         )
         self.depth_se = SELayer(channels=hidden_channels)
-        # self.depth_conv = nn.Sequential(
-        #     BasicBlock(hidden_channels, hidden_channels, downsample=None),
-        #     BasicBlock(hidden_channels, hidden_channels),
-        #     BasicBlock(hidden_channels, hidden_channels),
-        #     nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
-        # )
         self.depth_conv = nn.Sequential(
-            # CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1, padding=0),
-            # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
-            # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1),
             nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True)
         )
-        # self._init_weight()
 
     def context_forward(
         self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor

From 4828cf29fe4f3f2e9ffa8227311fc23418535474 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 5 Jun 2026 05:03:52 +0000
Subject: [PATCH 183/183] ci(pre-commit): autofix

---
 projects/BEVFusion/bevfusion/depth_lss_v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py
index f843d1d94..a95a88e4e 100644
--- a/projects/BEVFusion/bevfusion/depth_lss_v2.py
+++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py
@@ -5,7 +5,6 @@
 import numpy as np
 import torch
 from mmdet3d.registry import MODELS
-
 from mmengine.logging import print_log
 from torch import nn
 from torch.utils.checkpoint import checkpoint