From 9bb04cee8803d5f8479ae58197c21931c75b5613 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 14 Jan 2026 16:08:31 +0900 Subject: [PATCH 001/183] Update docker installation commit for perception_evaluation --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c5240c498..22d18f5ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ RUN python3 -m pip --no-cache-dir install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@develop +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@dd37a546352f953565033f1d4b8cb443df1232c59 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ From 1a7ebee39f66d37cd45b20e23575d352a6cdc493 Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Thu, 19 Mar 2026 20:57:55 +0900 Subject: [PATCH 002/183] Update config --- ...evfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...fline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 4 ++-- ...idar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} | 4 ++-- ...oxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} | 4 ++-- ...usion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} | 8 ++++---- ..._cosine.py => default_20e_8xb8_adamw_linear_cosine.py} | 6 +++--- ...8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} | 4 ++-- ..._cosine.py => default_30e_8xb8_adamw_linear_cosine.py} | 4 ++-- ...8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} | 4 ++-- 13 files changed, 29 insertions(+), 29 deletions(-) rename 
projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/{bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py} (94%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_20e_4xb8_adamw_linear_cosine.py => 
default_20e_8xb8_adamw_linear_cosine.py} (97%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_cosine.py => default_30e_8xb8_adamw_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_4xb8_adamw_linear_cosine.py => default_30e_8xb8_adamw_linear_cosine.py} (98%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_4xb8_adamw_cosine.py => default_50e_8xb8_adamw_cosine.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py index b781e2c71..e65c52ece 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py index a1ab10f57..4f81af760 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera_lidar_intensity/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_j6gen2_base_120m" +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py similarity index 98% rename from 
projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py index a9887a15c..6556cf818 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_4xb8_adamw_linear_cosine.py", + "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 
8b0aef32e..5d743e184 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_offline_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_offline/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py index 59f91c8a4..da461a567 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ 
"../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_offline_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_offline/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index df500705c..89bb7cd7b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L-offline/bevfusion_offline_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -4,7 +4,7 @@ "../default/pipelines/default_offline_lidar_120m.py", "../models/default_lidar_second_secfpn_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_4xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -17,7 +17,7 @@ info_directory_path = "info/user_name/" 
experiment_group_name = "bevfusion_lidar_offline/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 178f5ff3d..69be0f0d9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py index e2b2d1678..e6addac7d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_4xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_4xb8_jpntaxi_gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py similarity index 94% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index d65c470d9..a5c72aef0 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_4xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_4xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_4xb8_base_120m" +experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index 8dc5e7bf1..d491eaa4b 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_4xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,11 +1,11 @@ # learning rate -# lr = 0.0001 -lr = 1e-4 +# lr = 0.0002 +lr = 2e-4 t_max = 6 max_epochs = 20 val_interval = 1 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 07a9110c7..c5053d943 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 0.0001 +lr = 2e-4 t_max = 8 max_epochs = 30 val_interval = 5 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 9d4ee8e61..c05aeafcb 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_4xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 1e-4 +lr = 2e-4 t_max = 8 max_epochs = 30 val_interval = 1 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py rename to 
projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 29fc38b4c..fc1914dde 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_4xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 0.0001 +lr = 2e-4 t_max = 15 max_epochs = 50 val_interval = 5 -train_gpu_size = 4 +train_gpu_size = 8 test_batch_size = 2 train_batch_size = 8 From 84024516eafe70aac0252f8b1cafbf40ba7fd05f Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Tue, 24 Mar 2026 16:48:43 +0900 Subject: [PATCH 003/183] Update config --- ..._voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} | 6 +++--- ...ar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 6 ++++-- ..._voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} | 10 ++++++---- 3 files changed, 13 insertions(+), 9 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-CL/{bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py} (92%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py index 6556cf818..20c85b1d8 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py @@ -1,6 +1,6 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_gen2_base_120m" +experiment_group_name = "bevfusion_camera_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 69be0f0d9..e43f9c485 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = 
"bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py similarity index 92% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index e6addac7d..8538784f8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -1,6 +1,6 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2_base.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", "../default/schedulers/default_30e_8xb8_adamw_cosine.py", @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" 
+info_directory_path = "info/kokseang_2_6_1/" -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_gen2_base_120m" +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file From cf6e13dea087c2983e045df78840b86c166cd79a Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Wed, 25 Mar 2026 19:07:44 +0900 Subject: [PATCH 004/183] Update config --- ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 78 +++++++++++++++++++ 3 files changed, 234 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py new file mode 100644 index 000000000..f048ab1a8 --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + 
frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py new file mode 100644 index 000000000..b4d8ddfbf --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" +work_dir = "work_dirs/" + 
experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + 
testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py new file mode 100644 index 000000000..7a0215139 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -0,0 +1,78 @@ +_base_ = [ + "./bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py", +] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# Add evaluator configs +perception_evaluator_configs = dict( + dataset_paths=_base_.data_root, + frame_id="base_link", + evaluation_config_dict=_base_.evaluator_metric_configs, + load_raw_data=False, +) + +frame_pass_fail_config = dict( + target_labels=_base_.class_names, + # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + confidence_threshold_list=None, +) + +training_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + 
_base_.info_train_statistics_file_name +) +testing_statistics_parquet_path = _base_.data_root + _base_.info_directory_path + _base_.info_test_statistics_file_name +validation_statistics_parquet_path = ( + _base_.data_root + _base_.info_directory_path + _base_.info_val_statistics_file_name +) + +val_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_val_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="validation", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=False, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) + +test_evaluator = dict( + _delete_=True, + type="T4MetricV2", + data_root=_base_.data_root, + ann_file=_base_.data_root + _base_.info_directory_path + _base_.info_test_file_name, + training_statistics_parquet_path=training_statistics_parquet_path, + testing_statistics_parquet_path=testing_statistics_parquet_path, + validation_statistics_parquet_path=validation_statistics_parquet_path, + output_dir="testing", + dataset_name="base", + perception_evaluator_configs=perception_evaluator_configs, + critical_object_filter_config=None, + frame_pass_fail_config=frame_pass_fail_config, + num_workers=64, + scene_batch_size=-1, + write_metric_summary=True, + class_names={{_base_.class_names}}, + name_mapping={{_base_.name_mapping}}, + experiment_name=experiment_name, + experiment_group_name=_base_.experiment_group_name, +) From e1ebc993b9c8f17090a64b5a11476a3733e89084 Mon Sep 
17 00:00:00 2001 From: KokSeang Tan Date: Thu, 26 Mar 2026 20:20:08 +0900 Subject: [PATCH 005/183] Update config --- .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 1 - .../pipelines/default_camera_lidar_intensity_120m.py | 4 ++-- .../t4dataset/default/pipelines/default_lidar_120m.py | 8 ++++++-- .../default/pipelines/default_lidar_intensity_120m.py | 8 ++++++-- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 2 +- 8 files changed, 18 insertions(+), 11 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index 28ba4ab33..a50cf8852 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -18,7 +18,6 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False), "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", True), } diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index acac440dc..9d1910dab 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -84,8 +84,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 
130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 7ffedc232..7ee393ea6 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -64,8 +64,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", @@ -89,6 +89,8 @@ "img_aug_matrix", "lidar_aug_matrix", "timestamp", + "vehicle_type", + "city", ], ), ] @@ -130,6 +132,8 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index c7fa1b2cb..8b154901e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -64,8 +64,8 @@ "traffic_cone", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", @@ -89,6 +89,8 @@ "img_aug_matrix", "lidar_aug_matrix", 
"timestamp", + "vehicle_type", + "city", ], ), ] @@ -130,6 +132,8 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index d491eaa4b..c3f82e76d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate # lr = 0.0002 -lr = 2e-4 +lr = 1.5e-4 t_max = 6 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index c5053d943..94c2a4160 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 8 max_epochs = 30 val_interval = 5 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index c05aeafcb..f5c747e62 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 8 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index fc1914dde..f0bd87ca4 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1.5e-4 t_max = 15 max_epochs = 50 val_interval = 5 From 2dac4333e0dce899701b0602778626e0df191a8a Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Fri, 27 Mar 2026 11:01:29 +0900 Subject: [PATCH 006/183] Update config --- .../t4dataset/default/pipelines/default_lidar_120m.py | 10 +++++----- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 6 +++++- .../schedulers/default_30e_8xb8_adamw_cosine.py | 6 +++++- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 6 +++++- .../schedulers/default_50e_8xb8_adamw_cosine.py | 6 +++++- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 7ee393ea6..347ba6452 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -54,14 +54,14 @@ classes=[ "car", "truck", - "construction_vehicle", + # "construction_vehicle", "bus", - "trailer", - "barrier", - "motorcycle", + # "trailer", + # "barrier", + # "motorcycle", "bicycle", "pedestrian", - "traffic_cone", + # "traffic_cone", ], ), # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index c3f82e76d..a0be6f4ab 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 
+1,6 @@ # learning rate # lr = 0.0002 -lr = 1.5e-4 +lr = 1.0e-4 t_max = 6 max_epochs = 20 val_interval = 1 @@ -57,3 +57,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 94c2a4160..edcbd74bf 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 8 max_epochs = 30 val_interval = 5 @@ -69,3 +69,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index f5c747e62..32e8d59fa 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 8 max_epochs = 30 val_interval = 1 @@ -56,3 +56,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index f0bd87ca4..58192c2de 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1.5e-4 +lr = 1.0e-4 t_max = 15 max_epochs = 50 val_interval = 5 @@ -69,3 +69,7 @@ ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" \ No newline at end of file From 9046b7bb1b3603345dc886903794ea118e9737f4 Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Fri, 27 Mar 2026 11:29:07 +0900 Subject: [PATCH 007/183] Update config --- tools/detection3d/train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py index c379025d1..254783b92 100644 --- a/tools/detection3d/train.py +++ b/tools/detection3d/train.py @@ -123,6 +123,10 @@ def main(): # build customized runner from the registry # if 'runner_type' is set in the cfg runner = RUNNERS.build(cfg) + + # Output all model + print_log(f"Runner model: ", logger="current") + print_log(f"{runner.model}", logger="current") # start training runner.train() From d4d93fa3bab4fbe63c528c2a029bfb8e3a5346db Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Sat, 28 Mar 2026 00:39:14 +0900 Subject: [PATCH 008/183] Added --- projects/BEVFusion/bevfusion/__init__.py | 3 ++- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- projects/BEVFusion/bevfusion/utils.py | 1 + .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index e849db227..947ebab23 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ 
b/projects/BEVFusion/bevfusion/__init__.py @@ -6,7 +6,7 @@ from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D -from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost +from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder __all__ = [ "BEVFusion", @@ -26,4 +26,5 @@ "TransformerDecoderLayer", "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", + "TransFusionBBoxCoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 143c35a14..853523c4f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -554,7 +554,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): vel = None boxes_dict = self.bbox_coder.decode( - score, rot, dim, center, height, vel + score, rot, dim, center, height, vel, filter=False ) # decode the prediction to real world metric bbox bboxes_tensor = boxes_dict[0]["bboxes"] gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device) diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..5b7c94877 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,6 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dicts.append(predictions_dict) if filter is False: + print("filter is False") return predictions_dicts # use score threshold diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..a7fac4b37 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py 
+++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -98,7 +98,7 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, + score_threshold=0.1, out_size_factor=8, code_size=10, ), From b2714a87052e441b94bf8cc15793ef12fb39698f Mon Sep 17 00:00:00 2001 From: KokSeang Tan Date: Tue, 31 Mar 2026 10:58:41 +0900 Subject: [PATCH 009/183] Added --- projects/BEVFusion/bevfusion/utils.py | 1 - .../default/models/default_lidar_second_secfpn_120m.py | 2 +- .../t4dataset/default/pipelines/default_lidar_120m.py | 7 +------ .../default/pipelines/default_lidar_intensity_120m.py | 5 ----- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 4 ++-- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 3 ++- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 3 ++- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 3 ++- 8 files changed, 10 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 5b7c94877..c47604dbd 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,7 +93,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dicts.append(predictions_dict) if filter is False: - print("filter is False") return predictions_dicts # use score threshold diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index a7fac4b37..b5d9a8fdc 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -98,7 +98,7 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - 
score_threshold=0.1, + score_threshold=0.0, out_size_factor=8, code_size=10, ), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 347ba6452..e79c30710 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -54,17 +54,12 @@ classes=[ "car", "truck", - # "construction_vehicle", "bus", - # "trailer", - # "barrier", - # "motorcycle", "bicycle", "pedestrian", - # "traffic_cone", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=1), + # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 8b154901e..ce7985fd8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -54,14 +54,9 @@ classes=[ "car", "truck", - "construction_vehicle", "bus", - "trailer", - "barrier", - "motorcycle", "bicycle", "pedestrian", - "traffic_cone", ], ), # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index a0be6f4ab..d1d11e7c9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate -# lr = 0.0002 -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 6 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index edcbd74bf..f4f102170 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 8 max_epochs = 30 val_interval = 5 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 32e8d59fa..44870ccf4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 8 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 58192c2de..542ccdd8e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -1,5 +1,6 @@ # learning rate -lr = 1.0e-4 +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 t_max = 15 max_epochs = 50 val_interval = 5 From 43adb38bf15ea01b0ff81b06a5e4e435a6e490f9 Mon Sep 17 00:00:00 2001 
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:01:59 +0000 Subject: [PATCH 010/183] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/__init__.py | 2 +- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 2 +- .../default/schedulers/default_50e_8xb8_adamw_cosine.py | 2 +- tools/detection3d/train.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 947ebab23..60a64b532 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -26,5 +26,5 @@ "TransformerDecoderLayer", "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", - "TransFusionBBoxCoder", + "TransFusionBBoxCoder", ] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index e43f9c485..d984b5585 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 8538784f8..0878cef29 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" \ No newline at end of file +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index d1d11e7c9..15ba38878 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -60,4 +60,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index f4f102170..a2cd2d2e9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -73,4 +73,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end 
of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 44870ccf4..264eda921 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -60,4 +60,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py index 542ccdd8e..87571d0b3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py @@ -73,4 +73,4 @@ # Only set if the number of train_gpu_size more than 1 if train_gpu_size > 1: - sync_bn = "torch" \ No newline at end of file + sync_bn = "torch" diff --git a/tools/detection3d/train.py b/tools/detection3d/train.py index 254783b92..f7e6309fb 100644 --- a/tools/detection3d/train.py +++ b/tools/detection3d/train.py @@ -123,8 +123,8 @@ def main(): # build customized runner from the registry # if 'runner_type' is set in the cfg runner = RUNNERS.build(cfg) - - # Output all model + + # Output all model print_log(f"Runner model: ", logger="current") print_log(f"{runner.model}", logger="current") From 73dd1c40c1100879b3a700fe5e82536019045cb0 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 8 Apr 2026 19:05:59 +0900 Subject: [PATCH 011/183] Added --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +++- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 2 +- projects/BEVFusion/setup.py 
| 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index a5c72aef0..64bf2208a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" @@ -143,3 +143,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index e79c30710..a74ad2ea0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py index 837d1f53e..38f588b20 100644 --- a/projects/BEVFusion/setup.py +++ b/projects/BEVFusion/setup.py @@ -43,7 +43,7 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i name="bev_pool", install_requires=[ "onnx_graphsurgeon==0.5.8", - "spconv-cu120==2.3.6", + "spconv-cu126==2.3.8", ], ext_modules=[ 
make_cuda_ext( From 89e26700d614ab6abc92212ad69083c84019083f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Apr 2026 10:06:34 +0000 Subject: [PATCH 012/183] ci(pre-commit): autofix --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 64bf2208a..0a2a178c6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -resume = True \ No newline at end of file +resume = True From 380d7aa0d4e85934bc2bdd3bd8e88f7746acd4d4 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 10 Apr 2026 23:47:24 +0900 Subject: [PATCH 013/183] Added --- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index d984b5585..605e3cf7c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = 
"bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" From b60e45ae0ca338c30fdb16a7efcc9fb5332bae0e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 12 Apr 2026 21:42:53 +0900 Subject: [PATCH 014/183] Update configs --- ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++-- ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...evfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 6 +++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0878cef29..563f71cf9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = 
"work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b4d8ddfbf..238054ab5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 0a2a178c6..8bf21b1b2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_6_2/" -experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + 
num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 9b38a42898e724c57532b5ad6bf94546c870e809 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 13 Apr 2026 18:34:04 +0900 Subject: [PATCH 015/183] Added --- autoware_ml/configs/detection3d/dataset/t4dataset/base.py | 1 - .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 2 +- ...on_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index c92d58431..d0744a131 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -48,7 +48,6 @@ "j6gen2_base": ("t4dataset_j6gen2_base_infos_test.pkl", False), "j6gen2": ("t4dataset_j6gen2_infos_test.pkl", False), "largebus": ("t4dataset_largebus_infos_test.pkl", False), - "jpntaxi_base": ("t4dataset_jpntaxi_base_infos_test.pkl", False), "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", False), "base": ("t4dataset_base_infos_test.pkl", True), } diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index a50cf8852..b7ddb799a 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -18,7 +18,7 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_base": 
("t4dataset_jpntaxi_base_infos_test.pkl", True), + "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_infos_test.pkl", True), } dataset_version_list = [ diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0878cef29..563f71cf9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_40.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" From f39b5841db399a9ebd60bf4d40396c83cadd5089 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:08:05 +0900 Subject: [PATCH 016/183] Updated --- ...idar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 563f71cf9..b6677ff05 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_6_2/" -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices @@ -144,4 +148,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" From 24a88adf1e782379104d6d7ff64d408b3c263d8e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:08:56 +0900 Subject: [PATCH 017/183] Added --- ...on_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...el_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...n_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 5 +++-- ...l_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...dar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 5 files changed, 7 insertions(+), 
6 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 605e3cf7c..41a2152cf 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index f048ab1a8..7dfc7e0f8 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py 
index 563f71cf9..998e5a22e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -48,6 +48,7 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + dense_heatmap_pooling_classes=["car", "truck", "bus"], # Use class indices for pooling ), ) @@ -144,4 +145,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" +load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b4d8ddfbf..238054ab5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = 
"bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 7a0215139..4f9fb7b65 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_1/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" From c637420d5e6e9f2fa898c581184a45098ce6469b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:10:58 +0900 Subject: [PATCH 018/183] Added --- ..._lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 7 +++---- ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- ...vfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 4 +--- ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 41a2152cf..8c02ca112 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file +load_from "" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 7dfc7e0f8..afb150284 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 998e5a22e..3dda36c3a 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_no_bicycle_pooling" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -48,7 +48,6 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), - dense_heatmap_pooling_classes=["car", "truck", "bus"], # Use class indices for pooling ), ) @@ -145,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.6.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_50.pth" \ No newline at end of file +load_from "" \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 238054ab5..3320d2b08 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type 
experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 0a2a178c6..38f3e369a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" @@ -143,5 +143,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = True diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 4f9fb7b65..c9a0050c0 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -4,7 +4,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/user_name/" experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" From 48879b8cb6540d0e07124a81bf13aa4b67ecb51e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 03:11:22 +0000 Subject: [PATCH 019/183] ci(pre-commit): autofix --- ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...sion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 8c02ca112..264f0da77 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" \ No newline at end of file +load_from "" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 3dda36c3a..f505ac5dc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" \ No newline at end of file +load_from "" From 82457ab52003421646cf2c15070c60314eaabe6e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 12:12:18 +0900 Subject: [PATCH 020/183] Added --- .../default/pipelines/default_camera_lidar_intensity_120m.py | 2 -- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 2 -- .../t4dataset/default/pipelines/default_lidar_intensity_120m.py | 2 -- 3 files changed, 6 
deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 9d1910dab..963a218e1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -84,8 +84,6 @@ "traffic_cone", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index a74ad2ea0..06d95be16 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -59,8 +59,6 @@ "pedestrian", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index ce7985fd8..4e74d3616 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -59,8 +59,6 @@ "pedestrian", ], ), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - # dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), 
dict(type="PointShuffle"), dict( type="Pack3DDetInputs", From 827bbb24c66e04e2d6c38f53423305de0010c075 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 15:50:30 +0900 Subject: [PATCH 021/183] Updated --- ...ion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 264f0da77..be535c560 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/user_name/" -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -144,4 +144,4 @@ ) log_processor = dict(window_size=50) -load_from "" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" From 5f9a4a55156ee29a689a2e08b12064d208d8118d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 17 Apr 2026 16:01:35 +0900 Subject: [PATCH 022/183] Updated --- ...n_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py 
index be535c560..3e615b504 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -31,6 +31,10 @@ pts_middle_encoder=dict( in_channels=_base_.point_use_dim, sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 1d0ac8db9b9ec1a738a4f7294edcd6764adaef19 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 11:44:49 +0900 Subject: [PATCH 023/183] Added --- projects/BEVFusion/bevfusion/__init__.py | 3 + projects/BEVFusion/bevfusion/bevfusion.py | 30 +- .../bevfusion/bevfusion_voxel_encoder.py | 295 ++++++++++++++++++ ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 30 +- ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 ++++++++++ 5 files changed, 501 insertions(+), 18 deletions(-) create mode 100644 projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py diff --git a/projects/BEVFusion/bevfusion/__init__.py 
b/projects/BEVFusion/bevfusion/__init__.py index 60a64b532..3db358b55 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,6 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder +from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder __all__ = [ "BEVFusion", @@ -27,4 +28,6 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", + "BEVFusionVoxelEncoder", + "BEVFusionVoxelSinCosEncoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 243b3beb5..bc3f1b094 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -207,18 +207,29 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 + + if self.pts_voxel_encoder is not None: + assert not self.voxelize_reduce + feats = self.pts_voxel_encoder(feats, sizes, coords) else: # NOTE(knzo25): onnx inference. 
Voxelization happens outside the graph with torch.cuda.amp.autocast(enabled=False): # with torch.autocast('cuda', enabled=False): + # NOTE(knzo25): onnx demmands this + # batch_size = coords[-1, 0] + 1 + # with torch.autocast('cuda', enabled=False): + # NOTE(knzo25): onnx demmands this # batch_size = coords[-1, 0] + 1 batch_size = 1 print("Run onnx point_eSpConvst") - assert self.voxelize_reduce - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + if self.pts_voxel_encoder is not None: + feats = self.pts_voxel_encoder(feats, sizes, coords) + else: + assert self.voxelize_reduce + if self.voxelize_reduce: + feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) x = self.pts_middle_encoder(feats, coords, batch_size) return x @@ -241,12 +252,13 @@ def voxelize(self, points): feats = torch.cat(feats, dim=0) coords = torch.cat(coords, dim=0) - if len(sizes) > 0: - sizes = torch.cat(sizes, dim=0) - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) - feats = feats.contiguous() - + assert len(sizes) > 0, "No points in the voxel" + sizes = torch.cat(sizes, dim=0) + + if self.voxelize_reduce: + feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + feats = feats.contiguous() + return feats, coords, sizes def predict( diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py new file mode 100644 index 000000000..efbc995e8 --- /dev/null +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -0,0 +1,295 @@ +from typing import Optional, Tuple + +import torch +import numpy as np +from mmcv.cnn import build_norm_layer +from mmcv.ops import DynamicScatter +from torch import Tensor, nn + +from mmdet3d.registry import MODELS +from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer + + +@MODELS.register_module() +class 
BEVFusionVoxelEncoder(nn.Module): + """BEVFusion Voxel Encoder Feature Net. + + The network is same as pillar featuer net. + The network prepares the pillar features and performs forward pass + through PFNLayers. + + Args: + in_channels (int, optional): Number of input features, + either x, y, z or x, y, z, r. Defaults to 4. + feat_channels (tuple, optional): Number of features in each of the + N PFNLayers. Defaults to (64, ). + with_distance (bool, optional): Whether to include Euclidean distance + to points. Defaults to False. + with_cluster_center (bool, optional): [description]. Defaults to True. + with_voxel_center (bool, optional): [description]. Defaults to True. + voxel_size (tuple[float], optional): Size of voxels, only utilize x + and y size. Defaults to (0.2, 0.2, 4). + point_cloud_range (tuple[float], optional): Point cloud range, only + utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1). + norm_cfg ([type], optional): [description]. + Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). + mode (str, optional): The mode to gather point features. Options are + 'max' or 'avg'. Defaults to 'max'. + legacy (bool, optional): Whether to use the new behavior or + the original behavior. Defaults to True. 
+ """ + + def __init__(self, + min_norm_values: Optional[Tuple[float]] = None, + max_norm_values: Optional[Tuple[float]] = None, + in_channels: Optional[int] = 4, + feat_channels: Optional[tuple] = (64, ), + with_distance: Optional[bool] = False, + with_cluster_center: Optional[bool] = True, + with_voxel_center: Optional[bool] = True, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, + 40, 1), + norm_cfg: Optional[dict] = dict( + type='BN1d', eps=1e-3, momentum=0.01), + mode: Optional[str] = 'max', + legacy: Optional[bool] = True): + super(BEVFusionVoxelEncoder, self).__init__() + assert len(feat_channels) > 0 + self.legacy = legacy + if with_cluster_center: + in_channels += 3 + if with_voxel_center: + in_channels += 3 + if with_distance: + in_channels += 1 + self._with_distance = with_distance + self._with_cluster_center = with_cluster_center + self._with_voxel_center = with_voxel_center + # Create PillarFeatureNet layers + self.in_channels = in_channels + feat_channels = [in_channels] + list(feat_channels) + pfn_layers = [] + for i in range(len(feat_channels) - 1): + in_filters = feat_channels[i] + out_filters = feat_channels[i + 1] + if i < len(feat_channels) - 2: + last_layer = False + else: + last_layer = True + pfn_layers.append( + PFNLayer( + in_filters, + out_filters, + norm_cfg=norm_cfg, + last_layer=last_layer, + mode=mode)) + self.pfn_layers = nn.ModuleList(pfn_layers) + + # Need pillar (voxel) size and x/y offset in order to calculate offset + self.vx = voxel_size[0] + self.vy = voxel_size[1] + self.vz = voxel_size[2] + self.x_offset = self.vx / 2 + point_cloud_range[0] + self.y_offset = self.vy / 2 + point_cloud_range[1] + self.z_offset = self.vz / 2 + point_cloud_range[2] + self.point_cloud_range = point_cloud_range + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + 
self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C). + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C). + """ + if self.min_norm_values is not None and self.max_norm_values is not None: + features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) + else: + features_norm = features + + features_ls = [features_norm] + # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available + if self._with_cluster_center: + points_mean = features[:, :, :3].sum( + dim=1, keepdim=True) / num_points.type_as(features).view( + -1, 1, 1) + f_cluster = features[:, :, :3] - points_mean + # Map to [0, 1] if available + if self.min_norm_values is not None and self.max_norm_values is not None: + voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) + f_cluster = f_cluster / voxel_size + features_ls.append(f_cluster) + + # Find distance of x, y, and z from pillar center + dtype = features.dtype + if self._with_voxel_center: + if not self.legacy: + f_center = torch.zeros_like(features[:, :, :3]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + + self.y_offset) + f_center[:, :, 2] = features[:, :, 2] - ( + coors[:, 1].to(dtype).unsqueeze(1) * self.vz + + self.z_offset) + else: + f_center = features[:, :, :3] + f_center[:, :, 0] = f_center[:, :, 0] - ( + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = 
f_center[:, :, 1] - ( + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + + self.y_offset) + f_center[:, :, 2] = f_center[:, :, 2] - ( + coors[:, 1].type_as(features).unsqueeze(1) * self.vz + + self.z_offset) + + if self.min_norm_values is not None and self.max_norm_values is not None: + f_center = f_center / (voxel_size * 0.5) + features_ls.append(f_center) + + if self._with_distance: + points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) + features_ls.append(points_dist) + + # Combine together feature decorations + features = torch.cat(features_ls, dim=-1) + # The feature decorations were calculated without regard to whether + # pillar was empty. Need to ensure that + # empty pillars remain set to zeros. + voxel_count = features.shape[1] + mask = get_paddings_indicator(num_points, voxel_count, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(features) + features *= mask + + for pfn in self.pfn_layers: + features = pfn(features, num_points) + + return features.squeeze(1) + + +@MODELS.register_module() +class BEVFusionVoxelSinCosEncoder(nn.Module): + def __init__(self, + min_norm_values: Tuple[float], + max_norm_values: Tuple[float], + in_channels: Optional[int] = 4, + with_distance: Optional[bool] = False, + with_cluster_center: Optional[bool] = True, + with_voxel_center: Optional[bool] = True, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, + 40, 1),): + super(BEVFusionVoxelSinCosEncoder, self).__init__() + + if with_cluster_center: + in_channels += 3 + if with_voxel_center: + in_channels += 3 + if with_distance: + in_channels += 1 + self._with_distance = with_distance + self._with_cluster_center = with_cluster_center + self._with_voxel_center = with_voxel_center + # Create PillarFeatureNet layers + self.in_channels = in_channels + + # Need pillar (voxel) size and x/y offset in order to calculate offset + self.vx = voxel_size[0] + self.vy = voxel_size[1] + self.vz = 
voxel_size[2] + self.x_offset = self.vx / 2 + point_cloud_range[0] + self.y_offset = self.vy / 2 + point_cloud_range[1] + self.z_offset = self.vz / 2 + point_cloud_range[2] + self.point_cloud_range = point_cloud_range + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) + self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float())) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C). + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C). + """ + features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) + features_ls = [features_norm] + # Find distance of x, y, and z from cluster center + if self._with_cluster_center: + points_mean = features[:, :, :3].sum( + dim=1, keepdim=True) / num_points.type_as(features).view( + -1, 1, 1) + + # Map to [-1, 1] + f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size + # f_cluster = features[:, :, :3] - points_mean + features_ls.append(f_cluster) + + # Find distance of x, y, and z from pillar center + dtype = features.dtype + if self._with_voxel_center: + f_center = torch.zeros_like(features[:, :, :3]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + + self.y_offset) + f_center[:, :, 2] = features[:, :, 2] - ( + coors[:, 1].to(dtype).unsqueeze(1) * self.vz + + self.z_offset) + + # Map to 
[-1, 1] + f_center = f_center / (self.voxel_size * 0.5) + features_ls.append(f_center) + + if self._with_distance: + points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) + features_ls.append(points_dist) + + # Combine together feature decorations + features = torch.cat(features_ls, dim=-1) + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # SinCos encoding + # (N, M, C) -> (N, M, C, 1) -> (N, M, C, 1) * (1, 1, 1, C) -> (N, M, C, C) + y = features.unsqueeze(-1) * np.pi * self.exponents.unsqueeze(0).unsqueeze(0).unsqueeze(0) + # (N, M, C, C) -> (N, M, C*C) + y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels ** 2) + # (N, M, C*C) -> (N, M, C*C*2) + features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1) + + # The feature decorations were calculated without regard to whether + # pillar was empty. Need to ensure that + # empty pillars remain set to zeros. + mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(features) + features *= mask + + # Reduction by mean + # (N, M, C*C*2) -> (N, C*C*2) + features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) + features = features.contiguous() + + return features diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index d33b33c56..17f16254d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" -experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type 
-experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,16 +25,28 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=100, sparse_shape=_base_.grid_size, - num_aug_features=4, + # num_aug_features=4, # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct 
class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py new file mode 100644 index 000000000..531a07673 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py @@ -0,0 +1,161 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here 
+ min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=100, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + 
type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From d063cf2a68c2fdd0e3129ff94f5f89299fea51be Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 12:18:46 +0900 
Subject: [PATCH 024/183] Added --- .../BEVFusion/bevfusion/bevfusion_head.py | 4 +- projects/BEVFusion/bevfusion/depth_lss.py | 3 +- ...sion_camera_swin_fpn_30e_8xb8_base_120m.py | 137 ++++++++++++++++++ ...mera_swin_fpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...amera_swin_fpn_lidar_second_secfpn_120m.py | 2 +- .../default_50e_8xb8_adamw_linear_cosine.py | 63 ++++++++ 6 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..e17bab12a 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -25,11 +25,11 @@ def clip_sigmoid(x, eps=1e-4): @MODELS.register_module() class ConvFuser(nn.Sequential): - def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: int) -> None: + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int, padding: int) -> None: self.in_channels = in_channels self.out_channels = out_channels super().__init__( - nn.Conv2d(sum(in_channels), out_channels, kernel_size, padding, bias=False), + nn.Conv2d(sum(in_channels), out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), nn.BatchNorm2d(out_channels), nn.ReLU(True), ) diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index ac7c5b503..03c0ec2f2 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -567,7 +567,8 @@ def __init__( zbound=zbound, dbound=dbound, ) - + + if lidar_depth_image_last_stride is not None: self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, 
last_stride=lidar_depth_image_last_stride) self.depthnet = DepthLSSNet( in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py new file mode 100644 index 000000000..2debcccb8 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", + "../default/models/default_camera_swin_fpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + 
voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + 
name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py index e65c52ece..3a69a2cd2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py index c4097de3d..55c6ca3cd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py @@ -56,5 +56,5 @@ 
dbound=[1.0, 130, 1.0], downsample=2, ), - fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, padding=2), + fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, stride=2, padding=2), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py new file mode 100644 index 000000000..9517aba86 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py @@ -0,0 +1,63 @@ +# learning rate +# 1e-4 * sqrt(2) = 0.0001414 +lr = 1.4141e-4 +t_max = 8 +max_epochs = 50 +val_interval = 5 + +train_gpu_size = 8 +test_batch_size = 2 +train_batch_size = 8 + +param_scheduler = [ + # learning rate scheduler + dict(type="LinearLR", start_factor=1.0 / 3, begin=0, end=t_max, by_epoch=True), + dict( + type="CosineAnnealingLR", + T_max=(max_epochs - t_max), + eta_min=lr * 1e-4, + begin=t_max, + end=max_epochs, + by_epoch=True, + convert_to_iter_based=True, + ), + # momentum scheduler + # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 + # during the next epochs, momentum increases from 0.85 / 0.95 to 1 + dict( + type="CosineAnnealingMomentum", + T_max=t_max, + eta_min=0.85 / 0.95, + begin=0, + end=t_max, + by_epoch=True, + convert_to_iter_based=True, + ), + dict( + type="CosineAnnealingMomentum", + T_max=(max_epochs - t_max), + eta_min=1, + begin=t_max, + end=max_epochs, + by_epoch=True, + convert_to_iter_based=True, + ), +] + +train_cfg = dict( + by_epoch=True, max_epochs=max_epochs, val_interval=val_interval, dynamic_intervals=[(max_epochs - 5, 1)] +) +val_cfg = dict() +test_cfg = dict() + +optim_wrapper = dict( + type="OptimWrapper", + optimizer=dict(type="AdamW", lr=lr, weight_decay=0.01), + clip_grad=dict(max_norm=0.1, norm_type=2), +) + +auto_scale_lr = 
dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) + +# Only set if the number of train_gpu_size more than 1 +if train_gpu_size > 1: + sync_bn = "torch" From e547e0514420c4cd26b68d13ffacdf65901cefc8 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:08:16 +0900 Subject: [PATCH 025/183] Add bevfusion camera 2.6 --- projects/BEVFusion/bevfusion/transforms_3d.py | 12 ++ ...camera_swin_fpn_lss_50e_8xb8_base_120m.py} | 6 +- .../default_camera_swin_fpn_lss_120m.py | 65 +++++++++ .../pipelines/default_camera_base_120m.py | 131 ++++++++++++++++++ .../default_camera_lidar_intensity_120m.py | 6 +- 5 files changed, 214 insertions(+), 6 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py => bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py} (95%) create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py diff --git a/projects/BEVFusion/bevfusion/transforms_3d.py b/projects/BEVFusion/bevfusion/transforms_3d.py index 7e9faca24..c311f9254 100644 --- a/projects/BEVFusion/bevfusion/transforms_3d.py +++ b/projects/BEVFusion/bevfusion/transforms_3d.py @@ -188,6 +188,18 @@ def transform(self, input_dict: dict) -> dict: return input_dict +@TRANSFORMS.register_module() +class BEVFusionRemoveLiDARPoints(BaseTransform): + """Remove LiDAR points from the data.""" + def __init__(self): + super().__init__() + + def transform(self, results: Dict[str, Any]) -> Dict[str, Any]: + if "points" in results: + results["points"] = None + return results + + @TRANSFORMS.register_module() class GridMask(BaseTransform): diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py similarity index 95% 
rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py index 2debcccb8..1c30d708a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py @@ -1,8 +1,8 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_120m.py", + "../default/pipelines/default_camera_base_120m.py", + "../default/models/default_camera_swin_fpn_lss_120m.py", "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_base_120m" +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py new file mode 100644 index 000000000..2f1d1f3be --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py @@ -0,0 +1,65 @@ +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] + +# Image network +model = dict( + # Remove all lidar related configs + voxelize_cfg=None, + pts_voxel_encoder=None, + pts_middle_encoder=None, + pts_neck=None, + pts_backbone=None, + data_preprocessor=dict( + type="Det3DDataPreprocessor", + 
pad_size_divisor=32, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=False, + rgb_to_bgr=False, + ), + img_backbone=dict( + type="mmdet.SwinTransformer", + pretrain_img_size=(256, 704), + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + patch_norm=True, + out_indices=[1, 2, 3], + with_cp=False, + convert_weights=True, + init_cfg=dict( + type="Pretrained", + # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth + checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 + ), + ), + img_neck=dict( + type="GeneralizedLSSFPN", + in_channels=[192, 384, 768], + out_channels=256, + start_level=0, + num_outs=3, + norm_cfg=dict(type="BN2d", requires_grad=True), + act_cfg=dict(type="ReLU", inplace=True), + upsample_cfg=dict(mode="bilinear", align_corners=False), + ), + view_transform=dict( + type="LSSTransform", + in_channels=256, + out_channels=80, + feature_size=[48, 96], + xbound=[-122.40, 122.40, 0.68], + ybound=[-122.40, 122.40, 0.68], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 130, 1.0], + downsample=2, + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py new file mode 100644 index 000000000..a96773825 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -0,0 +1,131 @@ +## This config is for the camera_base only model, without lidar points + +_base_ = [ + "./default_lidar_120m.py", +] + +# Image parameters +image_size = [384, 768] # Height, Width +camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"] + +train_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + 
color_type="color", + backend_args=backend_args, + camera_order=camera_order, + ), + # We keep loading LiDAR points to make downstream BEV augmentation easier + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.28, 0.40], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=True, + is_train=True, + ), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + # Remove LiDAR points from the data + dict(type="BEVFusionRemoveLiDARPoints"), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "construction_vehicle", + "bus", + "trailer", + "barrier", + "motorcycle", + "bicycle", + "pedestrian", + "traffic_cone", + ], + ), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +test_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + color_type="color", + backend_args=backend_args, + camera_order=camera_order, + ), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.34, 0.34], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=False, + is_train=False, + ), + dict( + type="Pack3DDetInputs", + 
keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +filter_cfg = dict(filter_frames_with_camera_order=camera_order) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 963a218e1..2ed285200 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -54,9 +54,9 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.29, 0.35], + resize_lim=[0.28, 0.40], bot_pct_lim=[0.0, 0.0], - rot_lim=[-5.4, 5.4], + rot_lim=[0.0, 0.0], rand_flip=True, is_train=True, ), @@ -139,7 +139,7 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.32, 0.32], + resize_lim=[0.34, 0.34], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=False, From 2ef16e885b2c0c653761664d0fd2e5cf6740740e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:20:34 +0900 Subject: [PATCH 026/183] Add bevfusion camera 2.6 --- projects/BEVFusion/bevfusion/depth_lss.py | 1 - .../default/pipelines/default_camera_base_120m.py | 12 ++++++------ .../pipelines/default_camera_lidar_intensity_120m.py | 4 ++++ .../default/pipelines/default_lidar_120m.py | 2 +- .../pipelines/default_lidar_intensity_120m.py | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index 03c0ec2f2..f202c3777 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -568,7 
+568,6 @@ def __init__( dbound=dbound, ) - if lidar_depth_image_last_stride is not None: self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, last_stride=lidar_depth_image_last_stride) self.depthnet = DepthLSSNet( in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index a96773825..8d1c6da86 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -13,16 +13,16 @@ type="BEVLoadMultiViewImageFromFiles", to_float32=True, color_type="color", - backend_args=backend_args, + backend_args=_base_.backend_args, camera_order=camera_order, ), # We keep loading LiDAR points to make downstream BEV augmentation easier dict( type="LoadPointsFromFile", coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, + load_dim=_base_.point_load_dim, + use_dim=_base_.point_load_dim, + backend_args=_base_.backend_args, ), dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), dict( @@ -41,7 +41,7 @@ translation_std=[0.5, 0.5, 0.2], ), dict(type="BEVFusionRandomFlip3D"), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), # Remove LiDAR points from the data dict(type="BEVFusionRemoveLiDARPoints"), dict( @@ -92,7 +92,7 @@ type="BEVLoadMultiViewImageFromFiles", to_float32=True, color_type="color", - backend_args=backend_args, + backend_args=_base_.backend_args, camera_order=camera_order, ), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 2ed285200..18e154921 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -107,6 +107,8 @@ "img_aug_matrix", "lidar_aug_matrix", "timestamp", + "vehicle_type", + "city", ], ), ] @@ -164,6 +166,8 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 06d95be16..da2b775e2 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -137,4 +137,4 @@ # e.g., dict(filter_frames_with_missing_image=True). # - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so # image-based filtering does not apply and `filter_cfg` is intentionally None. -filter_cfg = None +filter_cfg = dict() diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 4e74d3616..723a241d7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -137,4 +137,4 @@ # e.g., dict(filter_frames_with_missing_image=True). # - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so # image-based filtering does not apply and `filter_cfg` is intentionally None. 
-filter_cfg = None +filter_cfg = dict() From a26782abfb7c9b088845d9d778ab0babd520ed74 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:24:44 +0900 Subject: [PATCH 027/183] Updated --- ...l_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++-- ..._lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 6 +----- ..._second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index afb150284..62ea479fb 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -42,7 +42,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +64,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 0eb440472..73c1e4671 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -148,8 +148,4 @@ ) log_processor = dict(window_size=50) -<<<<<<< HEAD -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" -======= -load_from "" ->>>>>>> feat/releave_bevfusion_2_6 +# load_from = "" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 3320d2b08..0109e96d9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -42,7 +42,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="jpntaxi_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +64,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="jpntaxi_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, From 
f4c01a542618b3c592ddf1cd6e2b9d1657abccbf Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:51:58 +0900 Subject: [PATCH 028/183] Updated --- autoware_ml/detection3d/datasets/t4dataset.py | 2 +- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 7 ++----- ...lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py | 2 +- ...second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 3 --- ...r_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 3 --- 5 files changed, 4 insertions(+), 13 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..74d274b87 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]: break if entry["images"][camera_order]["img_path"] is None or not osp.exists( - entry["images"][camera_order]["img_path"] + self.data_root + entry["images"][camera_order]["img_path"] ): filtered = True break diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 62ea479fb..3476011ff 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -42,7 +39,7 @@ 
testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="j6gen2_base", + dataset_name="base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -64,7 +61,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="j6gen2_base", + dataset_name="base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 73c1e4671..4eea4c2aa 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 0109e96d9..49d91e05d 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index c9a0050c0..3d976d970 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -3,9 +3,6 @@ ] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" - experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 15371af9539e84c46d844efa5007d565acb80878 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 17:52:51 +0900 Subject: [PATCH 029/183] Update dataset name --- ...xel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 3476011ff..0748008ba 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -39,7 +39,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="validation", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, @@ -61,7 +61,7 @@ testing_statistics_parquet_path=testing_statistics_parquet_path, validation_statistics_parquet_path=validation_statistics_parquet_path, output_dir="testing", - dataset_name="base", + dataset_name="j6gen2_base", perception_evaluator_configs=perception_evaluator_configs, critical_object_filter_config=None, frame_pass_fail_config=frame_pass_fail_config, From c6e8a10856085304b458e7204bfbc0246492dccd Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 18:40:44 +0900 Subject: [PATCH 030/183] Add bevfusion camera 2.6 --- autoware_ml/detection3d/datasets/t4dataset.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..a01d9fbe6 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -5,6 +5,7 @@ from mmdet3d.datasets import NuScenesDataset from mmengine.logging import print_log from mmengine.registry import DATASETS +import tqdm @DATASETS.register_module() @@ -56,16 +57,14 @@ def filter_data(self) -> List[dict]: return self.data_list filtered_data_list = [] - for 
entry in self.data_list: + for entry in tqdm.tqdm(self.data_list, desc="Filtering data"): filtered = False for camera_order in filter_frames_with_camera_order: if camera_order not in entry["images"]: filtered = True break - - if entry["images"][camera_order]["img_path"] is None or not osp.exists( - entry["images"][camera_order]["img_path"] - ): + + if entry["images"][camera_order]["img_path"] is None: filtered = True break From 7252126bcf3dce3b73ef26db0fc7fe2f59ba1cb6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 18 Apr 2026 19:21:52 +0900 Subject: [PATCH 031/183] Add bevfusion camera 2.6 --- autoware_ml/detection3d/datasets/t4dataset.py | 1 + .../t4dataset/default/pipelines/default_camera_base_120m.py | 1 + 2 files changed, 2 insertions(+) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index a01d9fbe6..2406bfcb1 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -179,6 +179,7 @@ def parse_data_info(self, info: dict) -> dict: cam_prefix, img_info["img_path"], ) + # print_log(f"Camera path: {img_info['img_path']}", logger="current") if self.default_cam_key is not None: info["img_path"] = info["images"][self.default_cam_key]["img_path"] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index 8d1c6da86..39972b6d0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -3,6 +3,7 @@ _base_ = [ "./default_lidar_120m.py", ] +input_modality = dict(use_lidar=True, use_camera=True) # Image parameters image_size = [384, 768] # Height, Width From 9e793246cd49b5849a12c38821e6623d7d542a2c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 19 Apr 2026 04:57:19 +0900 Subject: [PATCH 
032/183] Add bevfusion camera 2.6 --- projects/BEVFusion/bevfusion/__init__.py | 3 ++- .../default/pipelines/default_camera_base_120m.py | 7 +++++-- .../pipelines/default_camera_lidar_intensity_120m.py | 1 + .../schedulers/default_20e_8xb8_adamw_linear_cosine.py | 2 +- .../schedulers/default_30e_8xb8_adamw_linear_cosine.py | 2 +- .../schedulers/default_50e_8xb8_adamw_linear_cosine.py | 4 ++-- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 60a64b532..4732eabfc 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -5,7 +5,7 @@ from .loading import BEVLoadMultiViewImageFromFiles from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer -from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D +from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D, BEVFusionRemoveLiDARPoints from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder __all__ = [ @@ -26,5 +26,6 @@ "TransformerDecoderLayer", "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", + "BEVFusionRemoveLiDARPoints", "TransFusionBBoxCoder", ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index 39972b6d0..5f926f867 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -29,7 +29,8 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.28, 0.40], + # resize_lim=[0.28, 0.40], + resize_lim=0.02, bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=True, @@ -99,12 +100,14 @@ dict( type="ImageAug3D", 
final_dim=image_size, - resize_lim=[0.34, 0.34], + # resize_lim=[0.34, 0.34], + resize_lim=0.02, bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=False, is_train=False, ), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 18e154921..53a149f07 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -148,6 +148,7 @@ is_train=False, ), dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py index 15ba38878..db5515b46 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate # 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 +lr = 1e-4 t_max = 6 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 264eda921..f1a1f52b4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate # 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 +lr = 1e-4 t_max = 8 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py index 9517aba86..1161621b8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py @@ -1,7 +1,7 @@ # learning rate # 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 -t_max = 8 +lr = 1e-4 +t_max = 15 max_epochs = 50 val_interval = 5 From a249d423aafc5595cfa2546f7e0599e555d8882f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 19 Apr 2026 18:24:39 +0900 Subject: [PATCH 033/183] Add bevfusion camera 2.6 --- ...ra_swin_fpn_depthlss_50e_8xb8_base_120m.py | 137 +++++++++++++++++ ...n_camera_swin_fpn_lss_50e_8xb8_base_50m.py | 137 +++++++++++++++++ .../default_camera_swin_fpn_depthlss_120m.py | 65 ++++++++ .../models/default_camera_swin_fpn_lss_50m.py | 70 +++++++++ .../pipelines/default_camera_base_120m.py | 10 +- .../pipelines/default_camera_base_50m.py | 135 +++++++++++++++++ .../default/pipelines/default_lidar_50m.py | 140 ++++++++++++++++++ .../default_50e_8xb8_adamw_linear_cosine.py | 2 +- 8 files changed, 691 insertions(+), 5 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py create mode 100644 
projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py new file mode 100644 index 000000000..8c8d84d18 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", + "../default/models/default_camera_swin_fpn_depthlss_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + 
grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + 
data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py new file mode 100644 index 000000000..7c5a5f91f --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_camera_base_50m.py", + "../default/models/default_camera_swin_fpn_lss_50m.py", + "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = 
"info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + 
batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py new file mode 100644 index 000000000..c4b0cd9ab --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py @@ -0,0 +1,65 @@ +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] + +# Image network +model = dict( + # Remove all lidar related configs + 
voxelize_cfg=None, + pts_voxel_encoder=None, + pts_middle_encoder=None, + pts_neck=None, + pts_backbone=None, + data_preprocessor=dict( + type="Det3DDataPreprocessor", + pad_size_divisor=32, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=False, + rgb_to_bgr=False, + ), + img_backbone=dict( + type="mmdet.SwinTransformer", + pretrain_img_size=(256, 704), + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + patch_norm=True, + out_indices=[1, 2, 3], + with_cp=False, + convert_weights=True, + init_cfg=dict( + type="Pretrained", + # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth + checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 + ), + ), + img_neck=dict( + type="GeneralizedLSSFPN", + in_channels=[192, 384, 768], + out_channels=256, + start_level=0, + num_outs=3, + norm_cfg=dict(type="BN2d", requires_grad=True), + act_cfg=dict(type="ReLU", inplace=True), + upsample_cfg=dict(mode="bilinear", align_corners=False), + ), + view_transform=dict( + type="DepthLSSTransform", + in_channels=256, + out_channels=80, + feature_size=[48, 96], + xbound=[-122.40, 122.40, 0.68], + ybound=[-122.40, 122.40, 0.68], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 130, 1.0], + downsample=2, + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py new file mode 100644 index 000000000..a35e3a79a --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py @@ -0,0 +1,70 @@ +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] + +# Image network +model = dict( + # Remove all lidar related configs + voxelize_cfg=None, + pts_voxel_encoder=None, + 
pts_middle_encoder=None, + pts_neck=None, + pts_backbone=None, + data_preprocessor=dict( + type="Det3DDataPreprocessor", + pad_size_divisor=32, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=False, + rgb_to_bgr=False, + ), + img_backbone=dict( + type="mmdet.SwinTransformer", + pretrain_img_size=(256, 704), + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + patch_norm=True, + out_indices=[1, 2, 3], + with_cp=False, + convert_weights=True, + init_cfg=dict( + type="Pretrained", + # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth + checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 + ), + ), + img_neck=dict( + type="GeneralizedLSSFPN", + in_channels=[192, 384, 768], + out_channels=256, + start_level=0, + num_outs=3, + norm_cfg=dict(type="BN2d", requires_grad=True), + act_cfg=dict(type="ReLU", inplace=True), + upsample_cfg=dict(mode="bilinear", align_corners=False), + ), + view_transform=dict( + type="LSSTransform", + in_channels=256, + out_channels=80, + feature_size=[48, 96], + xbound=[-54.0, 54.0, 0.3], + ybound=[-54.0, 54.0, 0.3], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 60, 0.5], + downsample=2, + ), + bbox_head=dict( + bbox_coder=dict( + post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + ), + ) +) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index 5f926f867..410968467 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -38,9 +38,12 @@ ), dict( type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - 
rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], + # scale_ratio_range=[0.95, 1.05], + # rot_range=[-0.78539816, 0.78539816], + # translation_std=[0.5, 0.5, 0.2], + scale_ratio_range=[0.98, 1.02], + rot_range=[-0.3925, 0.3925], + translation_std=[0.2, 0.2, 0.1], ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), @@ -107,7 +110,6 @@ rand_flip=False, is_train=False, ), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py new file mode 100644 index 000000000..c9010038f --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py @@ -0,0 +1,135 @@ +## This config is for the camera_base only model, without lidar points + +_base_ = [ + "./default_lidar_50m.py", +] +input_modality = dict(use_lidar=True, use_camera=True) + +# Image parameters +image_size = [384, 768] # Height, Width +camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"] + +train_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + color_type="color", + backend_args=_base_.backend_args, + camera_order=camera_order, + ), + # We keep loading LiDAR points to make downstream BEV augmentation easier + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_load_dim, + backend_args=_base_.backend_args, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.28, 0.40], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=True, + is_train=True, + ), + 
dict( + type="BEVFusionGlobalRotScaleTrans", + # scale_ratio_range=[0.95, 1.05], + # rot_range=[-0.78539816, 0.78539816], + # translation_std=[0.5, 0.5, 0.2], + scale_ratio_range=[0.98, 1.02], + rot_range=[-0.3925, 0.3925], + translation_std=[0.2, 0.2, 0.1], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + # Remove LiDAR points from the data + dict(type="BEVFusionRemoveLiDARPoints"), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "construction_vehicle", + "bus", + "trailer", + "barrier", + "motorcycle", + "bicycle", + "pedestrian", + "traffic_cone", + ], + ), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +test_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + color_type="color", + backend_args=_base_.backend_args, + camera_order=camera_order, + ), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.34, 0.34], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=False, + is_train=False, + ), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +filter_cfg = dict(filter_frames_with_camera_order=camera_order) diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py new file mode 100644 index 000000000..964d6eef9 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -0,0 +1,140 @@ +# Dataset parameters +backend_args = None +num_workers = 16 +input_modality = dict(use_lidar=True, use_camera=False) + +# range setting +point_cloud_range = [-54.0, -54.0, -3.0, 54.0, 54.0, 5.0] +voxel_size = [0.075, 0.075, 0.2] +grid_size = [1440, 1440, 41] +eval_class_range = { + "car": 54.0, + "truck": 54.0, + "bus": 54.0, + "bicycle": 54.0, + "pedestrian": 54.0, +} + +# LiDAR parameters +point_load_dim = 5 # x, y, z, intensity, ring_id +point_use_dim = 4 +lidar_sweep_dims = [0, 1, 2, 4] # x, y, z, time_lag +sweeps_num = 1 + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", 
+ "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +# Filtering configuration +# Note: +# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, +# e.g., dict(filter_frames_with_missing_image=True). +# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so +# image-based filtering does not apply and `filter_cfg` is intentionally left empty.
+filter_cfg = dict() diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py index 1161621b8..d569900d6 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py @@ -1,7 +1,7 @@ # learning rate # 1e-4 * sqrt(2) = 0.0001414 lr = 1e-4 -t_max = 15 +t_max = 3 max_epochs = 50 val_interval = 5 From 9d7ddc21adff7115a303d42b024b40e95c38a7a4 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 19 Apr 2026 18:26:57 +0900 Subject: [PATCH 034/183] Add bevfusion camera 2.6 --- ...mera_swin_fpn_30e_8xb8_j6gen2_base_120m.py | 137 ------------------ .../models/default_camera_swin_fpn_120m.py | 65 --------- .../default_30e_8xb8_adamw_linear_cosine.py | 2 +- 3 files changed, 1 insertion(+), 203 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py deleted file mode 100644 index 3a69a2cd2..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_120m.py", - 
"../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_30e_8xb8_j6gen2_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", 
shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py deleted file mode 100644 index c4b0cd9ab..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_120m.py +++ /dev/null @@ -1,65 +0,0 @@ -_base_ = [ - "./default_lidar_second_secfpn_120m.py", -] - -# Image network -model = dict( - # Remove all lidar related configs - voxelize_cfg=None, - pts_voxel_encoder=None, - pts_middle_encoder=None, - pts_neck=None, - pts_backbone=None, - data_preprocessor=dict( - type="Det3DDataPreprocessor", - pad_size_divisor=32, - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=False, - rgb_to_bgr=False, - ), - img_backbone=dict( - type="mmdet.SwinTransformer", - pretrain_img_size=(256, 704), - embed_dims=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.2, - patch_norm=True, - out_indices=[1, 2, 3], - with_cp=False, - convert_weights=True, - init_cfg=dict( - type="Pretrained", - # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth - checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 - ), - ), - img_neck=dict( - type="GeneralizedLSSFPN", - in_channels=[192, 384, 768], - out_channels=256, - start_level=0, - num_outs=3, - norm_cfg=dict(type="BN2d", requires_grad=True), - act_cfg=dict(type="ReLU", inplace=True), - upsample_cfg=dict(mode="bilinear", align_corners=False), - ), - view_transform=dict( - type="DepthLSSTransform", - in_channels=256, - out_channels=80, - feature_size=[48, 96], - xbound=[-122.40, 122.40, 0.68], - ybound=[-122.40, 122.40, 0.68], - zbound=[-10.0, 10.0, 20.0], - dbound=[1.0, 130, 1.0], - downsample=2, - ), -) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index f1a1f52b4..2181cbebb 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,7 +1,7 @@ # learning rate # 1e-4 * sqrt(2) = 0.0001414 lr = 1e-4 -t_max = 8 +t_max = 2 max_epochs = 30 val_interval = 1 From 0286f25f66b0910f8ea3e9230331ea1ec279c42b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 19 Apr 2026 18:27:55 +0900 Subject: [PATCH 035/183] Add bevfusion camera 2.6 --- ..._fpn_depthlss_30e_8xb8_j6gen2_base_120m.py | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py new file mode 100644 index 000000000..d31630dd0 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", + "../default/models/default_camera_swin_fpn_depthlss_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = 
"data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + 
backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 4977b332437c647cd617c66a88f1f109129ec9a2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 20 Apr 2026 06:46:48 +0900 Subject: [PATCH 036/183] Update dataset name --- ...voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 2 +- ...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py | 2 +- 
..._lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 0748008ba..d1950d39a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 49d91e05d..6bd285ce1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2.6.1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2.7.1/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + 
experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index d33b33c56..78d287af6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index 3d976d970..fbcfe2dce 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_2.6.0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2.7.0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From f85a8e906b887d56d67a4e7f88673c1032432a0c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 20 Apr 2026 13:04:20 +0900 Subject: [PATCH 037/183] Update dataset name --- ...usion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 3e615b504..4f220cbcb 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -148,4 +148,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" +# load_from = "" From cf3310c090d254dc184bf798e196c8d92d9a2a98 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Tue, 21 Apr 2026 14:10:39 +0900 Subject: [PATCH 038/183] Added --- ..._swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ++++++++++++++++++ .../pipelines/default_camera_base_120m.py | 9 ++ .../default_camera_lidar_intensity_120m.py | 48 ++---- 3 files changed, 161 insertions(+), 33 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py new file mode 100644 index 000000000..4c809264e --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", + "../default/models/default_camera_swin_fpn_lss_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", 
+ "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + 
data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index 410968467..227c8241e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -38,12 +38,21 @@ ), dict( type="BEVFusionGlobalRotScaleTrans", +<<<<<<< HEAD # scale_ratio_range=[0.95, 1.05], # rot_range=[-0.78539816, 0.78539816], # translation_std=[0.5, 0.5, 0.2], scale_ratio_range=[0.98, 1.02], rot_range=[-0.3925, 0.3925], translation_std=[0.2, 0.2, 0.1], +======= + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + # scale_ratio_range=[0.98, 1.02], + # rot_range=[-0.3925, 0.3925], + # translation_std=[0.2, 0.2, 0.1], +>>>>>>> e7daa8a9 (Added) ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 53a149f07..439459010 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -40,21 +40,21 @@ use_dim=point_load_dim, backend_args=backend_args, ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=False, - ), + # dict( + # type="LoadPointsFromMultiSweeps", + # sweeps_num=sweeps_num, + # load_dim=point_load_dim, + # use_dim=lidar_sweep_dims, + # pad_empty_sweeps=True, + # remove_close=True, + # backend_args=backend_args, + # test_mode=False, + # ), dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, 
with_attr_label=False), dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.28, 0.40], + resize_lim=[0.29, 0.35], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=True, @@ -67,8 +67,9 @@ translation_std=[0.5, 0.5, 0.2], ), dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + # dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict(type="BEVFusionRemoveLiDARPoints"), dict( type="ObjectNameFilter", classes=[ @@ -84,7 +85,7 @@ "traffic_cone", ], ), - dict(type="PointShuffle"), + # dict(type="PointShuffle"), dict( type="Pack3DDetInputs", keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], @@ -121,34 +122,15 @@ backend_args=backend_args, camera_order=camera_order, ), - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=True, - ), dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.34, 0.34], + resize_lim=[0.32, 0.32], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=False, is_train=False, ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], From 655a0a1321e5f5c5f86f520cc1ba565098b1879f Mon Sep 17 00:00:00 2001 From: KokSeang Date: Tue, 21 Apr 2026 14:11:29 +0900 Subject: [PATCH 039/183] Added --- .../default/pipelines/default_camera_base_120m.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py index 227c8241e..5bc85cbbe 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py @@ -38,21 +38,12 @@ ), dict( type="BEVFusionGlobalRotScaleTrans", -<<<<<<< HEAD - # scale_ratio_range=[0.95, 1.05], - # rot_range=[-0.78539816, 0.78539816], - # translation_std=[0.5, 0.5, 0.2], - scale_ratio_range=[0.98, 1.02], - rot_range=[-0.3925, 0.3925], - translation_std=[0.2, 0.2, 0.1], -======= scale_ratio_range=[0.95, 1.05], rot_range=[-0.78539816, 0.78539816], translation_std=[0.5, 0.5, 0.2], # scale_ratio_range=[0.98, 1.02], # rot_range=[-0.3925, 0.3925], # translation_std=[0.2, 0.2, 0.1], ->>>>>>> e7daa8a9 (Added) ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), From 0f5b5888148efcd2aac5af2315befd9301907745 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 15:06:00 +0900 Subject: [PATCH 040/183] Update configs --- autoware_ml/detection3d/datasets/t4dataset.py | 2 +- .../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index 74d274b87..ce1c78f31 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -64,7 +64,7 @@ def filter_data(self) -> List[dict]: break if entry["images"][camera_order]["img_path"] is None or not osp.exists( - self.data_root + entry["images"][camera_order]["img_path"] + entry["images"][camera_order]["img_path"] ): filtered = True break diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py index 264eda921..23d29acc1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py @@ -1,6 +1,5 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 +lr = 1e-4 t_max = 8 max_epochs = 30 val_interval = 1 From caecca60228a1468c1f139d331b096884da19a4b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 17:38:51 +0900 Subject: [PATCH 041/183] Update base docstring --- .../BEVFusion/docs/BEVFusion-L/v2/base.md | 288 +++++++++++++++++- 1 file changed, 272 insertions(+), 16 deletions(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 9de8a2e34..72d47c4b3 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -70,18 +70,20 @@
Eval Range: 0.0 - 50.0m - | Model version | mAP | car
(107,309) | truck
(24,206) | bus
(5,712) | bicycle
(4,060) | pedestrian
(77,369) | - | :---- | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 | + | Model version | mAP | mAPH | car
(107,309) | truck
(24,206) | bus
(5,712) | bicycle
(4,060) | pedestrian
(77,369) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.8817 | 0.8496 | 0.9131 | 0.8552 | 0.9081 | 0.8357 | 0.8966 | + | BEVFusion-LiDAR base/2.6.0 | 0.8774 | 0.8443 | 0.9049 | 0.8514 | 0.8824 | 0.8543 | 0.8941 | -
+
Eval Range: 50.0 - 90.0m | Model version | mAP | mAPH | car
(94,080) | truck
(27,651) | bus
(4,761) | bicycle
(2,365) | pedestrian
(37,523) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 | + | BEVFusion-LiDAR base/2.7.0 | 0.7002 | 0.6621 | 0.8174 | 0.6660 | 0.6414 | 0.6430 | 0.7331 | + | BEVFusion-LiDAR base/2.6.0 | 0.6824 | 0.6437 | 0.8005 | 0.6567 | 0.5783 | 0.6322 | 0.7445 |
@@ -90,7 +92,8 @@ | Model version | mAP | mAPH | car
(36,895) | truck
(17,759) | bus
(2,852) | bicycle
(519) | pedestrian
(17,091) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 | + | BEVFusion-LiDAR base/2.7.0 | 0.5600 | 0.5254 | 0.6578 | 0.5131 | 0.5178 | 0.4296 | 0.6815 | + | BEVFusion-LiDAR base/2.6.0 | 0.5136 | 0.4788 | 0.6552 | 0.5023 | 0.2849 | 0.4369 | 0.6887 | @@ -99,7 +102,8 @@ | Model version | mAP | mAPH | car
(238,284) | truck
(69,616) | bus
(13,325) | bicycle
(6,944) | pedestrian
(131,983) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 | + | BEVFusion-LiDAR base/2.7.0 | 0.7777 | 0.7420 | 0.8504 | 0.7065 | 0.7443 | 0.7538 | 0.8332 | + | BEVFusion-LiDAR base/2.6.0 | 0.7592 | 0.7227 | 0.8398 | 0.6994 | 0.6621 | 0.7595 | 0.8351 | @@ -119,7 +123,8 @@ | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | + | BEVFusion-LiDAR base/2.7.0 | 0.8837 | 0.8562 | 0.9393 | 0.8587 | 0.8802 | 0.8268 | 0.9135 | + | BEVFusion-LiDAR base/2.6.0 | 0.8784 | 0.8487 | 0.9436 | 0.8531 | 0.8284 | 0.8546 | 0.9123 | @@ -128,7 +133,8 @@ | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | + | BEVFusion-LiDAR base/2.7.0 | 0.6901 | 0.6630 | 0.8382 | 0.6676 | 0.5007 | 0.6794 | 0.7645 | + | BEVFusion-LiDAR base/2.6.0 | 0.6692 | 0.6414 | 0.8323 | 0.6571 | 0.4033 | 0.6721 | 0.7812 | @@ -137,7 +143,8 @@ | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | + | BEVFusion-LiDAR base/2.7.0 | 0.5750 | 0.5466 | 0.6601 | 0.5131 | 0.5145 | 0.4541 | 0.7331 | + | BEVFusion-LiDAR base/2.6.0 | 0.5300 | 0.5010 | 0.6692 | 0.5020 | 0.2822 | 0.4586 | 0.7380 | @@ -146,6 +153,7 @@ | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.7715 | 0.7432 | 0.8661 | 0.7010 | 0.6721 | 0.7611 | 0.8573 | | BEVFusion-LiDAR base/2.6.0 | 0.7471 | 0.7176 | 0.8667 | 0.6928 | 0.5446 | 0.7710 | 0.8606 | @@ -167,7 +175,8 @@ | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 | + | BEVFusion-LiDAR base/2.7.0 | 0.8876 | 0.8447 | 0.9176 | 0.8727 | 0.9443 | 0.8396 | 0.8639 | + | BEVFusion-LiDAR base/2.6.0 | 0.8882 | 0.8475 | 0.9045 | 0.8793 | 0.9482 | 0.8489 | 0.8598 | @@ -176,7 +185,8 @@ | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 | + | BEVFusion-LiDAR base/2.7.0 | 0.7392 | 0.6842 | 0.8425 | 0.7288 | 0.8580 | 0.5826 | 0.6839 | + | BEVFusion-LiDAR base/2.6.0 | 0.7132 | 0.6586 | 0.8237 | 0.7245 | 0.7811 | 0.5497 | 0.6871 | @@ -185,7 +195,8 @@ | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 | + | BEVFusion-LiDAR base/2.7.0 | 0.5572 | 0.5118 | 0.7091 | 0.6393 | 0.6121 | 0.3386 | 0.4870 | + | BEVFusion-LiDAR base/2.6.0 | 0.5202 | 0.4736 | 0.6989 | 0.6297 | 0.4058 | 0.3609 | 0.5056 | @@ -194,6 +205,7 @@ | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.8086 | 0.7594 | 0.8789 | 0.7783 | 0.8898 | 0.7288 | 0.7670 | | BEVFusion-LiDAR base/2.6.0 | 0.7995 | 0.7514 | 0.8640 | 0.7788 | 0.8608 | 0.7272 | 0.7669 | @@ -221,7 +233,8 @@ | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 | + | BEVFusion-LiDAR base/2.7.0 | 0.8776 | 0.8370 | 0.8907 | 0.8438 | 0.9473 | 0.8665 | 0.8397 | + | BEVFusion-LiDAR base/2.6.0 | 0.8702 | 0.8284 | 0.8758 | 0.8410 | 0.9408 | 0.8590 | 0.8344 | @@ -230,7 +243,8 @@ | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 | + | BEVFusion-LiDAR base/2.7.0 | 0.6805 | 0.6279 | 0.7957 | 0.6451 | 0.7955 | 0.5394 | 0.6266 | + | BEVFusion-LiDAR base/2.6.0 | 0.6708 | 0.6165 | 0.7721 | 0.6421 | 0.7731 | 0.5472 | 0.6192 | @@ -239,6 +253,7 @@ | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR base/2.7.0 | 0.4902 | 0.4491 | 0.6483 | 0.4871 | 0.5172 | 0.4406 | 0.3578 | | BEVFusion-LiDAR base/2.6.0 | 0.4462 | 0.4042 | 0.6346 | 0.4758 | 0.3215 | 0.4303 | 0.3688 | @@ -248,7 +263,8 @@ | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 | + | BEVFusion-LiDAR base/2.7.0 | 0.7822 | 0.7349 | 0.8292 | 0.7169 | 0.8590 | 0.7505 | 0.7556 | + | BEVFusion-LiDAR base/2.6.0 | 0.7712 | 0.7223 | 0.8110 | 0.7129 | 0.8348 | 0.7458 | 0.7515 | @@ -256,6 +272,246 @@ ## Release +### BEVFusion-LiDAR base/2.7.0 + +
+ Changes + +- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping them to Fourier features [[1]](https://arxiv.org/pdf/2006.10739). +
+ +
+ Artifacts + +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/51628f64-9c15-4029-b3c5-5bf501d879e2?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1zopj68qxLmI244qi3NgxB0ELT997V4W3/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/logs.zip) + - [Google drive](https://drive.google.com/file/d/1-OIvsmsB69a5L_4sqjOSJ9IOltRWFDIv/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/t4base/v2.7.0/best_epoch_48.pth) + - [Google drive](https://drive.google.com/file/d/1b8iwwLBLAmn0NwqRaTJOWHMINfS9p_fc/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/0f5b5888148efcd2aac5af2315befd9301907745/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 50 epochs ~= 4 days +- Batch size: 8*8 = 64 +- Training Dataset (frames: 142,196): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - j6: db_gsm8_v1 + db_j6_v1 + db_j6_v2 + db_j6_v3 + db_j6_v5 (29,336 frames) + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (43,968 frames) + - largebus: db_largebus_v1 + db_largebus_v2 (12,605 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames) + +
+ +
+ Evaluation + +**Base Datasets (15,154 frames)**: + + - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8817** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 107,309 | 0.9131 | 0.862 / 0.914 / 0.933 / 0.943 | 0.905 / 0.935 / 0.942 / 0.945 | 0.233 / 0.192 / 0.159 / 0.142 | +| truck | 24,206 | 0.8552 | 0.711 / 0.843 / 0.919 / 0.948 | 0.795 / 0.877 / 0.918 / 0.934 | 0.297 / 0.225 / 0.192 / 0.180 | +| bus | 5,712 | 0.9081 | 0.829 / 0.912 / 0.945 / 0.947 | 0.876 / 0.916 / 0.931 / 0.932 | 0.312 / 0.146 / 0.146 / 0.146 | +| bicycle | 4,060 | 0.8357 | 0.813 / 0.840 / 0.844 / 0.846 | 0.857 / 0.868 / 0.869 / 0.870 | 0.210 / 0.194 / 0.194 / 0.194 | +| pedestrian | 77,369 | 0.8966 | 0.877 / 0.895 / 0.903 / 0.911 | 0.857 / 0.867 / 0.874 / 0.878 | 0.148 / 0.148 / 0.148 / 0.147 | +| **ALL** | 218,656 | 0.8817 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7002** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,080 | 0.8174 | 0.708 / 0.817 / 0.864 / 0.881 | 0.782 / 0.844 / 0.867 / 0.872 | 0.212 / 0.166 / 0.164 / 0.161 | +| truck | 27,651 | 0.6660 | 0.463 / 0.626 / 0.759 / 0.815 | 0.612 / 0.714 / 0.787 / 0.812 | 0.229 / 0.190 / 0.154 / 0.130 | +| bus | 4,761 | 0.6414 | 0.393 / 0.602 / 0.775 / 0.795 | 0.554 / 0.691 / 0.798 / 0.807 | 0.324 / 0.219 / 0.181 / 0.138 | +| bicycle | 2,365 | 0.6430 | 0.586 / 0.658 / 0.663 / 0.666 | 0.683 / 0.715 / 0.716 / 0.717 | 0.141 / 0.141 / 0.141 / 0.141 | +| pedestrian 
| 37,523 | 0.7331 | 0.711 / 0.730 / 0.741 / 0.750 | 0.732 / 0.742 / 0.748 / 0.753 | 0.145 / 0.145 / 0.145 / 0.144 | +| **ALL** | 166,380 | 0.7002 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5600** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 36,895 | 0.6578 | 0.498 / 0.656 / 0.726 / 0.751 | 0.626 / 0.714 / 0.750 / 0.760 | 0.168 / 0.143 / 0.137 / 0.132 | +| truck | 17,759 | 0.5131 | 0.206 / 0.450 / 0.648 / 0.749 | 0.439 / 0.611 / 0.720 / 0.775 | 0.240 / 0.193 / 0.134 / 0.124 | +| bus | 2,852 | 0.5178 | 0.313 / 0.520 / 0.608 / 0.630 | 0.534 / 0.659 / 0.704 / 0.714 | 0.244 / 0.166 / 0.140 / 0.140 | +| bicycle | 519 | 0.4296 | 0.315 / 0.421 / 0.491 / 0.491 | 0.503 / 0.563 / 0.592 / 0.592 | 0.180 / 0.180 / 0.180 / 0.180 | +| pedestrian | 17,091 | 0.6815 | 0.660 / 0.678 / 0.687 / 0.700 | 0.698 / 0.708 / 0.712 / 0.719 | 0.126 / 0.126 / 0.126 / 0.126 | +| **ALL** | 75,116 | 0.5600 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7777** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 238,284 | 0.8504 | 0.760 / 0.851 / 0.888 / 0.903 | 0.818 / 0.868 / 0.886 / 0.890 | 0.219 / 0.184 / 0.161 / 0.158 | +| truck | 69,616 | 0.7065 | 0.492 / 0.671 / 0.802 / 0.861 | 0.641 / 0.752 / 0.822 / 0.851 | 0.251 / 0.216 / 0.173 / 0.136 | +| bus | 13,325 | 0.7443 | 0.575 / 0.735 / 0.827 / 0.840 | 0.703 / 0.791 / 0.843 / 0.849 | 0.345 / 0.181 / 0.181 / 0.146 | +| bicycle | 6,944 | 0.7538 | 0.714 / 0.761 / 0.769 / 0.771 | 0.776 / 0.797 / 0.800 / 0.801 | 0.186 / 0.176 / 0.176 / 0.176 | +| pedestrian | 131,983 | 0.8332 | 0.813 / 0.831 / 0.840 / 0.849 | 0.802 / 0.812 / 0.818 / 0.824 | 0.144 / 0.145 / 0.145 / 0.145 | +| **ALL** | 460,152 | 0.7777 | — | — | — | + +--- + +**LargeBus**: db_largebus_v1 + 
db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8876** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 14,883 | 0.9176 | 0.876 / 0.916 / 0.934 / 0.944 | 0.917 / 0.943 / 0.947 / 0.949 | 0.245 / 0.154 / 0.154 / 0.154 | +| truck | 1,193 | 0.8727 | 0.747 / 0.873 / 0.926 / 0.944 | 0.829 / 0.900 / 0.924 / 0.928 | 0.269 / 0.206 / 0.157 / 0.157 | +| bus | 336 | 0.9443 | 0.824 / 0.975 / 0.989 / 0.989 | 0.878 / 0.974 / 0.984 / 0.984 | 0.439 / 0.338 / 0.269 / 0.269 | +| bicycle | 740 | 0.8396 | 0.764 / 0.848 / 0.869 / 0.877 | 0.833 / 0.862 / 0.866 / 0.871 | 0.194 / 0.194 / 0.182 / 0.182 | +| pedestrian | 5,059 | 0.8639 | 0.848 / 0.863 / 0.869 / 0.876 | 0.837 / 0.845 / 0.850 / 0.853 | 0.167 / 0.167 / 0.167 / 0.154 | +| **ALL** | 22,211 | 0.8876 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7392** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 10,994 | 0.8425 | 0.745 / 0.846 / 0.883 / 0.896 | 0.810 / 0.869 / 0.886 / 0.891 | 0.210 / 0.170 / 0.153 / 0.153 | +| truck | 1,011 | 0.7288 | 0.537 / 0.722 / 0.818 / 0.838 | 0.670 / 0.784 / 0.834 / 0.840 | 0.184 / 0.158 / 0.113 / 0.113 | +| bus | 143 | 0.8580 | 0.589 / 0.944 / 0.944 / 0.956 | 0.730 / 0.929 / 0.929 / 0.929 | 0.510 / 0.463 / 0.463 / 0.463 | +| bicycle | 463 | 0.5826 | 0.477 / 0.607 / 0.622 / 0.625 | 0.606 / 0.667 / 0.671 / 0.673 | 0.118 / 0.112 / 0.102 / 0.102 | +| pedestrian | 3,754 | 0.6839 | 0.664 / 0.681 / 0.690 / 0.702 | 0.698 / 0.705 / 0.711 / 0.717 | 0.121 / 0.117 / 0.117 / 0.117 | +| **ALL** | 16,365 | 0.7392 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5572** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | 
optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 3,018 | 0.7091 | 0.556 / 0.712 / 0.776 / 0.792 | 0.665 / 0.747 / 0.778 / 0.786 | 0.205 / 0.181 / 0.181 / 0.181 | +| truck | 602 | 0.6393 | 0.365 / 0.651 / 0.760 / 0.781 | 0.553 / 0.730 / 0.789 / 0.798 | 0.208 / 0.208 / 0.152 / 0.152 | +| bus | 60 | 0.6121 | 0.420 / 0.637 / 0.696 / 0.696 | 0.583 / 0.725 / 0.765 / 0.765 | 0.275 / 0.197 / 0.197 / 0.197 | +| bicycle | 85 | 0.3386 | 0.244 / 0.355 / 0.378 / 0.378 | 0.446 / 0.514 / 0.524 / 0.524 | 0.181 / 0.181 / 0.137 / 0.137 | +| pedestrian | 1,121 | 0.4870 | 0.473 / 0.483 / 0.490 / 0.502 | 0.579 / 0.586 / 0.591 / 0.593 | 0.137 / 0.137 / 0.137 / 0.137 | +| **ALL** | 4,886 | 0.5572 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8086** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 28,895 | 0.8789 | 0.806 / 0.881 / 0.909 / 0.919 | 0.853 / 0.896 / 0.908 / 0.911 | 0.245 / 0.185 / 0.176 / 0.170 | +| truck | 2,806 | 0.7783 | 0.597 / 0.778 / 0.859 / 0.880 | 0.714 / 0.824 / 0.865 / 0.870 | 0.206 / 0.206 / 0.157 / 0.155 | +| bus | 539 | 0.8898 | 0.718 / 0.931 / 0.952 / 0.958 | 0.808 / 0.931 / 0.937 / 0.937 | 0.382 / 0.354 / 0.354 / 0.354 | +| bicycle | 1,288 | 0.7288 | 0.641 / 0.744 / 0.762 / 0.768 | 0.729 / 0.769 / 0.773 / 0.776 | 0.176 / 0.176 / 0.176 / 0.172 | +| pedestrian | 9,934 | 0.7670 | 0.749 / 0.765 / 0.772 / 0.782 | 0.757 / 0.765 / 0.771 / 0.775 | 0.137 / 0.137 / 0.137 / 0.137 | +| **ALL** | 43,462 | 0.8086 | — | — | — | + +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8776** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | 
---: | :---- | :---- | :---- | +| car | 49,637 | 0.8907 | 0.841 / 0.890 / 0.909 / 0.922 | 0.896 / 0.924 / 0.931 / 0.934 | 0.269 / 0.199 / 0.159 / 0.135 | +| truck | 5,754 | 0.8438 | 0.718 / 0.833 / 0.894 / 0.930 | 0.794 / 0.862 / 0.893 / 0.915 | 0.222 / 0.194 / 0.171 / 0.171 | +| bus | 1,939 | 0.9473 | 0.878 / 0.942 / 0.983 / 0.986 | 0.925 / 0.963 / 0.981 / 0.982 | 0.206 / 0.140 / 0.140 / 0.140 | +| bicycle | 639 | 0.8665 | 0.854 / 0.871 / 0.871 / 0.871 | 0.867 / 0.875 / 0.875 / 0.875 | 0.176 / 0.176 / 0.176 / 0.176 | +| pedestrian | 14,362 | 0.8397 | 0.813 / 0.836 / 0.849 / 0.861 | 0.806 / 0.817 / 0.824 / 0.831 | 0.169 / 0.151 / 0.151 / 0.165 | +| **ALL** | 72,331 | 0.8776 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6805** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 47,568 | 0.7957 | 0.662 / 0.795 / 0.851 / 0.875 | 0.760 / 0.838 / 0.866 / 0.874 | 0.212 / 0.184 / 0.164 / 0.164 | +| truck | 4,090 | 0.6451 | 0.451 / 0.622 / 0.729 / 0.778 | 0.606 / 0.711 / 0.768 / 0.789 | 0.234 / 0.205 / 0.176 / 0.165 | +| bus | 1,935 | 0.7955 | 0.571 / 0.760 / 0.912 / 0.938 | 0.694 / 0.815 / 0.906 / 0.916 | 0.345 / 0.240 / 0.182 / 0.168 | +| bicycle | 295 | 0.5394 | 0.494 / 0.552 / 0.554 / 0.557 | 0.628 / 0.669 / 0.669 / 0.669 | 0.137 / 0.138 / 0.138 / 0.138 | +| pedestrian | 6,529 | 0.6266 | 0.591 / 0.622 / 0.639 / 0.654 | 0.661 / 0.676 / 0.682 / 0.689 | 0.140 / 0.140 / 0.140 / 0.140 | +| **ALL** | 60,417 | 0.6805 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.4902** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 17,353 | 0.6483 | 0.452 / 0.639 / 0.734 / 0.768 | 0.608 / 0.712 / 0.760 / 0.774 | 0.168 / 0.153 / 0.143 / 0.132 | +| truck | 2,570 | 0.4871 | 0.209 / 0.419 / 0.619 / 
0.702 | 0.425 / 0.578 / 0.700 / 0.746 | 0.199 / 0.127 / 0.126 / 0.124 | +| bus | 316 | 0.5172 | 0.246 / 0.532 / 0.626 / 0.665 | 0.433 / 0.640 / 0.701 / 0.721 | 0.173 / 0.100 / 0.100 / 0.089 | +| bicycle | 70 | 0.4406 | 0.382 / 0.438 / 0.471 / 0.471 | 0.584 / 0.619 / 0.637 / 0.637 | 0.193 / 0.193 / 0.193 / 0.193 | +| pedestrian | 1,673 | 0.3578 | 0.344 / 0.354 / 0.362 / 0.371 | 0.492 / 0.496 / 0.500 / 0.505 | 0.137 / 0.107 / 0.107 / 0.111 | +| **ALL** | 21,982 | 0.4902 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7822** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 114,558 | 0.8292 | 0.725 / 0.826 / 0.872 / 0.894 | 0.800 / 0.859 / 0.881 / 0.888 | 0.232 / 0.194 / 0.164 / 0.158 | +| truck | 12,414 | 0.7169 | 0.534 / 0.691 / 0.795 / 0.847 | 0.665 / 0.760 / 0.816 / 0.843 | 0.251 / 0.194 / 0.166 / 0.151 | +| bus | 4,190 | 0.8590 | 0.703 / 0.840 / 0.938 / 0.955 | 0.790 / 0.874 / 0.929 / 0.936 | 0.345 / 0.186 / 0.182 / 0.168 | +| bicycle | 1,004 | 0.7505 | 0.724 / 0.758 / 0.760 / 0.760 | 0.781 / 0.798 / 0.799 / 0.799 | 0.176 / 0.176 / 0.176 / 0.176 | +| pedestrian | 22,564 | 0.7556 | 0.727 / 0.752 / 0.766 / 0.778 | 0.744 / 0.756 / 0.763 / 0.770 | 0.152 / 0.151 / 0.151 / 0.151 | +| **ALL** | 154,730 | 0.7822 | — | — | — | + +--- + +**JPNTaxi_Gen2**: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (9,975 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8837** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 42,789 | 0.9393 | 0.882 / 0.945 / 0.964 / 0.967 | 0.911 / 0.946 / 0.954 / 0.955 | 0.211 / 0.168 / 0.142 / 0.142 | +| truck | 17,259 | 0.8587 | 0.709 / 0.846 / 0.926 / 0.954 | 0.795 / 0.881 / 0.926 / 0.941 | 0.371 / 0.243 / 0.234 / 0.189 | +| bus | 3,437 | 0.8802 | 0.798 / 0.889 / 0.916 / 0.918 
| 0.850 / 0.886 / 0.898 / 0.899 | 0.369 / 0.146 / 0.128 / 0.128 | +| bicycle | 2,681 | 0.8268 | 0.816 / 0.830 / 0.831 / 0.831 | 0.865 / 0.871 / 0.872 / 0.872 | 0.219 / 0.219 / 0.219 / 0.219 | +| pedestrian | 57,948 | 0.9135 | 0.896 / 0.912 / 0.919 / 0.926 | 0.872 / 0.882 / 0.889 / 0.893 | 0.148 / 0.140 / 0.143 / 0.140 | +| **ALL** | 124,114 | 0.8837 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.6901** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 35,518 | 0.8382 | 0.757 / 0.838 / 0.874 / 0.885 | 0.803 / 0.847 / 0.862 / 0.865 | 0.212 / 0.165 / 0.162 / 0.161 | +| truck | 22,550 | 0.6676 | 0.462 / 0.623 / 0.762 / 0.823 | 0.611 / 0.711 / 0.788 / 0.816 | 0.247 / 0.193 / 0.154 / 0.130 | +| bus | 2,683 | 0.5007 | 0.240 / 0.447 / 0.649 / 0.667 | 0.421 / 0.581 / 0.708 / 0.717 | 0.242 / 0.151 / 0.144 / 0.144 | +| bicycle | 1,607 | 0.6794 | 0.635 / 0.692 / 0.695 / 0.697 | 0.719 / 0.740 / 0.742 / 0.743 | 0.146 / 0.141 / 0.141 / 0.141 | +| pedestrian | 27,240 | 0.7645 | 0.745 / 0.762 / 0.772 / 0.780 | 0.753 / 0.764 / 0.769 / 0.773 | 0.156 / 0.144 / 0.145 / 0.145 | +| **ALL** | 89,598 | 0.6901 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5750** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 16,524 | 0.6601 | 0.539 / 0.665 / 0.710 / 0.727 | 0.643 / 0.715 / 0.740 / 0.745 | 0.138 / 0.108 / 0.108 / 0.109 | +| truck | 14,587 | 0.5131 | 0.200 / 0.448 / 0.649 / 0.756 | 0.438 / 0.613 / 0.721 / 0.779 | 0.248 / 0.193 / 0.134 / 0.124 | +| bus | 2,476 | 0.5145 | 0.318 / 0.515 / 0.602 / 0.623 | 0.547 / 0.661 / 0.704 / 0.714 | 0.244 / 0.163 / 0.152 / 0.148 | +| bicycle | 364 | 0.4541 | 0.324 / 0.439 / 0.527 / 0.527 | 0.504 / 0.567 / 0.604 / 0.604 | 0.174 / 0.171 / 0.171 / 0.171 | +| 
pedestrian | 14,297 | 0.7331 | 0.711 / 0.730 / 0.739 / 0.753 | 0.731 / 0.742 / 0.746 / 0.754 | 0.126 / 0.126 / 0.126 / 0.126 | +| **ALL** | 48,248 | 0.5750 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7715** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,831 | 0.8661 | 0.785 / 0.869 / 0.900 / 0.910 | 0.828 / 0.871 / 0.884 / 0.887 | 0.198 / 0.165 / 0.150 / 0.141 | +| truck | 54,396 | 0.7010 | 0.478 / 0.662 / 0.800 / 0.864 | 0.632 / 0.747 / 0.821 / 0.852 | 0.273 / 0.216 / 0.173 / 0.134 | +| bus | 8,596 | 0.6721 | 0.500 / 0.665 / 0.756 / 0.768 | 0.648 / 0.737 / 0.792 / 0.798 | 0.326 / 0.151 / 0.146 / 0.146 | +| bicycle | 4,652 | 0.7611 | 0.731 / 0.766 / 0.773 / 0.775 | 0.790 / 0.805 / 0.809 / 0.809 | 0.186 / 0.187 / 0.187 / 0.187 | +| pedestrian | 99,485 | 0.8573 | 0.838 / 0.855 / 0.864 / 0.872 | 0.820 / 0.830 / 0.836 / 0.841 | 0.145 / 0.143 / 0.145 / 0.143 | +| **ALL** | 261,960 | 0.7715 | — | — | — | + +
+ +--- + ### BEVFusion-LiDAR base/2.6.0
From 07c2e110802ec2537d4c620d9af7f7e1b8120b97 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 17:39:32 +0900 Subject: [PATCH 042/183] Update base docstring --- projects/BEVFusion/docs/BEVFusion-L/v2/base.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md index 72d47c4b3..ecdd1e9a8 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/base.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/base.md @@ -277,7 +277,7 @@
Changes -- Train by min-max normalizing (x, y, z, intensity, time_lag) into [0, 1], and then mapping it to fourier features [[1]](https://arxiv.org/pdf/2006.10739). +- Train with min-max normalization of (x, y, z, intensity, time_lag) into [0, 1], followed by mapping to Fourier features [[1]](https://arxiv.org/pdf/2006.10739).
From 2665b277bda7865a10f04daa37b8eaa8ea6c5606 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 21 Apr 2026 19:15:35 +0900 Subject: [PATCH 043/183] Update j6gen2_base and jpntaxi_base docstring --- .../v2/{j6gen2.md => j6gen2_base.md} | 220 +++++++++++++++++- .../docs/BEVFusion-L/v2/jpntaxi_base.md | 153 ++++++++++++ 2 files changed, 363 insertions(+), 10 deletions(-) rename projects/BEVFusion/docs/BEVFusion-L/v2/{j6gen2.md => j6gen2_base.md} (54%) create mode 100644 projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md similarity index 54% rename from projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md rename to projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md index 8ad986677..54e994313 100644 --- a/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2.md +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/j6gen2_base.md @@ -64,7 +64,8 @@ | Model version | mAP | mAPH | car
(64,520) | truck
(6,947) | bus
(2,275) | bicycle
(1,379) | pedestrian
(19,421) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8828 | 0.8387 | 0.9022 | 0.8627 | 0.9440 | 0.8483 | 0.8569 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8810 | 0.8380 | 0.8873 | 0.8586 | 0.9476 | 0.8583 | 0.8534 |
@@ -73,7 +74,8 @@ | Model version | mAP | mAPH | car
(58,562) | truck
(5,101) | bus
(2,078) | bicycle
(758) | pedestrian
(10,283) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7193 | 0.6620 | 0.8197 | 0.6856 | 0.8249 | 0.5862 | 0.6801 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7032 | 0.6483 | 0.7876 | 0.6830 | 0.7911 | 0.5802 | 0.6741 |
@@ -82,7 +84,8 @@ | Model version | mAP | mAPH | car
(20,371) | truck
(3,172) | bus
(376) | bicycle
(155) | pedestrian
(2,794) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5223 | 0.4757 | 0.6814 | 0.5181 | 0.5381 | 0.4165 | 0.4573 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4938 | 0.4494 | 0.6564 | 0.5192 | 0.3777 | 0.4406 | 0.4752 | @@ -91,6 +94,7 @@ | Model version | mAP | mAPH | car
(143,453) | truck
(15,220) | bus
(4,729) | bicycle
(2,292) | pedestrian
(32,498) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7990 | 0.7487 | 0.8508 | 0.7435 | 0.8711 | 0.7487 | 0.7809 | | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7903 | 0.7413 | 0.8266 | 0.7409 | 0.8510 | 0.7541 | 0.7790 | @@ -112,6 +116,7 @@ | Model version | mAP | mAPH | car
(14,883) | truck
(1,193) | bus
(336) | bicycle
(740) | pedestrian
(5,059) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8947 | 0.8393 | 0.9231 | 0.8893 | 0.9564 | 0.8264 | 0.8782 | | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8985 | 0.8484 | 0.9087 | 0.8974 | 0.9636 | 0.8447 | 0.8780 | @@ -121,7 +126,8 @@ | Model version | mAP | mAPH | car
(10,994) | truck
(1,011) | bus
(143) | bicycle
(463) | pedestrian
(3,754) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7679 | 0.7089 | 0.8567 | 0.7666 | 0.8723 | 0.5955 | 0.7485 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7475 | 0.6925 | 0.8317 | 0.7758 | 0.7910 | 0.5959 | 0.7433 | @@ -130,7 +136,8 @@ | Model version | mAP | mAPH | car
(3,018) | truck
(602) | bus
(60) | bicycle
(85) | pedestrian
(1,121) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5924 | 0.5370 | 0.7238 | 0.6616 | 0.6305 | 0.3964 | 0.5497 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.5636 | 0.5191 | 0.7125 | 0.6383 | 0.4781 | 0.4293 | 0.5595 | @@ -139,7 +146,8 @@ | Model version | mAP | mAPH | car
(28,895) | truck
(2,806) | bus
(539) | bicycle
(1,288) | pedestrian
(9,934) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8267 | 0.7675 | 0.8888 | 0.8055 | 0.9009 | 0.7334 | 0.8051 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8198 | 0.7666 | 0.8690 | 0.8052 | 0.8756 | 0.7455 | 0.8036 | @@ -166,7 +174,8 @@ | Model version | mAP | mAPH | car
(49,637) | truck
(5,754) | bus
(1,939) | bicycle
(639) | pedestrian
(14,362) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.8836 | 0.8431 | 0.8942 | 0.8569 | 0.9393 | 0.8780 | 0.8494 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.8788 | 0.8368 | 0.8813 | 0.8505 | 0.9427 | 0.8749 | 0.8448 | @@ -175,7 +184,8 @@ | Model version | mAP | mAPH | car
(47,568) | truck
(4,090) | bus
(1,935) | bicycle
(295) | pedestrian
(6,529) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7040 | 0.6488 | 0.8118 | 0.6662 | 0.8221 | 0.5781 | 0.6417 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.6864 | 0.6344 | 0.7772 | 0.6609 | 0.7913 | 0.5671 | 0.6357 | @@ -184,7 +194,8 @@ | Model version | mAP | mAPH | car
(17,353) | truck
(2,570) | bus
(316) | bicycle
(70) | pedestrian
(1,673) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.5030 | 0.4572 | 0.6739 | 0.4847 | 0.5186 | 0.4430 | 0.3948 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.4766 | 0.4309 | 0.6465 | 0.4903 | 0.3618 | 0.4627 | 0.4214 | @@ -193,7 +204,8 @@ | Model version | mAP | mAPH | car
(114,558) | truck
(12,414) | bus
(4,190) | bicycle
(1,004) | pedestrian
(22,564) | | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | - | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 | + | BEVFusion-LiDAR j6gen2_base/2.7.1 | 0.7958 | 0.7472 | 0.8408 | 0.7294 | 0.8673 | 0.7710 | 0.7706 | + | BEVFusion-LiDAR j6gen2_base/2.6.1 | 0.7851 | 0.7375 | 0.8166 | 0.7262 | 0.8481 | 0.7661 | 0.7687 | @@ -201,6 +213,194 @@ ## Release +### BEVFusion-LiDAR J6Gen2_base/2.7.1 + +
+ Changes + +- Finetune from `BEVFusion-LiDAR base/2.7.0` with j6gen2 base dataset and intensity. +
+ +
+ Artifacts + +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/ab0f33f5-2c8e-4adf-b122-f8f0c229c91e?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1Sw2UkqsoOP_YhoPpLqaBvHFnBapBV1kw/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1M_Ae0rQ9L1I4NbzSL9tlJ8D0KVGvunKF/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2_base/v2.7.1/best_epoch_28.pth) + - [Google drive](https://drive.google.com/file/d/1xsFKCIkqVnt273o2SKjjCayuh_4IV-Vd/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours +- Batch size: 8*8 = 64 +- Training Dataset (frames: 55,714): + - j6gen2: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 (43,109 frames) + - largebus: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (12,605 frames) + +
+ +
+ Evaluation + +**J6Gen2_base Datasets (5,179 frames)**: + + - j6gen2 (3,951 frames): db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 + - largebus (1,228 frames): db_largebus_v1 + db_largebus_v2 + db_largebus_v3 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8828** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 64,520 | 0.9022 | 0.853 / 0.901 / 0.921 / 0.933 | 0.904 / 0.931 / 0.937 / 0.939 | 0.260 / 0.193 / 0.180 / 0.172 | +| truck | 6,947 | 0.8627 | 0.736 / 0.863 / 0.910 / 0.942 | 0.800 / 0.877 / 0.903 / 0.920 | 0.244 / 0.191 / 0.188 / 0.166 | +| bus | 2,275 | 0.9440 | 0.866 / 0.940 / 0.983 / 0.986 | 0.912 / 0.958 / 0.978 / 0.980 | 0.203 / 0.177 / 0.163 / 0.138 | +| bicycle | 1,379 | 0.8483 | 0.802 / 0.849 / 0.869 / 0.874 | 0.847 / 0.867 / 0.876 / 0.879 | 0.205 / 0.191 / 0.172 / 0.172 | +| pedestrian | 19,421 | 0.8569 | 0.834 / 0.854 / 0.865 / 0.875 | 0.822 / 0.833 / 0.838 / 0.844 | 0.163 / 0.152 / 0.152 / 0.152 | +| **ALL** | 94,542 | 0.8828 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7193** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 58,562 | 0.8197 | 0.694 / 0.818 / 0.873 / 0.893 | 0.782 / 0.853 / 0.879 / 0.886 | 0.228 / 0.173 / 0.164 / 0.164 | +| truck | 5,101 | 0.6856 | 0.484 / 0.670 / 0.773 / 0.815 | 0.633 / 0.743 / 0.798 / 0.816 | 0.213 / 0.206 / 0.184 / 0.164 | +| bus | 2,078 | 0.8249 | 0.626 / 0.815 / 0.918 / 0.941 | 0.730 / 0.846 / 0.904 / 0.919 | 0.342 / 0.211 / 0.210 / 0.160 | +| bicycle | 758 | 0.5862 | 0.495 / 0.603 / 0.622 / 0.624 | 0.637 / 0.679 / 0.683 / 0.683 | 0.183 / 0.155 / 0.155 / 0.183 | +| pedestrian | 10,283 | 0.6801 | 0.650 / 0.676 / 0.691 / 0.703 | 0.692 / 0.705 / 
0.713 / 0.720 | 0.136 / 0.136 / 0.136 / 0.136 | +| **ALL** | 76,782 | 0.7193 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5223** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 20,371 | 0.6814 | 0.493 / 0.674 / 0.763 / 0.796 | 0.638 / 0.737 / 0.781 / 0.795 | 0.193 / 0.159 / 0.151 / 0.151 | +| truck | 3,172 | 0.5181 | 0.227 / 0.454 / 0.652 / 0.738 | 0.447 / 0.601 / 0.715 / 0.762 | 0.206 / 0.206 / 0.162 / 0.140 | +| bus | 376 | 0.5381 | 0.272 / 0.557 / 0.643 / 0.680 | 0.462 / 0.669 / 0.714 / 0.731 | 0.217 / 0.151 / 0.115 / 0.115 | +| bicycle | 155 | 0.4165 | 0.316 / 0.419 / 0.466 / 0.466 | 0.487 / 0.553 / 0.589 / 0.589 | 0.199 / 0.166 / 0.190 / 0.190 | +| pedestrian | 2,794 | 0.4573 | 0.443 / 0.452 / 0.462 / 0.472 | 0.564 / 0.569 / 0.573 / 0.578 | 0.120 / 0.120 / 0.120 / 0.120 | +| **ALL** | 26,868 | 0.5223 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7990** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 143,453 | 0.8508 | 0.752 / 0.849 / 0.891 / 0.910 | 0.820 / 0.874 / 0.894 / 0.900 | 0.232 / 0.189 / 0.174 / 0.164 | +| truck | 15,220 | 0.7435 | 0.555 / 0.725 / 0.824 / 0.871 | 0.677 / 0.780 / 0.834 / 0.858 | 0.234 / 0.206 / 0.186 / 0.165 | +| bus | 4,729 | 0.8711 | 0.726 / 0.865 / 0.939 / 0.954 | 0.804 / 0.890 / 0.928 / 0.937 | 0.408 / 0.211 / 0.177 / 0.161 | +| bicycle | 2,292 | 0.7487 | 0.682 / 0.754 / 0.777 / 0.781 | 0.760 / 0.789 / 0.799 / 0.801 | 0.191 / 0.189 / 0.189 / 0.190 | +| pedestrian | 32,498 | 0.7809 | 0.756 / 0.777 / 0.790 / 0.801 | 0.760 / 0.772 / 0.778 / 0.784 | 0.151 / 0.136 / 0.136 / 0.136 | +| **ALL** | 198,192 | 0.7990 | — | — | — | + +--- + +**LargeBus**: db_largebus_v1 + db_largebus_v2 + db_largebus_v3 (1,228 frames) + +**Total BEV Center 
Distance mAP (eval range = 0.0 - 50.0m): 0.8947** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 14,883 | 0.9231 | 0.884 / 0.925 / 0.937 / 0.946 | 0.923 / 0.947 / 0.952 / 0.953 | 0.234 / 0.178 / 0.178 / 0.178 | +| truck | 1,193 | 0.8893 | 0.754 / 0.905 / 0.938 / 0.961 | 0.832 / 0.922 / 0.939 / 0.945 | 0.269 / 0.201 / 0.188 / 0.116 | +| bus | 336 | 0.9564 | 0.872 / 0.983 / 0.985 / 0.986 | 0.904 / 0.962 / 0.965 / 0.965 | 0.419 / 0.174 / 0.174 / 0.174 | +| bicycle | 740 | 0.8264 | 0.749 / 0.825 / 0.862 / 0.870 | 0.824 / 0.854 / 0.867 / 0.872 | 0.249 / 0.247 / 0.198 / 0.198 | +| pedestrian | 5,059 | 0.8782 | 0.862 / 0.876 / 0.883 / 0.891 | 0.849 / 0.857 / 0.861 / 0.866 | 0.148 / 0.148 / 0.139 / 0.140 | +| **ALL** | 22,211 | 0.8947 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7679** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 10,994 | 0.8567 | 0.759 / 0.860 / 0.897 / 0.911 | 0.824 / 0.881 / 0.898 / 0.901 | 0.210 / 0.164 / 0.160 / 0.160 | +| truck | 1,011 | 0.7666 | 0.593 / 0.770 / 0.843 / 0.860 | 0.710 / 0.818 / 0.851 / 0.854 | 0.234 / 0.219 / 0.166 / 0.150 | +| bus | 143 | 0.8723 | 0.698 / 0.921 / 0.932 / 0.939 | 0.788 / 0.904 / 0.911 / 0.911 | 0.294 / 0.498 / 0.498 / 0.498 | +| bicycle | 463 | 0.5955 | 0.472 / 0.616 / 0.647 / 0.648 | 0.625 / 0.685 / 0.692 / 0.692 | 0.151 / 0.151 / 0.151 / 0.151 | +| pedestrian | 3,754 | 0.7485 | 0.726 / 0.747 / 0.755 / 0.766 | 0.740 / 0.749 / 0.755 / 0.761 | 0.124 / 0.124 / 0.121 / 0.121 | +| **ALL** | 16,365 | 0.7679 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5924** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 
3,018 | 0.7238 | 0.573 / 0.728 / 0.789 / 0.806 | 0.688 / 0.765 / 0.792 / 0.801 | 0.221 / 0.228 / 0.158 / 0.158 | +| truck | 602 | 0.6616 | 0.381 / 0.676 / 0.780 / 0.809 | 0.575 / 0.756 / 0.811 / 0.822 | 0.216 / 0.208 / 0.176 / 0.176 | +| bus | 60 | 0.6305 | 0.434 / 0.626 / 0.730 / 0.732 | 0.608 / 0.745 / 0.793 / 0.793 | 0.217 / 0.217 / 0.087 / 0.087 | +| bicycle | 85 | 0.3964 | 0.298 / 0.382 / 0.452 / 0.453 | 0.468 / 0.544 / 0.595 / 0.595 | 0.166 / 0.166 / 0.166 / 0.166 | +| pedestrian | 1,121 | 0.5497 | 0.536 / 0.546 / 0.552 / 0.565 | 0.624 / 0.629 / 0.633 / 0.638 | 0.120 / 0.118 / 0.118 / 0.118 | +| **ALL** | 4,886 | 0.5924 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.8267** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 28,895 | 0.8888 | 0.815 / 0.891 / 0.919 / 0.930 | 0.864 / 0.905 / 0.917 / 0.919 | 0.230 / 0.180 / 0.180 / 0.176 | +| truck | 2,806 | 0.8055 | 0.623 / 0.816 / 0.879 / 0.903 | 0.736 / 0.851 / 0.882 / 0.888 | 0.233 / 0.207 / 0.183 / 0.169 | +| bus | 539 | 0.9009 | 0.783 / 0.929 / 0.945 / 0.948 | 0.838 / 0.921 / 0.929 / 0.929 | 0.430 / 0.208 / 0.208 / 0.208 | +| bicycle | 1,288 | 0.7334 | 0.637 / 0.738 / 0.776 / 0.783 | 0.730 / 0.774 / 0.793 / 0.796 | 0.186 / 0.161 / 0.161 / 0.161 | +| pedestrian | 9,934 | 0.8051 | 0.787 / 0.803 / 0.811 / 0.820 | 0.782 / 0.790 / 0.796 / 0.801 | 0.149 / 0.135 / 0.128 / 0.135 | +| **ALL** | 43,462 | 0.8267 | — | — | — | + +--- + +**J6Gen2**: db_j6gen2_v1 + db_j6gen2_v2 + db_j6gen2_v3 + db_j6gen2_v4 + db_j6gen2_v5 + db_j6gen2_v6 + db_j6gen2_v7 + db_j6gen2_v8 + db_j6gen2_v9 (3,951 frames) + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8836** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 49,637 | 0.8942 | 0.843 / 0.891 / 0.912 / 0.931 | 
0.899 / 0.926 / 0.933 / 0.935 | 0.277 / 0.202 / 0.189 / 0.172 | +| truck | 5,754 | 0.8569 | 0.732 / 0.854 / 0.905 / 0.937 | 0.794 / 0.867 / 0.896 / 0.915 | 0.244 / 0.191 / 0.189 / 0.180 | +| bus | 1,939 | 0.9393 | 0.864 / 0.932 / 0.975 / 0.986 | 0.916 / 0.958 / 0.981 / 0.984 | 0.203 / 0.187 / 0.139 / 0.138 | +| bicycle | 639 | 0.8780 | 0.868 / 0.881 / 0.881 / 0.882 | 0.881 / 0.888 / 0.888 / 0.888 | 0.172 / 0.172 / 0.172 / 0.172 | +| pedestrian | 14,362 | 0.8494 | 0.824 / 0.846 / 0.858 / 0.869 | 0.813 / 0.825 / 0.831 / 0.837 | 0.163 / 0.161 / 0.155 / 0.155 | +| **ALL** | 72,331 | 0.8836 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7040** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 47,568 | 0.8118 | 0.679 / 0.810 / 0.868 / 0.890 | 0.772 / 0.846 / 0.874 / 0.883 | 0.228 / 0.173 / 0.164 / 0.163 | +| truck | 4,090 | 0.6662 | 0.459 / 0.645 / 0.757 / 0.804 | 0.614 / 0.724 / 0.785 / 0.807 | 0.213 / 0.206 / 0.184 / 0.164 | +| bus | 1,935 | 0.8221 | 0.621 / 0.806 / 0.919 / 0.943 | 0.727 / 0.842 / 0.904 / 0.921 | 0.413 / 0.211 / 0.206 / 0.160 | +| bicycle | 295 | 0.5781 | 0.542 / 0.588 / 0.590 / 0.592 | 0.674 / 0.686 / 0.686 / 0.690 | 0.215 / 0.206 / 0.206 / 0.206 | +| pedestrian | 6,529 | 0.6417 | 0.608 / 0.636 / 0.655 / 0.668 | 0.666 / 0.682 / 0.692 / 0.699 | 0.136 / 0.136 / 0.136 / 0.136 | +| **ALL** | 60,417 | 0.7040 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 0.5030** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 17,353 | 0.6739 | 0.479 / 0.664 / 0.759 / 0.794 | 0.631 / 0.732 / 0.780 / 0.794 | 0.193 / 0.159 / 0.146 / 0.146 | +| truck | 2,570 | 0.4847 | 0.194 / 0.401 / 0.621 / 0.723 | 0.414 / 0.562 / 0.692 / 0.751 | 0.206 / 0.179 / 0.130 / 0.128 | +| bus | 316 | 0.5186 
| 0.238 / 0.541 / 0.625 / 0.670 | 0.433 / 0.657 / 0.703 / 0.724 | 0.218 / 0.151 / 0.115 / 0.115 | +| bicycle | 70 | 0.4430 | 0.340 / 0.465 / 0.483 / 0.483 | 0.513 / 0.584 / 0.602 / 0.602 | 0.199 / 0.199 / 0.199 / 0.199 | +| pedestrian | 1,673 | 0.3948 | 0.381 / 0.389 / 0.401 / 0.408 | 0.524 / 0.528 / 0.532 / 0.535 | 0.125 / 0.125 / 0.125 / 0.125 | +| **ALL** | 21,982 | 0.5030 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7958** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 114,558 | 0.8408 | 0.737 / 0.837 / 0.882 / 0.906 | 0.809 / 0.866 / 0.888 / 0.895 | 0.236 / 0.189 / 0.164 / 0.164 | +| truck | 12,414 | 0.7294 | 0.539 / 0.704 / 0.811 / 0.863 | 0.664 / 0.764 / 0.823 / 0.851 | 0.244 / 0.206 / 0.183 / 0.164 | +| bus | 4,190 | 0.8673 | 0.719 / 0.856 / 0.939 / 0.956 | 0.800 / 0.886 / 0.928 / 0.939 | 0.342 / 0.211 / 0.161 / 0.161 | +| bicycle | 1,004 | 0.7710 | 0.747 / 0.778 / 0.780 / 0.780 | 0.801 / 0.813 / 0.814 / 0.815 | 0.191 / 0.191 / 0.191 / 0.191 | +| pedestrian | 22,564 | 0.7706 | 0.743 / 0.766 / 0.781 / 0.792 | 0.751 / 0.764 / 0.771 / 0.778 | 0.152 / 0.146 / 0.136 / 0.146 | +| **ALL** | 154,730 | 0.7958 | — | — | — | + +
+ +--- + ### BEVFusion-LiDAR J6Gen2_base/2.6.1
diff --git a/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md new file mode 100644 index 000000000..fc9e2677d --- /dev/null +++ b/projects/BEVFusion/docs/BEVFusion-L/v2/jpntaxi_base.md @@ -0,0 +1,153 @@ +# Deployed model for BEVFusion-LiDAR JPNTaxi_base/2.X +## Summary + +### Main Parameters + + - **Range:** [122.40m, 122.40m, 8.0m] + - **Voxel Size:** [0.17, 0.17, 0.2] + - **Grid Size:** [1440, 1440, 40] + - **With Intensity** + +### Testing Datasets + +- **Total Frames: 9,975** + +
+ jpntaxi_gen2 (9,975 frames) + - `db_jpntaxigen2_v1` + - `db_jpntaxigen2_v2` + +
+ +### mAP - JPNTaxi_gen2 + +- **Class mAP for BEV Center Distance: 0.5m, 1.0m, 2.0m, 4.0m** + +
+ Eval Range: 0.0 - 50.0m + + | Model version | mAP | mAPH | car
(42,789) | truck
(17,259) | bus
(3,437) | bicycle
(2,681) | pedestrian
(57,948) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.8862 | 0.8586 | 0.9397 | 0.8591 | 0.8839 | 0.8264 | 0.9218 | + +
+ +
+ Eval Range: 50.0 - 90.0m + + | Model version | mAP | mAPH | car
(35,518) | truck
(22,550) | bus
(2,683) | bicycle
(1,607) | pedestrian
(27,240) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7125 | 0.6854 | 0.8453 | 0.6838 | 0.5362 | 0.6969 | 0.8003 | + +
+ +
+ Eval Range: 90.0 - 121.0m + + | Model version | mAP | mAPH | car
(16,524) | truck
(14,587) | bus
(2,476) | bicycle
(364) | pedestrian
(14,297) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.6030 | 0.5762 | 0.6947 | 0.5260 | 0.5030 | 0.5321 | 0.7591 | + +
+ +
+ Eval Range: 0.0 - 121.0m + + | Model version | mAP | mAPH | car
(94,831) | truck
(54,396) | bus
(8,596) | bicycle
(4,652) | pedestrian
(99,485) | + | :---- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | + | BEVFusion-LiDAR jpntaxi_base/2.7.1 | 0.7805 | 0.7527 | 0.8730 | 0.7118 | 0.6785 | 0.7655 | 0.8739 | + +
+ +## Release + +### BEVFusion-LiDAR JPNTaxi_base/2.7.1 + +
+ Changes + +- Finetune from `BEVFusion-LiDAR base/2.7.0` with JPNTaxi_base dataset and intensity. +
+ +
+ Artifacts + +- Deployed onnx and ROS parameter files (for internal) + - [WebAuto](https://evaluation.tier4.jp/evaluation/mlpackages/46f8188d-e3be-4f2f-b989-fd27002610d7/releases/47abcab3-34e1-4971-9bdf-5a2af5d2b2e6?project_id=zWhWRzei) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/deployment.zip) + - [Google drive](https://drive.google.com/file/d/1nQlYrnCjlxXbUamEj7MCL_sKxojoU_wk/view?usp=drive_link) +- Logs (for internal) + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/logs.zip) + - [Google drive](https://drive.google.com/file/d/1q_3zj9nF6mnA5IgyO1QRswS7XqnXqvUH/view?usp=drive_link) +- Pytorch Best checkpoints: + - [model-zoo](https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/jpntaxi_base/v2.7.1/best_epoch_30.pth) + - [Google drive](https://drive.google.com/file/d/1K7rDv7fb8T2haXHxttbZN7FUEoLYESTr/view?usp=drive_link) + +
+ +
+ Training configs + +- [Config file path](https://github.com/KSeangTan/AWML/blob/07c2e110802ec2537d4c620d9af7f7e1b8120b97/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py) +- Train time: NVIDIA H100 80GB * 8 * 30 epochs = 20 hours +- Batch size: 8*8 = 64 +- Training Dataset (frames: 56,287): + - jpntaxi: db_jpntaxi_v1 + db_jpntaxi_v2 + db_jpntaxi_v4 (28,161 frames) + - jpntaxi_gen2: db_jpntaxigen2_v1 + db_jpntaxigen2_v2 (28,126 frames) + +
+ +
+ Evaluation + +**JPNTaxi_gen2 Datasets (9,975 frames)**: + + - jpntaxi_gen2 (9,975 frames): db_jpntaxigen2_v1 + db_jpntaxigen2_v2 + +**Total BEV Center Distance mAP (eval range = 0.0 - 50.0m): 0.8862** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 42,789 | 0.9397 | 0.891 / 0.943 / 0.960 / 0.965 | 0.918 / 0.946 / 0.953 / 0.954 | 0.284 / 0.175 / 0.175 / 0.164 | +| truck | 17,259 | 0.8591 | 0.701 / 0.842 / 0.935 / 0.958 | 0.792 / 0.882 / 0.932 / 0.946 | 0.409 / 0.321 / 0.241 / 0.241 | +| bus | 3,437 | 0.8839 | 0.796 / 0.888 / 0.925 / 0.927 | 0.853 / 0.897 / 0.910 / 0.910 | 0.296 / 0.184 / 0.104 / 0.104 | +| bicycle | 2,681 | 0.8264 | 0.819 / 0.829 / 0.829 / 0.829 | 0.866 / 0.871 / 0.871 / 0.871 | 0.223 / 0.223 / 0.223 / 0.223 | +| pedestrian | 57,948 | 0.9218 | 0.906 / 0.921 / 0.927 / 0.933 | 0.883 / 0.893 / 0.899 / 0.903 | 0.135 / 0.129 / 0.125 / 0.132 | +| **ALL** | 124,114 | 0.8862 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 50.0 - 90.0m): 0.7125** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 35,518 | 0.8453 | 0.763 / 0.846 / 0.881 / 0.891 | 0.819 / 0.860 / 0.875 / 0.879 | 0.227 / 0.180 / 0.166 / 0.166 | +| truck | 22,550 | 0.6838 | 0.475 / 0.640 / 0.782 / 0.838 | 0.632 / 0.730 / 0.808 / 0.831 | 0.286 / 0.195 / 0.167 / 0.128 | +| bus | 2,683 | 0.5362 | 0.263 / 0.524 / 0.668 / 0.689 | 0.465 / 0.660 / 0.742 / 0.751 | 0.241 / 0.180 / 0.174 / 0.171 | +| bicycle | 1,607 | 0.6969 | 0.656 / 0.709 / 0.710 / 0.713 | 0.745 / 0.770 / 0.771 / 0.772 | 0.145 / 0.138 / 0.138 / 0.138 | +| pedestrian | 27,240 | 0.8003 | 0.782 / 0.798 / 0.807 / 0.814 | 0.782 / 0.790 / 0.795 / 0.799 | 0.163 / 0.163 / 0.163 / 0.164 | +| **ALL** | 89,598 | 0.7125 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 90.0 - 121.0m): 
0.6030** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 16,524 | 0.6947 | 0.580 / 0.698 / 0.744 / 0.757 | 0.692 / 0.755 / 0.778 / 0.781 | 0.202 / 0.154 / 0.151 / 0.144 | +| truck | 14,587 | 0.5260 | 0.229 / 0.469 / 0.639 / 0.767 | 0.464 / 0.630 / 0.726 / 0.793 | 0.288 / 0.185 / 0.169 / 0.130 | +| bus | 2,476 | 0.5030 | 0.305 / 0.486 / 0.597 / 0.624 | 0.530 / 0.636 / 0.703 / 0.719 | 0.297 / 0.201 / 0.149 / 0.156 | +| bicycle | 364 | 0.5321 | 0.381 / 0.521 / 0.613 / 0.613 | 0.563 / 0.631 / 0.670 / 0.670 | 0.219 / 0.219 / 0.219 / 0.219 | +| pedestrian | 14,297 | 0.7591 | 0.737 / 0.756 / 0.766 / 0.778 | 0.750 / 0.760 / 0.765 / 0.771 | 0.134 / 0.127 / 0.129 / 0.132 | +| **ALL** | 48,248 | 0.6030 | — | — | — | + +**Total BEV Center Distance mAP (eval range = 0.0 - 121.0m): 0.7805** + +| class_name | GTs | mAP | AP@0.5/1.0/2.0/4.0 | max_f1@0.5/1.0/2.0/4.0 | optimal_conf@0.5/1.0/2.0/4.0 | +| :---- | ---: | ---: | :---- | :---- | :---- | +| car | 94,831 | 0.8730 | 0.799 / 0.875 / 0.905 / 0.914 | 0.845 / 0.884 / 0.896 / 0.899 | 0.235 / 0.189 / 0.165 / 0.165 | +| truck | 54,396 | 0.7118 | 0.490 / 0.674 / 0.809 / 0.875 | 0.645 / 0.757 / 0.831 / 0.862 | 0.314 / 0.240 / 0.178 / 0.153 | +| bus | 8,596 | 0.6785 | 0.504 / 0.674 / 0.761 / 0.775 | 0.655 / 0.761 / 0.807 / 0.813 | 0.285 / 0.180 / 0.168 / 0.168 | +| bicycle | 4,652 | 0.7655 | 0.736 / 0.770 / 0.778 / 0.778 | 0.800 / 0.816 / 0.819 / 0.820 | 0.194 / 0.159 / 0.159 / 0.159 | +| pedestrian | 99,485 | 0.8739 | 0.857 / 0.872 / 0.880 / 0.887 | 0.835 / 0.845 / 0.850 / 0.854 | 0.142 / 0.137 / 0.135 / 0.137 | +| **ALL** | 261,960 | 0.7805 | — | — | — | + +
+ +--- From 57a91d654b8122fc2ddc0defc79bf168080d4fdd Mon Sep 17 00:00:00 2001 From: KokSeang Date: Wed, 22 Apr 2026 15:02:12 +0900 Subject: [PATCH 044/183] Added --- .../default/pipelines/default_camera_base_50m.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py index c9010038f..a32e043b3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py @@ -37,12 +37,12 @@ ), dict( type="BEVFusionGlobalRotScaleTrans", - # scale_ratio_range=[0.95, 1.05], - # rot_range=[-0.78539816, 0.78539816], - # translation_std=[0.5, 0.5, 0.2], - scale_ratio_range=[0.98, 1.02], - rot_range=[-0.3925, 0.3925], - translation_std=[0.2, 0.2, 0.1], + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + # scale_ratio_range=[0.98, 1.02], + # rot_range=[-0.3925, 0.3925], + # translation_std=[0.2, 0.2, 0.1], ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), From 5a2b1e0ce4bbcccafc6622ff0234498cdae13633 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Wed, 22 Apr 2026 15:14:51 +0900 Subject: [PATCH 045/183] Added --- ...a_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py new file mode 100644 index 000000000..927310e7d --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_base_50m.py", + "../default/models/default_camera_swin_fpn_lss_50m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + 
ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + 
eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 08b50e6d71f31577a1053f8792ae381fcafdf524 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 14:47:27 +0900 Subject: [PATCH 046/183] Add the script --- projects/BEVFusion/bevfusion/__init__.py | 3 +- .../BEVFusion/bevfusion/bevfusion_head.py | 22 ++- .../bevfusion/bevfusion_voxel_encoder.py | 184 ++++++++++++++---- ...n_50e_8xb8_base_120m_sincos_10_channels.py | 161 --------------- .../default_lidar_second_secfpn_120m.py | 2 + 5 files changed, 168 insertions(+), 204 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 3db358b55..2e9822d76 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder +from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder __all__ = [ "BEVFusion", @@ -30,4 +30,5 @@ "TransFusionBBoxCoder", "BEVFusionVoxelEncoder", "BEVFusionVoxelSinCosEncoder", + "BEVFusionVoxelMeanSinCosEncoder", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..a8ef7129f 100644 
--- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -62,6 +62,7 @@ def __init__( norm_cfg=dict(type="BN1d"), bias="auto", # loss + loss_iou=None, loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), @@ -87,6 +88,7 @@ def __init__( if not self.use_sigmoid_cls: self.num_classes += 1 self.loss_cls = MODELS.build(loss_cls) + self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None self.loss_bbox = MODELS.build(loss_bbox) self.loss_heatmap = MODELS.build(loss_heatmap) @@ -369,8 +371,8 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F for layer_id, preds_dict in enumerate(preds_dicts): batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() - # if self.loss_iou.loss_weight != 0: - # batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 + if self.loss_iou is not None: + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot @@ -679,7 +681,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): ious[None], int(pos_inds.shape[0]), float(mean_iou), - heatmap[None], + heatmap[None] ) def loss(self, batch_feats, batch_data_samples): @@ -711,7 +713,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ious, num_pos, matched_ious, - heatmap, + heatmap ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0]) if hasattr(self, "on_the_image_mask"): label_weights = label_weights * self.on_the_image_mask @@ -798,7 +800,17 @@ def loss_by_feat(self, 
preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox - # loss_dict[f'{prefix}_loss_iou'] = layer_loss_iou + + # Output iou for iou-aware loss + if self.loss_iou is not None: + layer_ious = preds_dict["iou"][ + ..., + idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + ] # [BS, num_proposals] + + # [BS, num_proposals] + layer_iou_weights = layer_bbox_weights[:, :, 0] + loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index efbc995e8..086acc1e0 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -185,21 +185,21 @@ class BEVFusionVoxelSinCosEncoder(nn.Module): def __init__(self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], + time_lag_channel_index: int = 3, + time_exp_factor: Optional[float] = None, + feat_channels: Optional[tuple] = (16, ), in_channels: Optional[int] = 4, with_distance: Optional[bool] = False, with_cluster_center: Optional[bool] = True, with_voxel_center: Optional[bool] = True, voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1),): + 40, 1), + norm_cfg: Optional[dict] = dict( + type='BN1d', eps=1e-3, momentum=0.01), + mode: Optional[str] = 'max'): super(BEVFusionVoxelSinCosEncoder, self).__init__() - if with_cluster_center: - in_channels += 3 - if with_voxel_center: - in_channels += 3 - if with_distance: - in_channels += 1 self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center @@ -214,11 +214,42 @@ def __init__(self, self.y_offset =
self.vy / 2 + point_cloud_range[1] self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range + + self.xyz_channels = 3 + feat_offset_channels = in_channels - self.xyz_channels + if with_cluster_center: + feat_offset_channels += 3 + if with_voxel_center: + feat_offset_channels += 3 + if with_distance: + feat_offset_channels += 1 + + feat_channels = [feat_offset_channels] + list(feat_channels) + assert len(feat_channels) > 0, "feat_channels must be greater than 0" + pfn_layers = [] + for i in range(len(feat_channels) - 1): + in_filters = feat_channels[i] + out_filters = feat_channels[i + 1] + if i < len(feat_channels) - 2: + last_layer = False + else: + last_layer = True + pfn_layers.append( + PFNLayer( + in_filters, + out_filters, + norm_cfg=norm_cfg, + last_layer=last_layer, + mode=mode)) + self.pfn_layers = nn.ModuleList(pfn_layers) + self.time_lag_channel_index = time_lag_channel_index + self.time_exp_factor = time_exp_factor + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - self.register_buffer("exponents", (2 ** torch.arange(0, in_channels).float())) + self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -232,19 +263,53 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Returns: torch.Tensor: Features of pillars in shape (M, C). 
- """ - features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) - features_ls = [features_norm] + """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + # PFN + # Other features, for example, intensity or time_lag + other_features = features[:, :, self.xyz_channels:] + + # Normalization + other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + + time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels + # exponentiate time_lag features, it's higher when the normlized time lag is lower + # (1.0 when time_lag_features is 0.0) + if self.time_exp_factor is not None: + other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) + else: + # Inverse the time_lag feature + other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] + + # Offsets + voxel_feature_offsets = [other_features_norm] # Find distance of x, y, and z from cluster center if self._with_cluster_center: points_mean = features[:, :, :3].sum( dim=1, 
keepdim=True) / num_points.type_as(features).view( -1, 1, 1) - # Map to [-1, 1] - f_cluster = (features[:, :, :3] - points_mean) / self.voxel_size - # f_cluster = features[:, :, :3] - points_mean - features_ls.append(f_cluster) + # f_cluster = (features[:, :, :3] - points_mean) + f_cluster = features[:, :, :3] - points_mean + voxel_feature_offsets.append(f_cluster) # Find distance of x, y, and z from pillar center dtype = features.dtype @@ -261,35 +326,80 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, self.z_offset) # Map to [-1, 1] - f_center = f_center / (self.voxel_size * 0.5) - features_ls.append(f_center) + # f_center = f_center / (self.voxel_size * 0.5) + voxel_feature_offsets.append(f_center) if self._with_distance: points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - features_ls.append(points_dist) + voxel_feature_offsets.append(points_dist) - # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # SinCos encoding - # (N, M, C) -> (N, M, C, 1) -> (N, M, C, 1) * (1, 1, 1, C) -> (N, M, C, C) - y = features.unsqueeze(-1) * np.pi * self.exponents.unsqueeze(0).unsqueeze(0).unsqueeze(0) - # (N, M, C, C) -> (N, M, C*C) - y = y.reshape(num_voxels, max_points_per_voxel, self.in_channels ** 2) - # (N, M, C*C) -> (N, M, C*C*2) - features = torch.cat([torch.cos(y), torch.sin(y)], dim=-1) - + voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) # The feature decorations were calculated without regard to whether # pillar was empty. Need to ensure that # empty pillars remain set to zeros. 
mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(features) - features *= mask - - # Reduction by mean - # (N, M, C*C*2) -> (N, C*C*2) - features = features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) - features = features.contiguous() + mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) + voxel_feature_offsets *= mask + # PFN + for pfn in self.pfn_layers: + voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) + + # Concat + features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) return features + + + +@MODELS.register_module() +class BEVFusionVoxelMeanSinCosEncoder(nn.Module): + def __init__(self, + min_norm_values: Tuple[float], + max_norm_values: Tuple[float], + in_channels: Optional[int] = 4, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, + 40, 1), + mode: Optional[str] = 'max'): + super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() + + # Number of per-point input channels that get Fourier-encoded + self.in_channels = in_channels + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C). + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C). 
+ """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + return voxel_fourier_features diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py deleted file mode 100644 index 531a07673..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_10_channels.py +++ /dev/null @@ -1,161 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = 
"bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_10_channels" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=100, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters 
-train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..4843f5677 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -94,6 +94,7 @@ ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + # common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", @@ -110,6 +111,7 @@ reduction="mean", loss_weight=1.0, ), + # loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), ), From dead69b6bf0a744cde4fc4db0d410b974ac4f40a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 14:47:39 +0900 Subject: [PATCH 047/183] Add the script --- ...second_secfpn_50e_8xb8_base_120m_sincos.py | 156 +++++++++++++++++ 
...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 +++++++++++++++++ ...b8_base_120m_sincos_timeexp_34_channels.py | 165 ++++++++++++++++++ ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +++++++++++++ 4 files changed, 601 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py new file mode 100644 index 000000000..d856b1d4b --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py @@ -0,0 +1,156 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + 
_base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelMeanSinCosEncoder", + in_channels=4, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=32, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + 
type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + 
metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py new file mode 100644 index 000000000..54af6be5f --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + 
point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + 
pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + 
backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py new file mode 100644 index 000000000..d7e61102b --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py @@ -0,0 +1,165 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + 
point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + time_lag_channel_index=3, + time_exp_factor=1.0, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + 
dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + 
_base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py new file mode 100644 index 000000000..4c7e996d9 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py @@ -0,0 +1,117 @@ +num_proposals = 500 +max_num_points = 10 +max_voxels = [120000, 160000] + +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=max_num_points, + max_voxels=max_voxels, + voxelize_reduce=True, + ), + data_preprocessor=dict( + type="Det3DDataPreprocessor", + pad_size_divisor=32, + ), + pts_voxel_encoder=dict(type="HardSimpleVFE"), + pts_middle_encoder=dict( + type="BEVFusionSparseEncoder", + in_channels=5, + aug_features_min_values=[], + aug_features_max_values=[], + num_aug_features=0, + order=("conv", "norm", "act"), + norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), + encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), + encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), + block_type="basicblock", + ), + pts_backbone=dict( + type="SECOND", + in_channels=256, + out_channels=[128, 256], + layer_nums=[5, 5], + layer_strides=[1, 2], + norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), + conv_cfg=dict(type="Conv2d", bias=False), + ), + pts_neck=dict( + type="SECONDFPN", + in_channels=[128, 256], + out_channels=[256, 256], + 
upsample_strides=[1, 2], + norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), + upsample_cfg=dict(type="deconv", bias=False), + use_conv_for_no_stride=True, + ), + bbox_head=dict( + type="BEVFusionHead", + num_proposals=num_proposals, + auxiliary=True, + in_channels=512, + hidden_channel=128, + nms_kernel_size=3, + bn_momentum=0.1, + num_decoder_layers=1, + decoder_layer=dict( + type="TransformerDecoderLayer", + self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), + cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), + ffn_cfg=dict( + embed_dims=128, + feedforward_channels=256, + num_fcs=2, + ffn_drop=0.1, + act_cfg=dict(type="ReLU", inplace=True), + ), + norm_cfg=dict(type="LN"), + pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128), + ), + train_cfg=dict( + dataset="t4datasets", + out_size_factor=8, + gaussian_overlap=0.1, + min_radius=2, + pos_weight=-1, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], + assigner=dict( + type="HungarianAssigner3D", + iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"), + cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15), + reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25), + iou_cost=dict(type="IoU3DCost", weight=0.25), + ), + ), + test_cfg=dict( + dataset="t4datasets", + out_size_factor=8, + nms_type=None, # Set to "circle" for circle_nms + # Set NMS for different clusters + nms_clusters=[ + dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms + dict(class_names=["bicycle"], nms_threshold=0.5), + dict(class_names=["pedestrian"], nms_threshold=0.175), + ], + ), + dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), + bbox_coder=dict( + type="TransFusionBBoxCoder", + post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], + score_threshold=0.0, + 
out_size_factor=8, + code_size=10, + ), + loss_cls=dict( + type="mmdet.FocalLoss", + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + reduction="mean", + loss_weight=1.0, + ), + loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), + loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), + loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + ), +) From db756f57c3cf97e808bd4ae5d57b5ae785285ada Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 14:50:26 +0900 Subject: [PATCH 048/183] Update dataset name --- .../bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py | 2 ++ .../default/pipelines/default_camera_lidar_intensity_120m.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py index 8c8d84d18..4ac46afea 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py @@ -135,3 +135,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 439459010..7dac0838f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 
+num_workers = 8 input_modality = dict(use_lidar=True, use_camera=True) # range setting @@ -131,6 +131,7 @@ rand_flip=False, is_train=False, ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], From 1725f79575ff203ae80300504d7b85b5b0f5f796 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 23 Apr 2026 16:36:11 +0900 Subject: [PATCH 049/183] Update dataset name --- .../BEVFusion/bevfusion/bevfusion_head.py | 25 ++++++++++--------- .../bevfusion/bevfusion_voxel_encoder.py | 2 +- ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 13 +++------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index a8ef7129f..0852ebf16 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -62,7 +62,7 @@ def __init__( norm_cfg=dict(type="BN1d"), bias="auto", # loss - loss_iou=None, + loss_iou=None, loss_cls=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean"), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean"), @@ -372,7 +372,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() if self.loss_iou is not None: - batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot @@ -801,16 +801,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], 
batch_gt_instances_3d: Li loss_dict[f"{prefix}_loss_cls"] = layer_loss_cls loss_dict[f"{prefix}_loss_bbox"] = layer_loss_bbox - # Output iou for iou-aware loss - if self.loss_iou is not None: - layer_ious = preds_dict["iou"][ - ... - idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ] # [BS, num_proposals] - - # [BS, num_proposals] - layer_iou_weights = layer_bbox_weights[:, :, 0] - loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) + # Output iou for iou-aware loss + if self.loss_iou is not None: + layer_ious = preds_dict["iou"][ + ..., + idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + ].squeeze(1) # [BS, num_proposals] + + # [BS, num_proposals] + layer_iou_weights = layer_bbox_weights[:, :, 0] + # print(layer_ious.shape, ious.shape, layer_iou_weights.shape, "layer_ious.shape, ious.shape, layer_iou_weights.shape") + loss_dict[f'{prefix}_loss_iou'] = self.loss_iou(layer_ious, ious, layer_iou_weights, avg_factor=max(num_pos, 1)) loss_dict["matched_ious"] = layer_loss_cls.new_tensor(matched_ious) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 086acc1e0..5037113aa 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -361,7 +361,7 @@ def __init__(self, point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1), mode: Optional[str] = 'max'): - super(BEVFusionVoxelSinCosEncoder, self).__init__() + super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() # Create PillarFeatureNet layers self.in_channels = in_channels diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 17f16254d..7c1286df8 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -2,7 +2,7 @@ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", + "../default/models/default_lidar_second_secfpn_120m_iou_loss.py", "../default/schedulers/default_50e_8xb8_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_iou_loss" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -29,19 +29,14 @@ ), pts_voxel_encoder=dict( _delete_=True, - type="BEVFusionVoxelSinCosEncoder", + type="BEVFusionVoxelMeanSinCosEncoder", in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), pts_middle_encoder=dict( - in_channels=100, + in_channels=32, sparse_shape=_base_.grid_size, # num_aug_features=4, # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here From 24d780bb788fd25813481007d898c85051c944cb Mon Sep 17 
00:00:00 2001 From: KokSeang Date: Fri, 24 Apr 2026 16:31:47 +0900 Subject: [PATCH 050/183] Added --- tools/detection3d/t4dataset_converters/t4converter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py index 842b0f458..5dfd1dc1f 100644 --- a/tools/detection3d/t4dataset_converters/t4converter.py +++ b/tools/detection3d/t4dataset_converters/t4converter.py @@ -626,6 +626,10 @@ def get_lidarseg_annotations( ) -> dict: if not hasattr(t4, "lidarseg") or not t4.lidarseg: return dict() + + if sd_record.info_filename is None: + print(f"sample {lidar_token} doesn't have lidar info_filename") + return dict() assert i < len(t4.lidarseg), "Index exceeds number of lidarseg records!" assert t4.lidarseg[i].sample_data_token == lidar_token, "Sample data token mismatch!" From 8175419ca1604a9fe25b39ab3715616f3c8fc07f Mon Sep 17 00:00:00 2001 From: KokSeang Date: Fri, 24 Apr 2026 16:54:00 +0900 Subject: [PATCH 051/183] Added --- .../configs/detection3d/dataset/t4dataset/base.py | 13 +++++++++---- .../detection3d/dataset/t4dataset/j6gen2.py | 13 +++++++++---- .../detection3d/dataset/t4dataset/j6gen2_base.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/jpntaxi_base.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/jpntaxi_gen2.py | 14 ++++++++++---- .../detection3d/dataset/t4dataset/largebus.py | 14 ++++++++++---- 6 files changed, 58 insertions(+), 24 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index d0744a131..3f90e7e0c 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -91,8 +91,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": 
"pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -113,7 +113,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -123,7 +123,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -143,6 +143,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -151,6 +154,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier" ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index 3c8675c13..e737994aa 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -72,8 +72,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": 
"traffic_cone", "animal": "animal", @@ -94,7 +94,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -104,7 +104,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -124,6 +124,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -132,6 +135,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index cc3a86d3e..a8f6c6e7d 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -78,8 +78,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -100,7 +100,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": 
"pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -110,7 +110,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -130,14 +130,20 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } + class_names = [ "car", "truck", "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index b7ddb799a..229ff7604 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -68,8 +68,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -90,7 +90,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", 
"pedestrian.child": "pedestrian", @@ -100,7 +100,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -120,6 +120,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -128,7 +131,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index f91bbc22f..411cabe7e 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -65,8 +65,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -87,7 +87,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -97,7 +97,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle 
rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -117,6 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -125,7 +128,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index b117c3798..a611750d3 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -67,8 +67,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -89,7 +89,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -99,7 +99,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", + "static_object.bollard": "barrier", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & 
police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -119,6 +119,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -127,7 +130,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) From 33f11cd5db171246654950d2a0afc22a757dcce5 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 14:08:04 +0900 Subject: [PATCH 052/183] Added --- .../download_t4dataset/download_t4dataset.py | 4 ++-- tools/detection3d/create_data_t4dataset.py | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pipelines/webauto/download_t4dataset/download_t4dataset.py b/pipelines/webauto/download_t4dataset/download_t4dataset.py index f06f6979d..d06b85717 100644 --- a/pipelines/webauto/download_t4dataset/download_t4dataset.py +++ b/pipelines/webauto/download_t4dataset/download_t4dataset.py @@ -68,8 +68,8 @@ def get_t4dataset_ids(config_path: str) -> list[str]: for key in required_keys: for t4dataset_ids in data_splits[key]: t4dataset_ids = t4dataset_ids.split("/") - if len(t4dataset_ids) == 4: - t4dataset_id, t4dataset_version_id, city, vehicle_type = t4dataset_ids + if len(t4dataset_ids) == 5: + t4dataset_id, t4dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = t4dataset_ids elif len(t4dataset_ids) == 2: t4dataset_id, t4dataset_version_id = t4dataset_ids elif len(t4dataset_ids) == 1: diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 1e61af9d8..e75a36a04 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -104,6 +104,7 @@ def get_info( max_sweeps: int, city: Optional[str] = None, vehicle_type: Optional[str] = None, + traffic_cone_barrier_status: Optional[str] = None, ) -> 
Dict[str, Any]: lidar_token = get_lidar_token(sample) if lidar_token is None: @@ -129,6 +130,11 @@ def get_info( sd_record: SampleData = t4.get("sample_data", lidar_token) info = get_empty_standard_data_info(cfg.camera_types) + + if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true": + traffic_cone_barrier_status = True + else: + traffic_cone_barrier_status = False basic_info = dict( sample_idx=i, @@ -139,6 +145,7 @@ def get_info( scene_name=scene_record.name, city=city, vehicle_type=vehicle_type, + traffic_cone_barrier_status=traffic_cone_barrier_status, ) for new_info in [ @@ -302,8 +309,8 @@ def main(): f"Creating data info for scene: {scene_id}, steps: {sample_steps}, sweeps: {args.max_sweeps}" ) dataset_scene_info = scene_id.split("/") - if len(dataset_scene_info) == 4: - t4_dataset_id, t4_dataset_version_id, city, vehicle_type = dataset_scene_info + if len(dataset_scene_info) == 5: + t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info city = vehicle_type = None @@ -326,7 +333,7 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type) + info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. 
From 2237522607f97186e9900c4b4884159684ba9fad Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 14:36:45 +0900 Subject: [PATCH 053/183] Added --- .../configs/detection3d/dataset/t4dataset/base.py | 12 ++++++------ .../configs/detection3d/dataset/t4dataset/j6gen2.py | 12 ++++++------ .../detection3d/dataset/t4dataset/j6gen2_base.py | 12 ++++++------ .../detection3d/dataset/t4dataset/jpntaxi_base.py | 12 ++++++------ .../detection3d/dataset/t4dataset/jpntaxi_gen2.py | 12 ++++++------ .../detection3d/dataset/t4dataset/largebus.py | 12 ++++++------ 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 3f90e7e0c..4248c90e6 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -123,7 +123,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -143,9 +143,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -154,8 +154,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier" + "traffic_cone", + "barrier" ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index e737994aa..0324e7207 100644 --- 
a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -104,7 +104,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -124,9 +124,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -135,8 +135,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index a8f6c6e7d..b9ec03f27 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -110,7 +110,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -130,9 +130,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + 
"barrier": "barrier", } @@ -142,8 +142,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index 229ff7604..c08decfa1 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -100,7 +100,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -120,9 +120,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -131,8 +131,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index 411cabe7e..6b7250673 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -97,7 +97,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since 
vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -117,9 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -128,8 +128,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index a611750d3..2b54629eb 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -99,7 +99,7 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "barrier", + "static_object.bollard": "bollard", "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car "vehicle.bicycle": "bicycle", "vehicle.bus": "bus", @@ -119,9 +119,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -130,8 +130,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) From 60df4c0911f1559db0e917a2f2d9045ab07f83f3 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Sat, 25 Apr 2026 23:16:34 +0900 Subject: [PATCH 054/183] Added --- tools/detection3d/create_data_t4dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git 
a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index e75a36a04..62169bc7a 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -102,9 +102,9 @@ def get_info( sample: Sample, i: int, max_sweeps: int, + traffic_cone_barrier_status: str, city: Optional[str] = None, vehicle_type: Optional[str] = None, - traffic_cone_barrier_status: Optional[str] = None, ) -> Dict[str, Any]: lidar_token = get_lidar_token(sample) if lidar_token is None: @@ -130,8 +130,7 @@ def get_info( sd_record: SampleData = t4.get("sample_data", lidar_token) info = get_empty_standard_data_info(cfg.camera_types) - - if traffic_cone_barrier_status is not None and traffic_cone_barrier_status == "true": + if traffic_cone_barrier_status == "true": traffic_cone_barrier_status = True else: traffic_cone_barrier_status = False @@ -333,7 +332,7 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type, traffic_cone_barrier_status) + info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. 
From b5dabf2d53e4cde87b994c723f5233dadb267ec6 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 14:27:49 +0900 Subject: [PATCH 055/183] Added --- .../BEVFusion/bevfusion/bevfusion_head.py | 79 +++++++++++++++---- projects/BEVFusion/bevfusion/utils.py | 11 ++- .../default_lidar_second_secfpn_120m.py | 5 +- .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + 5 files changed, 81 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..9bbc6469c 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -69,6 +69,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, + partial_traffic_cone_barrier=False, ): super().__init__() self.class_names = class_names @@ -82,7 +83,8 @@ def __init__( self.nms_kernel_size = nms_kernel_size self.train_cfg = train_cfg self.test_cfg = test_cfg - + # If true, only compute loss for traffic cone and barrier when it's available in the frame + self.partial_traffic_cone_barrier = partial_traffic_cone_barrier self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if not self.use_sigmoid_cls: self.num_classes += 1 @@ -185,6 +187,13 @@ def __init__( cluster["class_indices"] = sorted( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) + + if self.partial_traffic_cone_barrier: + assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" + self.ignore_labels = [self.class_name_to_indices["traffic_cone"], self.class_name_to_indices["barrier"]] + else: + self.ignore_labels = None + def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -456,7 +465,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F return rets[0] - def get_targets(self, 
batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict]): + def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]): """Generate training targets. Args: batch_gt_instances_3d (List[InstanceData]): @@ -500,6 +509,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis batch_gt_instances_3d, list_of_pred_dict, np.arange(len(batch_gt_instances_3d)), + batch_metadata, ) labels = torch.cat(res_tuple[0], dim=0) label_weights = torch.cat(res_tuple[1], dim=0) @@ -509,6 +519,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos = np.sum(res_tuple[5]) matched_ious = np.mean(res_tuple[6]) heatmap = torch.cat(res_tuple[7], dim=0) + heatmap_weights = torch.cat(res_tuple[8], dim=0) return ( labels, label_weights, @@ -518,9 +529,10 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos, matched_ious, heatmap, + heatmap_weights, ) - def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): + def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata): """Generate training targets for a single sample. Args: gt_instances_3d (:obj:`InstanceData`): ground truth of instances. 
@@ -563,6 +575,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): num_layer = self.num_decoder_layers else: num_layer = 1 + + traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) + if self.ignore_labels is not None and not traffic_cone_barrier_status: + ignore_labels = self.ignore_labels + else: + ignore_labels = None assign_result_list = [] for idx_layer in range(num_layer): @@ -581,6 +599,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): gt_labels_3d, score_layer, self.train_cfg, + ignore_labels, ) elif self.train_cfg.assigner.type == "HeuristicAssigner": assign_result = self.bbox_assigner.assign( @@ -637,10 +656,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight - + if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 - + # # compute dense heatmap targets device = labels.device gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device) @@ -671,6 +690,15 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int[[1, 0]], radius) mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1) + heatmap_weights = torch.ones_like(heatmap) + + # Ignore labels for traffic cone and barrier + if self.ignore_labels is not None and not traffic_cone_barrier_status: + pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + ignore_preds_masks = pred_labels.isin(self.ignore_labels) + label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals + heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals + return ( labels[None], label_weights[None], @@ -680,6 +708,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): int(pos_inds.shape[0]), float(mean_iou), heatmap[None], + heatmap_weights[None], ) 
def loss(self, batch_feats, batch_data_samples): @@ -698,11 +727,11 @@ def loss(self, batch_feats, batch_data_samples): batch_input_metas.append(data_sample.metainfo) batch_gt_instances_3d.append(data_sample.gt_instances_3d) preds_dicts = self(batch_feats, batch_input_metas) - loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d) + loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d, batch_input_metas) return loss - def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, **kwargs): + def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas): ( labels, label_weights, @@ -712,7 +741,8 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li num_pos, matched_ious, heatmap, - ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0]) + heatmap_weights, + ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0], batch_input_metas) if hasattr(self, "on_the_image_mask"): label_weights = label_weights * self.on_the_image_mask bbox_weights = bbox_weights * self.on_the_image_mask[:, :, None] @@ -721,13 +751,32 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict = dict() # compute heatmap loss - loss_heatmap = self.loss_heatmap( - clip_sigmoid(preds_dict["dense_heatmap"]).float(), - heatmap.float(), - avg_factor=max(heatmap.eq(1).float().sum().item(), 1), - ) - loss_dict["loss_heatmap"] = loss_heatmap - + preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) + num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) + if self.ignore_labels is not None: + loss_heatmap = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + avg_factor=num_pos_dense_heatmap, + ) + loss_dict["loss_heatmap"] = loss_heatmap + else: + # When ignore labels is found, we compute the loss for each class + # heatmap focal loss + loss_heatmap_cls: torch.Tensor = 
self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + + # Prevent loss item to avoid computing gradients twice. This is for logging. + loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..b27d9e681 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -238,7 +238,7 @@ def __init__( self.iou_cost = TASK_UTILS.build(iou_cost) self.iou_calculator = TASK_UTILS.build(iou_calculator) - def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): + def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None): num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0) # 1. 
assign -1 by default @@ -259,9 +259,16 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg) iou = self.iou_calculator(bboxes, gt_bboxes) iou_cost = self.iou_cost(iou) - + # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost + + if ignore_labels is not None: + preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) + ignore_preds_masks = preds_labels.isin(ignore_labels) + cost[ignore_preds_masks] = 10000 + print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) # 3. do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..94fca2829 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -91,9 +91,11 @@ dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms dict(class_names=["bicycle"], nms_threshold=0.5), dict(class_names=["pedestrian"], nms_threshold=0.175), + dict(class_names=["barrier"], nms_threshold=0.25), + dict(class_names=["traffic_cone"], nms_threshold=0.175), ], ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", @@ -112,5 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), 
loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + partial_traffic_cone_barrier=True ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 06d95be16..c3e8e18ee 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -84,6 +84,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +128,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 4e74d3616..a9032fcdc 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -84,6 +84,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +128,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] From 79024cf9f861e086d77b7b1362b62be6c81f6bc5 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 14:41:33 +0900 Subject: [PATCH 056/183] Added --- projects/BEVFusion/bevfusion/bevfusion_head.py | 3 +++ .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 9bbc6469c..d616725d2 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -15,6 +15,7 @@ from mmdet.models.utils import multi_apply from mmengine.structures import InstanceData from torch import nn +from 
mmengine.logging import print_log def clip_sigmoid(x, eps=1e-4): @@ -194,6 +195,8 @@ def __init__( else: self.ignore_labels = None + print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 94fca2829..ec37de42a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -91,7 +91,7 @@ dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms dict(class_names=["bicycle"], nms_threshold=0.5), dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.25), + dict(class_names=["barrier"], nms_threshold=0.5), dict(class_names=["traffic_cone"], nms_threshold=0.175), ], ), From e771a69cb77196e16263b0049919837d0edb275e Mon Sep 17 00:00:00 2001 From: KokSeang Date: Mon, 27 Apr 2026 18:18:08 +0900 Subject: [PATCH 057/183] Added --- .../BEVFusion/bevfusion/bevfusion_head.py | 21 +++++++++++-------- projects/BEVFusion/bevfusion/utils.py | 12 +++++------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index d616725d2..7e62d21a4 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -579,12 +579,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad else: num_layer = 1 - traffic_cone_barrier_status = 
batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) - if self.ignore_labels is not None and not traffic_cone_barrier_status: - ignore_labels = self.ignore_labels - else: - ignore_labels = None - assign_result_list = [] for idx_layer in range(num_layer): bboxes_tensor_layer = bboxes_tensor[ @@ -638,7 +632,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad ious = assign_result_ensemble.max_overlaps ious = torch.clamp(ious, min=0.0, max=1.0) labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) - label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) + label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long) + # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) if gt_labels_3d is not None: # default label is -1 labels += self.num_classes @@ -696,11 +691,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad heatmap_weights = torch.ones_like(heatmap) # Ignore labels for traffic cone and barrier + traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) if self.ignore_labels is not None and not traffic_cone_barrier_status: pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) ignore_preds_masks = pred_labels.isin(self.ignore_labels) - label_weights[ignore_preds_masks] = 0.0 # Set to 0 to ignore these proposals heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals + label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return ( labels[None], @@ -791,10 +787,17 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ].reshape(-1) + # layer_label_weights = label_weights[ + # ..., + # idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, + # ].reshape(-1) layer_label_weights = 
label_weights[ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ].reshape(-1) + ] + # (Batch*num_proposals, num_classes) + layer_label_weights = layer_label_weights.reshape(-1, self.num_classes) + print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current") layer_score = preds_dict["heatmap"][ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index b27d9e681..b6bd2be41 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -263,12 +263,12 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - if ignore_labels is not None: - preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) - ignore_preds_masks = preds_labels.isin(ignore_labels) - cost[ignore_preds_masks] = 10000 - print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) + # if ignore_labels is not None: + # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) + # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) + # ignore_preds_masks = preds_labels.isin(ignore_labels) + # cost[ignore_preds_masks] = 10000 + # print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) # 3. 
do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() From 05703cbad1bf353279c4feff7af59ae6926c6281 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 27 Apr 2026 20:07:18 +0900 Subject: [PATCH 058/183] Update configs --- .../BEVFusion/bevfusion/bevfusion_head.py | 30 ++-- ..._secfpn_50e_8xb8_base_120m_traffic_cone.py | 163 ++++++++++++++++++ .../default_lidar_second_secfpn_120m.py | 2 +- .../default/pipelines/default_lidar_120m.py | 2 + 4 files changed, 179 insertions(+), 18 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 7e62d21a4..ace7f26b8 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_traffic_cone_barrier=False, + partial_ignore_labels=None ): super().__init__() self.class_names = class_names @@ -84,8 +84,6 @@ def __init__( self.nms_kernel_size = nms_kernel_size self.train_cfg = train_cfg self.test_cfg = test_cfg - # If true, only compute loss for traffic cone and barrier when it's available in the frame - self.partial_traffic_cone_barrier = partial_traffic_cone_barrier self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if not self.use_sigmoid_cls: self.num_classes += 1 @@ -189,13 +187,14 @@ def __init__( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) - if self.partial_traffic_cone_barrier: + # If true, only compute loss for traffic cone and barrier when it's available in the frame + if partial_ignore_labels is not None: assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" - self.ignore_labels = [self.class_name_to_indices["traffic_cone"], 
self.class_name_to_indices["barrier"]] + self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels] else: - self.ignore_labels = None + self.partial_ignore_labels = None - print_log(f"BEVFusionHead Ignore labels: {self.ignore_labels}, dense heatmap pooling classes: \ + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): @@ -535,7 +534,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis heatmap_weights, ) - def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metadata): + def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): """Generate training targets for a single sample. Args: gt_instances_3d (:obj:`InstanceData`): ground truth of instances. @@ -596,7 +595,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad gt_labels_3d, score_layer, self.train_cfg, - ignore_labels, ) elif self.train_cfg.assigner.type == "HeuristicAssigner": assign_result = self.bbox_assigner.assign( @@ -691,12 +689,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, batch_metad heatmap_weights = torch.ones_like(heatmap) # Ignore labels for traffic cone and barrier - traffic_cone_barrier_status = batch_metadata[batch_idx].get("traffic_cone_barrier_status", True) - if self.ignore_labels is not None and not traffic_cone_barrier_status: - pred_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - ignore_preds_masks = pred_labels.isin(self.ignore_labels) - heatmap_weights[self.ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[:, self.ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier + traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) + if 
self.partial_ignore_labels is not None and not traffic_cone_barrier_status: + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return ( labels[None], @@ -752,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.ignore_labels is not None: + if self.partial_ignore_labels is not None: loss_heatmap = self.loss_heatmap( preds_dense_heatmap, heatmap.float(), @@ -797,12 +793,12 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ] # (Batch*num_proposals, num_classes) layer_label_weights = layer_label_weights.reshape(-1, self.num_classes) - print_log(f"layer_label_weights: {layer_label_weights.shape}", logger="current") layer_score = preds_dict["heatmap"][ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ] layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes) + print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current") layer_loss_cls = self.loss_cls( layer_cls_score.float(), layer_labels, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py new file mode 100644 index 000000000..39c6ddf54 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + 
"../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + 
pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + 
info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index ec37de42a..023c6774d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -114,6 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_traffic_cone_barrier=True + partial_ignore_labels=None ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index c3e8e18ee..455c2761a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + 
"barrier": 120, } # LiDAR parameters From 06ae9c2f75fc2060d102a1731d6365001bd91de8 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 27 Apr 2026 20:53:07 +0900 Subject: [PATCH 059/183] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index ace7f26b8..c58a04dca 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -748,7 +748,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.partial_ignore_labels is not None: + if self.partial_ignore_labels is None: loss_heatmap = self.loss_heatmap( preds_dense_heatmap, heatmap.float(), @@ -798,7 +798,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ] layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes) - print_log(f"layer_label_weights: {layer_label_weights.shape}, layer_score: {layer_score.shape}, layer_labels: {layer_labels.shape}", logger="current") layer_loss_cls = self.loss_cls( layer_cls_score.float(), layer_labels, From d8c19749f3d54dd9a437b02ce58b2b0ca1af755d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 09:52:10 +0900 Subject: [PATCH 060/183] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index c58a04dca..adbd64835 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -762,6 +762,10 @@ def 
loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li preds_dense_heatmap, heatmap.float(), ) + loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach() + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap From 25e581c902b3ad907da44a5bcbd41d9d249a65d3 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:02:59 +0900 Subject: [PATCH 061/183] Update configs --- tools/detection3d/create_data_t4dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 62169bc7a..9550b2872 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -273,7 +273,8 @@ def main(): if cfg.filter_attributes is None: print_log("No attribute filtering is applied!") - + + remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False) # Get every pair of min-max distance filtering thresholds bev_distance_ranges = [] if hasattr(cfg, "evaluator_metric_configs"): @@ -310,6 +311,9 @@ def main(): dataset_scene_info = scene_id.split("/") if len(dataset_scene_info) == 5: t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info + if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false": + print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current") + continue elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info city = vehicle_type = None From 
9ba440a640eb8f9742cc47425e3e4d6977f58705 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:05:29 +0900 Subject: [PATCH 062/183] Update configs --- .../t4dataset/j6gen2_base_traffic_cone.py | 211 ++++++++++++++++++ .../t4dataset/jpntaxi_base_traffic_cone.py | 202 +++++++++++++++++ 2 files changed, 413 insertions(+) create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py new file mode 100644 index 000000000..8c57cf4fa --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py @@ -0,0 +1,211 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl" +info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl" +info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet" + +# dataset scene setting +dataset_version_list = [ + "db_j6gen2_v1", + "db_j6gen2_v2", + "db_j6gen2_v3", + "db_j6gen2_v4", + "db_j6gen2_v5", + "db_j6gen2_v6", + "db_j6gen2_v7", + "db_j6gen2_v8", + "db_j6gen2_v9", + "db_largebus_v1", + "db_largebus_v2", + "db_largebus_v3", +] + +dataset_test_groups = { + "largebus": 
("t4dataset_largebus_traffic_cone_infos_test.pkl", False), + "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False), + "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True), +} + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) + +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + 
"pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + "vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + 
("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + ("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", "motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] 
when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) + +remove_non_traffic_cone_barrier = True \ No newline at end of file diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py new file mode 100644 index 000000000..3643b4475 --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -0,0 +1,202 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl" +info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl" +info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet" + +# dataset scene setting +dataset_test_groups = { + "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True), +} + +dataset_version_list = [ + "db_jpntaxigen2_v1", + "db_jpntaxigen2_v2", + "db_jpntaxi_v1", + "db_jpntaxi_v2", + "db_jpntaxi_v4", +] + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + 
"CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + 
"vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] + +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +# Add filter attributes +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + ("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + ("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", 
"motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) + +remove_non_traffic_cone_barrier = True \ No newline at end of file From aac1e1d3fa8c342d628ccaf723fe7a2419cf0a8a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:14:25 +0900 Subject: [PATCH 063/183] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index adbd64835..b0a42249a 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -692,7 +692,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[:, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier + label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to ignore traffic_cone and barrier return 
( labels[None], From 09c06d79ee992ef0479876b374271b6276aa632c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 10:17:53 +0900 Subject: [PATCH 064/183] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index b0a42249a..226237ff3 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -762,7 +762,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li preds_dense_heatmap, heatmap.float(), ) - loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).clone().detach() + loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() for cls_i, class_name in enumerate(self.class_names): loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] From 5279b178195f4a8aa7290eb7cec905aeb354576c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:18:52 +0900 Subject: [PATCH 065/183] Update configs --- projects/BEVFusion/bevfusion/bevfusion_head.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 226237ff3..3f857e22d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -692,7 +692,12 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals - label_weights[neg_inds, self.partial_ignore_labels] = 0.0 # Set to 0 to 
ignore traffic_cone and barrier + if len(neg_inds) > 0: + # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. + _cols = torch.as_tensor( + self.partial_ignore_labels, device=label_weights.device, dtype=torch.long + ) + label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 return ( labels[None], From 0ce7a720a12904400e236281b8a4e3f4c17fd9a7 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:19:27 +0900 Subject: [PATCH 066/183] Add the script --- ...0m_sincos_timeexp_34_channels_32_points.py | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py new file mode 100644 index 000000000..f784b2386 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py @@ -0,0 +1,166 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = 
"info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=32, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelSinCosEncoder", + in_channels=4, + time_lag_channel_index=3, + time_exp_factor=1.0, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=34, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + 
voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + 
ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 9b4c2f292c10bc3561110541089c3c7bb0d0dcb9 Mon Sep 17 00:00:00 2001 From: KokSeang Date: Tue, 28 Apr 2026 12:48:58 +0900 Subject: [PATCH 067/183] Added --- .../detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py index 3643b4475..c7e631458 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -18,7 +18,7 @@ # dataset scene setting dataset_test_groups = { - "jpntaxi_gen2": ("t4dataset_jpntaxi_gen2_traffic_cone_infos_test.pkl", True), + "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True), } dataset_version_list = [ From 5c0ada87126561c3483253ebd0a684ece02b332c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:49:32 +0900 Subject: [PATCH 068/183] Add the script --- 
.../BEVFusion/bevfusion/bevfusion_head.py | 71 ++++++++++++------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 3f857e22d..a1819b309 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -753,34 +753,53 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - if self.partial_ignore_labels is None: - loss_heatmap = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - avg_factor=num_pos_dense_heatmap, - ) - loss_dict["loss_heatmap"] = loss_heatmap - else: - # When ignore labels is found, we compute the loss for each class - # heatmap focal loss - loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - ) - loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # (Batch, num_classes) - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + # if self.partial_ignore_labels is None: + # loss_heatmap = self.loss_heatmap( + # preds_dense_heatmap, + # heatmap.float(), + # avg_factor=num_pos_dense_heatmap, + # ) + # loss_dict["loss_heatmap"] = loss_heatmap + # else: + # # When ignore labels is found, we compute the loss for each class + # # heatmap focal loss + # 
loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + # preds_dense_heatmap, + # heatmap.float(), + # ) + # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() + # # for cls_i, class_name in enumerate(self.class_names): + # # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + + # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + # loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # # (Batch, num_classes) + # for cls_i, class_name in enumerate(self.class_names): + # loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # # Prevent loss item to avoid computing gradients twice. This is for logging. + # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + + # # When ignore labels is found, we compute the loss for each class + # # heatmap focal loss + loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() + # for cls_i, class_name in enumerate(self.class_names): + # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] + + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + # Prevent loss item to avoid computing gradients twice. This is for logging. 
+ loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): From ad6b07a14afd1bd08a3178342856660fe364185a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 12:49:46 +0900 Subject: [PATCH 069/183] Add the script --- ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py new file mode 100644 index 000000000..38b1e8ea5 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py @@ -0,0 +1,163 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type +experiment_name = 
"lidar_voxel_second_secfpn_50e_8xb8_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=None, + loss_heatmap=dict( + reduction="none", + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + 
filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, 
max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From a592868d6add0b0e589a1e7dd2ee47d55848be13 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 16:28:38 +0900 Subject: [PATCH 070/183] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 70 +++----- ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 ++++++++++++++++++ 2 files changed, 188 insertions(+), 46 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index a1819b309..0b18803c4 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -753,53 +753,31 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li # compute heatmap loss preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) - # if self.partial_ignore_labels is None: - # loss_heatmap = self.loss_heatmap( - # preds_dense_heatmap, - # heatmap.float(), - # avg_factor=num_pos_dense_heatmap, - # ) - # loss_dict["loss_heatmap"] = loss_heatmap - # else: - # # When ignore labels is found, we compute the loss for each class - # # heatmap focal loss - # loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - # preds_dense_heatmap, - # heatmap.float(), - # ) - # # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - # # for cls_i, class_name in enumerate(self.class_names): - # # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - # loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - # loss_heatmap_cls = 
loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # # (Batch, num_classes) - # for cls_i, class_name in enumerate(self.class_names): - # loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + if self.partial_ignore_labels is None: + loss_heatmap = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + avg_factor=num_pos_dense_heatmap, + ) + loss_dict["loss_heatmap"] = loss_heatmap + else: + # When ignore labels is found, we compute the loss for each class + # heatmap focal loss + loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # # Prevent loss item to avoid computing gradients twice. This is for logging. 
- # loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() - - # # When ignore labels is found, we compute the loss for each class - # # heatmap focal loss - loss_heatmap_cls: torch.Tensor = self.loss_heatmap( - preds_dense_heatmap, - heatmap.float(), - ) - # loss_heatmap_cls_before_reduction = (loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap).detach() - # for cls_i, class_name in enumerate(self.class_names): - # loss_dict[f"heatmap_{class_name}_before_reduction"] = loss_heatmap_cls_before_reduction[cls_i] - - # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) - loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() - loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap - # (Batch, num_classes) - for cls_i, class_name in enumerate(self.class_names): - loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # Prevent loss item to avoid computing gradients twice. This is for logging. 
+ loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False): diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py new file mode 100644 index 000000000..1ca622714 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -0,0 +1,164 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/user_name/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + 
pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), + partial_ignore_labels=None, +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + 
metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = None From 2929ff67b324627b3adbbe50da221e235f004ec5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 20:35:00 +0900 Subject: [PATCH 071/183] Update configs --- ...cond_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 4 ++-- 
.../default/pipelines/default_lidar_intensity_120m.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index 1ca622714..57afc7e75 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" @@ -161,4 +161,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index a9032fcdc..19051a04f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters From 57d6ae6771fac9787e9ac4deeb3f6edca50e5bc3 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 28 Apr 2026 21:18:15 +0900 Subject: [PATCH 072/183] Update configs --- .../datasets/transforms/loading.py | 41 +++++ 
...b8_j6gen2_base_120m_traffic_cone_ignore.py | 167 ++++++++++++++++++ .../default_30e_8xb8_adamw_cosine.py | 2 +- 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py new file mode 100644 index 000000000..3e23218e4 --- /dev/null +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -0,0 +1,41 @@ +from mmcv.transforms import BaseTransform +from mmdet3d.structures.ops import box_np_ops +from mmengine.registry import TRANSFORMS + + +@TRANSFORMS.register_module() +class LoadPointsFromCurrentFileSweep(BaseTransform): + """Load points from the current file and sweep. + This is used to load the points from the current file and sweep for copy-paste augmentation. + + Args: + coord_type (str): The type of coordinates of points cloud. + load_dim (int): The dimension of the loaded points. + use_dim (list[int] | int): Which dimensions of the points to use. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
+ """ + + def __init__(self, + coord_type: str, + load_dim: int = 6, + use_dim: Union[int, List[int]] = [0, 1, 2], + shift_height: bool = False, + use_color: bool = False, + norm_intensity: bool = False, + norm_elongation: bool = False, + backend_args: Optional[dict] = None) -> None: + self.shift_height = shift_height + self.use_color = use_color + if isinstance(use_dim, int): + use_dim = list(range(use_dim)) + assert max(use_dim) < load_dim, \ + f'Expect all used dimensions < {load_dim}, got {use_dim}' + assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH'] + + self.coord_type = coord_type + self.load_dim = load_dim + self.use_dim = use_dim + self.norm_intensity = norm_intensity + self.norm_elongation = norm_elongation + self.backend_args = backend_args \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py new file mode 100644 index 000000000..80bd595dd --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -0,0 +1,167 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = 
"info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + 
data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + 
eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index a2cd2d2e9..388705848 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -3,7 +3,7 @@ lr = 1.4141e-4 t_max = 8 max_epochs = 30 -val_interval = 5 +val_interval = 1 train_gpu_size = 8 test_batch_size = 2 From 2e03655ea03b7b9147c49e30d591ae33df1ee08a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:31:43 +0900 Subject: [PATCH 073/183] Add the script --- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/loading.py | 53 +-- ..._base_120m_traffic_cone_full_copy_paste.py | 312 ++++++++++++++++++ ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 10 +- .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + ...default_lidar_intensity_120m_copy_paste.py | 180 ++++++++++ 7 files changed, 532 insertions(+), 30 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py 
b/autoware_ml/detection3d/datasets/transforms/__init__.py index 6bc932f1a..dc95d27f8 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,3 +1,4 @@ from .object_min_points_filter import ObjectMinPointsFilter +from .loading import LoadPointsFromCurrentFileSweep -__all__ = ["ObjectMinPointsFilter"] +__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index 3e23218e4..d96a87b5a 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,22 +1,24 @@ from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops +from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps from mmengine.registry import TRANSFORMS + @TRANSFORMS.register_module() class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. - This is used to load the points from the current file and sweep for copy-paste augmentation. + """Load points from the current file and sweep. + This is used to load the points from the current file and sweep for copy-paste augmentation. - Args: - coord_type (str): The type of coordinates of points cloud. - load_dim (int): The dimension of the loaded points. - use_dim (list[int] | int): Which dimensions of the points to use. - backend_args (dict, optional): Arguments to instantiate the - corresponding backend. Defaults to None. - """ + Args: + coord_type (str): The type of coordinates of points cloud. + load_dim (int): The dimension of the loaded points. + use_dim (list[int] | int): Which dimensions of the points to use. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
+ """ - def __init__(self, + def __init__(self, coord_type: str, load_dim: int = 6, use_dim: Union[int, List[int]] = [0, 1, 2], @@ -24,18 +26,21 @@ def __init__(self, use_color: bool = False, norm_intensity: bool = False, norm_elongation: bool = False, - backend_args: Optional[dict] = None) -> None: - self.shift_height = shift_height - self.use_color = use_color - if isinstance(use_dim, int): - use_dim = list(range(use_dim)) - assert max(use_dim) < load_dim, \ - f'Expect all used dimensions < {load_dim}, got {use_dim}' - assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH'] + backend_args: Optional[dict] = None, + sweeps_num: int = 10, + pad_empty_sweeps: bool = False, + remove_close: bool = False, + test_mode: bool = False + ) -> None: + + self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args) + if sweeps_num > 0: + self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, pad_empty_sweeps=pad_empty_sweeps, remove_close=remove_close, test_mode=test_mode) + else: + self.points_from_multi_sweeps_loader = None - self.coord_type = coord_type - self.load_dim = load_dim - self.use_dim = use_dim - self.norm_intensity = norm_intensity - self.norm_elongation = norm_elongation - self.backend_args = backend_args \ No newline at end of file + def transform(self, results: dict) -> dict: + points = self.points_loader(results) + if self.points_from_multi_sweeps_loader is not None: + points = self.points_from_multi_sweeps_loader(points) + return points diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py new file mode 100644 index 000000000..6c7fb78a8 --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -0,0 +1,312 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to 
identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ) + ), +) + +db_sampler = dict( + data_root=data_root, + info_path=info_directory_path + _base_.info_train_file_name, + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + traffic_cone=5, + barrier=5, + bicycle=5, + pedestrian=5)), + classes=_base_.class_names, + sample_groups=dict( + car=0, + truck=0, + bus=0, + barrier=2, + traffic_cone=4), + points_loader=dict( + type='LoadPointsFromCurrentFileSweep', + coord_type='LIDAR', + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + sweeps_num=_base_.sweeps_num, + pad_empty_sweeps=True, + remove_close=True, + test_mode=False, + )) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict(type="ObjectSample", db_sampler=db_sampler), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeFilter", 
point_cloud_range=_base_.point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + 
_base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + 
filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" + +custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py index 80bd595dd..68c736749 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -64,11 +64,11 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( reduction="none", ), + ), ) # Dataset parameters @@ -164,4 +164,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 455c2761a..09b9f7b26 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -59,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 19051a04f..9c7e02977 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -59,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py new file mode 100644 index 000000000..a7c7cddfe --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py @@ -0,0 +1,180 @@ +# Dataset parameters +backend_args = None +num_workers = 32 +input_modality = dict(use_lidar=True, use_camera=False) + +# range setting +point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] +eval_class_range = { + "car": 120, + "truck": 120, + "bus": 120, + "bicycle": 120, + "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, +} + +# LiDAR parameters +point_load_dim = 5 # x, y, z, intensity, ring_id +point_use_dim = 5 +lidar_sweep_dims = [0, 1, 2, 3, 4] # x, y, z, intensity, time_lag +sweeps_num = 1 + +db_sampler = dict( + 
data_root=data_root, + info_path=data_root + 'nuscenes_dbinfos_train.pkl', + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + construction_vehicle=5, + traffic_cone=5, + barrier=5, + motorcycle=5, + bicycle=5, + pedestrian=5)), + classes=class_names, + sample_groups=dict( + car=2, + truck=3, + construction_vehicle=7, + bus=4, + trailer=6, + barrier=2, + motorcycle=6, + bicycle=6, + pedestrian=2, + traffic_cone=2), + points_loader=dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=5, + use_dim=[0, 1, 2, 3, 4], + backend_args=backend_args)) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + 
"pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Filtering configuration +# Note: +# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, +# e.g., dict(filter_frames_with_missing_image=True). +# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so +# image-based filtering does not apply and `filter_cfg` is intentionally None. 
+filter_cfg = None From e2a69c1851b6149f256e32f48803b968756f018d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:35:46 +0900 Subject: [PATCH 074/183] Add the script --- .../datasets/transforms/loading.py | 2 + ..._base_120m_traffic_cone_full_copy_paste.py | 2 + ...default_lidar_intensity_120m_copy_paste.py | 180 ------------------ 3 files changed, 4 insertions(+), 180 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index d96a87b5a..535653d9b 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,3 +1,5 @@ +from typing import List, Optional, Union + from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 6c7fb78a8..6e0d7445b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -138,6 +138,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py 
deleted file mode 100644 index a7c7cddfe..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m_copy_paste.py +++ /dev/null @@ -1,180 +0,0 @@ -# Dataset parameters -backend_args = None -num_workers = 32 -input_modality = dict(use_lidar=True, use_camera=False) - -# range setting -point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] -eval_class_range = { - "car": 120, - "truck": 120, - "bus": 120, - "bicycle": 120, - "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, -} - -# LiDAR parameters -point_load_dim = 5 # x, y, z, intensity, ring_id -point_use_dim = 5 -lidar_sweep_dims = [0, 1, 2, 3, 4] # x, y, z, intensity, time_lag -sweeps_num = 1 - -db_sampler = dict( - data_root=data_root, - info_path=data_root + 'nuscenes_dbinfos_train.pkl', - rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - construction_vehicle=5, - traffic_cone=5, - barrier=5, - motorcycle=5, - bicycle=5, - pedestrian=5)), - classes=class_names, - sample_groups=dict( - car=2, - truck=3, - construction_vehicle=7, - bus=4, - trailer=6, - barrier=2, - motorcycle=6, - bicycle=6, - pedestrian=2, - traffic_cone=2), - points_loader=dict( - type='LoadPointsFromFile', - coord_type='LIDAR', - load_dim=5, - use_dim=[0, 1, 2, 3, 4], - backend_args=backend_args)) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 
1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Filtering configuration -# Note: -# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, -# e.g., dict(filter_frames_with_missing_image=True). 
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so -# image-based filtering does not apply and `filter_cfg` is intentionally None. -filter_cfg = None From ebc80340e07dac07833f2b2b5d7bd9df15fe3450 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 01:37:23 +0900 Subject: [PATCH 075/183] Update configs --- ..._second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index 57afc7e75..b9fafe7a9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -65,7 +65,6 @@ voxel_size=_base_.voxel_size[0:2], ), ), - partial_ignore_labels=None, ) # Dataset parameters From bb35205a445beb1ad37bfe075a9400d0a8fb960a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 10:33:08 +0900 Subject: [PATCH 076/183] Add the script --- .../default/schedulers/default_30e_8xb8_adamw_cosine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py index 388705848..a2cd2d2e9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py @@ -3,7 +3,7 @@ lr = 1.4141e-4 t_max = 8 max_epochs = 30 -val_interval = 1 +val_interval = 5 train_gpu_size = 8 test_batch_size = 2 From 
f343fbeae0811127e1d54870e21f7eb5850af3b2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 29 Apr 2026 10:42:46 +0900 Subject: [PATCH 077/183] Update configs --- autoware_ml/detection3d/datasets/t4dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..526150755 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -191,5 +191,8 @@ def parse_data_info(self, info: dict) -> dict: info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"]) else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] - + + # Default difficulty to 0 if not present + if 'difficulty' not in info: + info['difficulty'] = 0 return info From 6f55027b662c778c264d5ebe967d8d2f34813676 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 03:22:00 +0900 Subject: [PATCH 078/183] Update configs --- ...xel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py} (98%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py index 39c6ddf54..90136a748 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter From d99abd97ed98a0bdfcc90c1b26383aded31eedcf Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 12:25:17 +0900 Subject: [PATCH 079/183] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 9 +- ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 1 + ..._base_120m_traffic_cone_full_copy_paste.py | 21 +- ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 1 + ...ase_120m_traffic_cone_ignore_copy_paste.py | 317 ++++++++++++++++++ 5 files changed, 336 insertions(+), 13 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b18803c4..4894ad2e7 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,8 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None + partial_ignore_labels=None, + partial_ignore_dense_heatmap=False ): super().__init__() self.class_names = class_names @@ -194,7 +195,8 @@ def __init__( else: self.partial_ignore_labels = None - print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ + self.partial_ignore_dense_heatmap = 
partial_ignore_dense_heatmap + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): @@ -691,7 +693,8 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + if self.partial_ignore_dense_heatmap: + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals if len(neg_inds) > 0: # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. _cols = torch.as_tensor( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py index b9fafe7a9..88e3cbc54 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py @@ -64,6 +64,7 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_dense_heatmap=False ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 6e0d7445b..903df577c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -63,7 +63,8 @@ bbox_coder=dict( pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], - ) + ), + partial_ignore_dense_heatmap=False ), ) @@ -113,7 +114,7 @@ type="LoadPointsFromMultiSweeps", sweeps_num=_base_.sweeps_num, load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, + use_dim=_base_.lidar_sweep_dims, pad_empty_sweeps=True, remove_close=True, backend_args=_base_.backend_args, @@ -176,21 +177,21 @@ dict( type="LoadPointsFromFile", coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, ), dict( type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, pad_empty_sweeps=True, remove_close=True, - backend_args=backend_args, + backend_args=_base_.backend_args, test_mode=True, ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), dict( type="Pack3DDetInputs", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py index 68c736749..bb10d484d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py @@ -65,6 +65,7 @@ voxel_size=_base_.voxel_size[0:2], ), partial_ignore_labels=["traffic_cone", "barrier"], + partial_ignore_dense_heatmap=True, loss_heatmap=dict( reduction="none", ), diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py new file mode 100644 index 000000000..61b9d35f3 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -0,0 +1,317 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_lidar_intensity_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_8/" + +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type +experiment_name 
= "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=True, + ), + pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_middle_encoder=dict( + in_channels=_base_.point_use_dim, + sparse_shape=_base_.grid_size, + num_aug_features=5, + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + aug_features_min_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + aug_features_max_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + partial_ignore_labels=["traffic_cone", "barrier"], + partial_ignore_dense_heatmap=False, + loss_heatmap=dict( + reduction="none", + ), + ), +) + +db_sampler = dict( + data_root=data_root, + info_path=info_directory_path + _base_.info_train_file_name, + rate=1.0, + prepare=dict( + filter_by_difficulty=[-1], + filter_by_min_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + traffic_cone=5, + barrier=5, + bicycle=5, + pedestrian=5)), + classes=_base_.class_names, + sample_groups=dict( + car=0, + truck=0, + bus=0, + barrier=2, + traffic_cone=4), + points_loader=dict( + 
type='LoadPointsFromCurrentFileSweep', + coord_type='LIDAR', + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + sweeps_num=_base_.sweeps_num, + pad_empty_sweeps=True, + remove_close=True, + test_mode=False, + )) + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict(type="ObjectSample", db_sampler=db_sampler), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + 
load_dim=_base_.point_load_dim, + use_dim=_base_.point_use_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + 
), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) + +load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" From b1b247a148f1a060eec5f70f7d8f1d8911524ebe Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 12:28:55 +0900 Subject: [PATCH 080/183] Add the script --- ...pn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py | 2 ++ ..._30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py | 2 ++ 2 files changed, 4 insertions(+) diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py index 903df577c..7fef2db47 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py @@ -88,6 +88,8 @@ car=0, truck=0, bus=0, + bicycle=0, + pedestrian=0, barrier=2, traffic_cone=4), points_loader=dict( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py index 61b9d35f3..e5e9c9ff3 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -92,6 +92,8 @@ car=0, truck=0, bus=0, + bicycle=0, + pedestrian=0, barrier=2, traffic_cone=4), points_loader=dict( From 36e3811139144ea84899aadfa8b94f66e421dd9d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 30 Apr 2026 20:38:01 +0900 Subject: [PATCH 081/183] Add the script --- .../BEVFusion/bevfusion/bevfusion_head.py | 3 + ...ase_120m_traffic_cone_ignore_copy_paste.py | 73 ++++--------------- 2 files changed, 19 insertions(+), 57 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 4894ad2e7..5b0e156d0 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -701,6 +701,9 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): self.partial_ignore_labels, device=label_weights.device, dtype=torch.long ) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 + + print("heatmap with traffic cone: ", heatmap[5].sum()) + print("heatmap with barrier: ", heatmap[6].sum()) return ( labels[None], diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py index e5e9c9ff3..41629bb17 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py @@ -74,19 +74,20 @@ db_sampler = dict( data_root=data_root, - info_path=info_directory_path + _base_.info_train_file_name, + info_path=data_root + info_directory_path + _base_.info_train_file_name, rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - traffic_cone=5, - barrier=5, - bicycle=5, - pedestrian=5)), + prepare=dict(), + # prepare=dict( + # filter_by_difficulty=[-1], + # filter_by_min_points=dict( + # car=5, + # truck=5, + # bus=5, + # trailer=5, + # traffic_cone=5, + # barrier=5, + # bicycle=5, + # pedestrian=5)), classes=_base_.class_names, sample_groups=dict( car=0, @@ -179,50 +180,6 @@ ), ] -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - 
type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - # Dataset parameters train_dataloader = dict( batch_size=_base_.train_batch_size, @@ -231,7 +188,7 @@ sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( type=_base_.dataset_type, - pipeline=_base_.train_pipeline, + pipeline=train_pipeline, modality=_base_.input_modality, backend_args=_base_.backend_args, data_root=data_root, @@ -317,3 +274,5 @@ log_processor = dict(window_size=50) load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" + +custom_hooks = [] From e9052633e80ae8ba8b36fb6554ded9c04e7bf672 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 15:29:37 +0900 Subject: [PATCH 082/183] Add traffic cone and barrier --- .../BEVFusion/bevfusion/bevfusion_head.py | 13 +- ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 4 + ...8xb8_j6gen2_base_120m_traffic_cone_full.py | 164 --------- ..._base_120m_traffic_cone_full_copy_paste.py | 317 ------------------ ...b8_j6gen2_base_120m_traffic_cone_ignore.py | 168 ---------- ...ase_120m_traffic_cone_ignore_copy_paste.py | 278 --------------- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 4 + ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 4 + ...pn_50e_8xb8_base_120m_traffic_cone_full.py | 163 --------- ..._50e_8xb8_base_120m_traffic_cone_ignore.py | 163 --------- 10 files 
changed, 14 insertions(+), 1264 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 5b0e156d0..da056efcc 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -633,7 +633,6 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): ious = torch.clamp(ious, min=0.0, max=1.0) labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long) - # label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) if gt_labels_3d is not None: # default label is -1 labels += self.num_classes @@ -693,17 +692,13 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - if 
self.partial_ignore_dense_heatmap: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these proposals + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids if len(neg_inds) > 0: - # neg_inds [N] and column indices [K] must broadcast (not pair); see IndexError N vs K. + # neg_inds [N] and column indices [K] must broadcast (not pair); _cols = torch.as_tensor( self.partial_ignore_labels, device=label_weights.device, dtype=torch.long ) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 - - print("heatmap with traffic cone: ", heatmap[5].sum()) - print("heatmap with barrier: ", heatmap[6].sum()) return ( labels[None], @@ -795,10 +790,6 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, ].reshape(-1) - # layer_label_weights = label_weights[ - # ..., - # idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - # ].reshape(-1) layer_label_weights = label_weights[ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 9da67036e..d32dc9c70 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py deleted file mode 100644 index 88e3cbc54..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full.py +++ /dev/null @@ -1,164 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - 
_base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_dense_heatmap=False - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - 
type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py deleted file mode 100644 index 7fef2db47..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste.py +++ /dev/null @@ -1,317 +0,0 @@ -_base_ = [ - 
"../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_full_copy_paste" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - 
test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_dense_heatmap=False - ), -) - -db_sampler = dict( - data_root=data_root, - info_path=info_directory_path + _base_.info_train_file_name, - rate=1.0, - prepare=dict( - filter_by_difficulty=[-1], - filter_by_min_points=dict( - car=5, - truck=5, - bus=5, - trailer=5, - traffic_cone=5, - barrier=5, - bicycle=5, - pedestrian=5)), - classes=_base_.class_names, - sample_groups=dict( - car=0, - truck=0, - bus=0, - bicycle=0, - pedestrian=0, - barrier=2, - traffic_cone=4), - points_loader=dict( - type='LoadPointsFromCurrentFileSweep', - coord_type='LIDAR', - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - sweeps_num=_base_.sweeps_num, - pad_empty_sweeps=True, - remove_close=True, - test_mode=False, - )) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict(type="ObjectSample", db_sampler=db_sampler), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - 
"car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + 
_base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - 
filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" - -custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py deleted file mode 100644 index bb10d484d..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore.py +++ /dev/null @@ -1,168 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - 
voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - partial_ignore_dense_heatmap=True, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - 
persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = 
"work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py deleted file mode 100644 index 41629bb17..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste.py +++ /dev/null @@ -1,278 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_lidar_intensity_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_traffic_cone_ignore_copy_paste" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, 
time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - partial_ignore_dense_heatmap=False, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -db_sampler = dict( - data_root=data_root, - info_path=data_root + info_directory_path + _base_.info_train_file_name, - rate=1.0, - prepare=dict(), - # prepare=dict( - # filter_by_difficulty=[-1], - # filter_by_min_points=dict( - # car=5, - # truck=5, - # bus=5, - # trailer=5, - # traffic_cone=5, - # barrier=5, - # bicycle=5, - # pedestrian=5)), - classes=_base_.class_names, - sample_groups=dict( - car=0, - truck=0, - bus=0, - bicycle=0, - pedestrian=0, - barrier=2, - traffic_cone=4), - points_loader=dict( - type='LoadPointsFromCurrentFileSweep', - coord_type='LIDAR', - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - sweeps_num=_base_.sweeps_num, - pad_empty_sweeps=True, - remove_close=True, - test_mode=False, - )) - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_use_dim, - backend_args=_base_.backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - 
sweeps_num=_base_.sweeps_num, - load_dim=_base_.point_load_dim, - use_dim=_base_.lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=_base_.backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict(type="ObjectSample", db_sampler=db_sampler), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - 
filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, 
max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) - -load_from = "work_dirs/bevfusion_lidar_2.7.0/base/epoch_48.pth" - -custom_hooks = [] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index c884c0aef..406e87655 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 79337d976..e8068332a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -62,6 +62,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py deleted file mode 100644 index 38b1e8ea5..000000000 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_full.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_traffic_cone_full/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - 
point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=None, - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - 
modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py deleted file mode 100644 index 90136a748..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_traffic_cone_ignore.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = 
dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_traffic_cone/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=True, - ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), - pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, - sparse_shape=_base_.grid_size, - num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 0.2, - ], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - 
persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - 
data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) From dc2265e33d56726313862e0f42fbd87f8fd65fde Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:32:44 +0000 Subject: [PATCH 083/183] ci(pre-commit): autofix --- .../detection3d/dataset/t4dataset/base.py | 14 ++---- .../t4dataset/j6gen2_base_traffic_cone.py | 2 +- .../t4dataset/jpntaxi_base_traffic_cone.py | 2 +- .../dataset/t4dataset/jpntaxi_gen2.py | 6 +-- .../detection3d/dataset/t4dataset/largebus.py | 4 +- autoware_ml/detection3d/datasets/t4dataset.py | 6 +-- .../datasets/transforms/__init__.py | 2 +- .../datasets/transforms/loading.py | 47 +++++++++++-------- .../BEVFusion/bevfusion/bevfusion_head.py | 45 +++++++++++------- projects/BEVFusion/bevfusion/utils.py | 4 +- .../default_lidar_second_secfpn_120m.py | 2 +- .../pipelines/default_lidar_intensity_120m.py | 6 +-- tools/detection3d/create_data_t4dataset.py | 15 ++++-- .../t4dataset_converters/t4converter.py | 2 +- 14 files changed, 86 insertions(+), 71 deletions(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 4248c90e6..3be587072 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -143,20 +143,12 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": 
"traffic_cone", - "trafficcone": "traffic_cone", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", "barrier": "barrier", } -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier" -] +class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py index 8c57cf4fa..176763b54 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py @@ -208,4 +208,4 @@ matching_class_agnostic_fps=False, ) -remove_non_traffic_cone_barrier = True \ No newline at end of file +remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py index c7e631458..61e9e915c 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py @@ -199,4 +199,4 @@ matching_class_agnostic_fps=False, ) -remove_non_traffic_cone_barrier = True \ No newline at end of file +remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index 6b7250673..dbd6e2813 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -117,9 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": 
"barrier", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index 2b54629eb..2212b8e56 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -130,8 +130,8 @@ "bus", "bicycle", "pedestrian", - "traffic_cone", - "barrier", + "traffic_cone", + "barrier", ] num_class = len(class_names) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index 526150755..d7fed6256 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -191,8 +191,8 @@ def parse_data_info(self, info: dict) -> dict: info["lidar2img"] = np.array(info["images"][self.default_cam_key]["lidar2img"]) else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] - + # Default difficulty to 0 if not present - if 'difficulty' not in info: - info['difficulty'] = 0 + if "difficulty" not in info: + info["difficulty"] = 0 return info diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index dc95d27f8..b517bf1ea 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,4 @@ -from .object_min_points_filter import ObjectMinPointsFilter from .loading import LoadPointsFromCurrentFileSweep +from .object_min_points_filter import ObjectMinPointsFilter __all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py index 535653d9b..09beddc34 100644 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ 
b/autoware_ml/detection3d/datasets/transforms/loading.py @@ -1,15 +1,14 @@ from typing import List, Optional, Union from mmcv.transforms import BaseTransform -from mmdet3d.structures.ops import box_np_ops from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps +from mmdet3d.structures.ops import box_np_ops from mmengine.registry import TRANSFORMS - @TRANSFORMS.register_module() class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. + """Load points from the current file and sweep. This is used to load the points from the current file and sweep for copy-paste augmentation. Args: @@ -20,24 +19,32 @@ class LoadPointsFromCurrentFileSweep(BaseTransform): corresponding backend. Defaults to None. """ - def __init__(self, - coord_type: str, - load_dim: int = 6, - use_dim: Union[int, List[int]] = [0, 1, 2], - shift_height: bool = False, - use_color: bool = False, - norm_intensity: bool = False, - norm_elongation: bool = False, - backend_args: Optional[dict] = None, - sweeps_num: int = 10, - pad_empty_sweeps: bool = False, - remove_close: bool = False, - test_mode: bool = False - ) -> None: - - self.points_loader = LoadPointsFromFile(coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args) + def __init__( + self, + coord_type: str, + load_dim: int = 6, + use_dim: Union[int, List[int]] = [0, 1, 2], + shift_height: bool = False, + use_color: bool = False, + norm_intensity: bool = False, + norm_elongation: bool = False, + backend_args: Optional[dict] = None, + sweeps_num: int = 10, + pad_empty_sweeps: bool = False, + remove_close: bool = False, + test_mode: bool = False, + ) -> None: + + self.points_loader = LoadPointsFromFile( + coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args + ) if sweeps_num > 0: - self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps(sweeps_num=sweeps_num, pad_empty_sweeps=pad_empty_sweeps, 
remove_close=remove_close, test_mode=test_mode) + self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps( + sweeps_num=sweeps_num, + pad_empty_sweeps=pad_empty_sweeps, + remove_close=remove_close, + test_mode=test_mode, + ) else: self.points_from_multi_sweeps_loader = None diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index da056efcc..b62113f65 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -13,9 +13,9 @@ from mmdet3d.structures import xywhr2xyxyr from mmdet.models.task_modules import AssignResult, PseudoSampler, build_assigner, build_bbox_coder, build_sampler from mmdet.models.utils import multi_apply +from mmengine.logging import print_log from mmengine.structures import InstanceData from torch import nn -from mmengine.logging import print_log def clip_sigmoid(x, eps=1e-4): @@ -71,7 +71,7 @@ def __init__( test_cfg=None, bbox_coder=None, partial_ignore_labels=None, - partial_ignore_dense_heatmap=False + partial_ignore_dense_heatmap=False, ): super().__init__() self.class_names = class_names @@ -187,17 +187,24 @@ def __init__( cluster["class_indices"] = sorted( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) - + # If true, only compute loss for traffic cone and barrier when it's available in the frame if partial_ignore_labels is not None: - assert loss_heatmap['reduction'] == 'none', "Loss reduction must be 'none' for partial traffic cone and barrier" - self.partial_ignore_labels = [self.class_name_to_indices[class_name] for class_name in partial_ignore_labels] + assert ( + loss_heatmap["reduction"] == "none" + ), "Loss reduction must be 'none' for partial traffic cone and barrier" + self.partial_ignore_labels = [ + self.class_name_to_indices[class_name] for class_name in partial_ignore_labels + ] else: self.partial_ignore_labels = None - + self.partial_ignore_dense_heatmap = 
partial_ignore_dense_heatmap - print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") + print_log( + f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", + logger="current", + ) def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -469,7 +476,9 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F return rets[0] - def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict]): + def get_targets( + self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict] + ): """Generate training targets. 
Args: batch_gt_instances_3d (List[InstanceData]): @@ -579,7 +588,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): num_layer = self.num_decoder_layers else: num_layer = 1 - + assign_result_list = [] for idx_layer in range(num_layer): bboxes_tensor_layer = bboxes_tensor[ @@ -653,10 +662,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): label_weights[pos_inds] = 1.0 else: label_weights[pos_inds] = self.train_cfg.pos_weight - + if len(neg_inds) > 0: label_weights[neg_inds] = 1.0 - + # # compute dense heatmap targets device = labels.device gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device) @@ -692,12 +701,10 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): # Ignore labels for traffic cone and barrier traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: - heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids if len(neg_inds) > 0: # neg_inds [N] and column indices [K] must broadcast (not pair); - _cols = torch.as_tensor( - self.partial_ignore_labels, device=label_weights.device, dtype=torch.long - ) + _cols = torch.as_tensor(self.partial_ignore_labels, device=label_weights.device, dtype=torch.long) label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 return ( @@ -732,7 +739,9 @@ def loss(self, batch_feats, batch_data_samples): return loss - def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas): + def loss_by_feat( + self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas + ): ( labels, label_weights, @@ -775,7 +784,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], 
batch_gt_instances_3d: Li # (Batch, num_classes) for cls_i, class_name in enumerate(self.class_names): loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - + # Prevent loss item to avoid computing gradients twice. This is for logging. loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index b6bd2be41..8fd83a0c5 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -259,10 +259,10 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg) iou = self.iou_calculator(bboxes, gt_bboxes) iou_cost = self.iou_cost(iou) - + # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - + # if ignore_labels is not None: # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 023c6774d..809179b20 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -114,6 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_ignore_labels=None + partial_ignore_labels=None, ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 9c7e02977..e2de195e9 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -13,8 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -132,7 +132,7 @@ "timestamp", "vehicle_type", "city", - "traffic_cone_barrier_status", + "traffic_cone_barrier_status", ], ), ] diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 9550b2872..3b02017e0 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -273,7 +273,7 @@ def main(): if cfg.filter_attributes is None: print_log("No attribute filtering is applied!") - + remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False) # Get every pair of min-max distance filtering thresholds bev_distance_ranges = [] @@ -310,9 +310,14 @@ def main(): ) dataset_scene_info = scene_id.split("/") if len(dataset_scene_info) == 5: - t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = dataset_scene_info + t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = ( + dataset_scene_info + ) if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false": - print_log(f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", logger="current") + print_log( + f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", + logger="current", + ) continue elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info @@ -336,7 +341,9 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type) + info = get_info( + cfg, t4, sample, 
i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type + ) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py index 5dfd1dc1f..ccc88b2d1 100644 --- a/tools/detection3d/t4dataset_converters/t4converter.py +++ b/tools/detection3d/t4dataset_converters/t4converter.py @@ -626,7 +626,7 @@ def get_lidarseg_annotations( ) -> dict: if not hasattr(t4, "lidarseg") or not t4.lidarseg: return dict() - + if sd_record.info_filename is None: print(f"sample {lidar_token} doesn't have lidar info_filename") return dict() From 15bbf0ef0920f41f6e267b312447d2900b5bd8fd Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 15:35:46 +0900 Subject: [PATCH 084/183] remove unecessary changes --- .../t4dataset/j6gen2_base_traffic_cone.py | 211 ------------------ .../t4dataset/jpntaxi_base_traffic_cone.py | 202 ----------------- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/loading.py | 55 ----- .../BEVFusion/bevfusion/bevfusion_head.py | 14 +- projects/BEVFusion/bevfusion/utils.py | 9 +- 6 files changed, 6 insertions(+), 488 deletions(-) delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py delete mode 100644 autoware_ml/detection3d/datasets/transforms/loading.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py deleted file mode 100644 index 176763b54..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base_traffic_cone.py +++ /dev/null @@ -1,211 +0,0 @@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - 
"autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_train.pkl" -info_val_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_val.pkl" -info_test_file_name = "t4dataset_j6gen2_base_traffic_cone_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_j6gen2_base_traffic_cone_statistics_test.parquet" - -# dataset scene setting -dataset_version_list = [ - "db_j6gen2_v1", - "db_j6gen2_v2", - "db_j6gen2_v3", - "db_j6gen2_v4", - "db_j6gen2_v5", - "db_j6gen2_v6", - "db_j6gen2_v7", - "db_j6gen2_v8", - "db_j6gen2_v9", - "db_largebus_v1", - "db_largebus_v2", - "db_largebus_v3", -] - -dataset_test_groups = { - "largebus": ("t4dataset_largebus_traffic_cone_infos_test.pkl", False), - "j6gen2": ("t4dataset_j6gen2_traffic_cone_infos_test.pkl", False), - "j6gen2_base": ("t4dataset_j6gen2_base_traffic_cone_infos_test.pkl", True), -} - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) - -camera_types = { - "CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - 
"pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - "movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": "bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": 
"traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of ["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": (100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", "cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in 
[(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) - -remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py deleted file mode 100644 index 61e9e915c..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base_traffic_cone.py +++ /dev/null @@ -1,202 +0,0 @@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - "autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_train.pkl" -info_val_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_val.pkl" -info_test_file_name = "t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_jpntaxi_base_traffic_cone_statistics_test.parquet" - -# dataset scene setting -dataset_test_groups = { - "jpntaxi_base_traffic_cone": ("t4dataset_jpntaxi_base_traffic_cone_infos_test.pkl", True), -} - -dataset_version_list = [ - "db_jpntaxigen2_v1", - "db_jpntaxigen2_v2", - "db_jpntaxi_v1", - "db_jpntaxi_v2", - "db_jpntaxi_v4", -] - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) -camera_types = { - 
"CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - "movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": "bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency 
(ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] - -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of ["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": (100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -# Add filter attributes -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", 
"cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) - -remove_non_traffic_cone_barrier = True diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index b517bf1ea..6bc932f1a 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,3 @@ -from .loading import LoadPointsFromCurrentFileSweep from .object_min_points_filter import ObjectMinPointsFilter -__all__ = ["ObjectMinPointsFilter", "LoadPointsFromCurrentFileSweep"] +__all__ = ["ObjectMinPointsFilter"] diff --git a/autoware_ml/detection3d/datasets/transforms/loading.py b/autoware_ml/detection3d/datasets/transforms/loading.py deleted file mode 100644 index 09beddc34..000000000 --- a/autoware_ml/detection3d/datasets/transforms/loading.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import List, Optional, Union - -from mmcv.transforms import BaseTransform -from mmdet3d.datasets.transforms import LoadPointsFromFile, LoadPointsFromMultiSweeps -from mmdet3d.structures.ops import box_np_ops -from mmengine.registry import TRANSFORMS - - 
-@TRANSFORMS.register_module() -class LoadPointsFromCurrentFileSweep(BaseTransform): - """Load points from the current file and sweep. - This is used to load the points from the current file and sweep for copy-paste augmentation. - - Args: - coord_type (str): The type of coordinates of points cloud. - load_dim (int): The dimension of the loaded points. - use_dim (list[int] | int): Which dimensions of the points to use. - backend_args (dict, optional): Arguments to instantiate the - corresponding backend. Defaults to None. - """ - - def __init__( - self, - coord_type: str, - load_dim: int = 6, - use_dim: Union[int, List[int]] = [0, 1, 2], - shift_height: bool = False, - use_color: bool = False, - norm_intensity: bool = False, - norm_elongation: bool = False, - backend_args: Optional[dict] = None, - sweeps_num: int = 10, - pad_empty_sweeps: bool = False, - remove_close: bool = False, - test_mode: bool = False, - ) -> None: - - self.points_loader = LoadPointsFromFile( - coord_type=coord_type, load_dim=load_dim, use_dim=use_dim, backend_args=backend_args - ) - if sweeps_num > 0: - self.points_from_multi_sweeps_loader = LoadPointsFromMultiSweeps( - sweeps_num=sweeps_num, - pad_empty_sweeps=pad_empty_sweeps, - remove_close=remove_close, - test_mode=test_mode, - ) - else: - self.points_from_multi_sweeps_loader = None - - def transform(self, results: dict) -> dict: - points = self.points_loader(results) - if self.points_from_multi_sweeps_loader is not None: - points = self.points_from_multi_sweeps_loader(points) - return points diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index b62113f65..dd566eab1 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,9 +70,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None, - partial_ignore_dense_heatmap=False, - ): + partial_ignore_labels=None): super().__init__() 
self.class_names = class_names self.num_classes = len(self.class_names) @@ -198,13 +196,9 @@ def __init__( ] else: self.partial_ignore_labels = None - - self.partial_ignore_dense_heatmap = partial_ignore_dense_heatmap - print_log( - f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, partial ignore dense heatmap: {self.partial_ignore_dense_heatmap}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", - logger="current", - ) + + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 8fd83a0c5..c47604dbd 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -238,7 +238,7 @@ def __init__( self.iou_cost = TASK_UTILS.build(iou_cost) self.iou_calculator = TASK_UTILS.build(iou_calculator) - def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_labels=None): + def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg): num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0) # 1. assign -1 by default @@ -263,13 +263,6 @@ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg, ignore_label # weighted sum of above three costs cost = cls_cost + reg_cost + iou_cost - # if ignore_labels is not None: - # preds_labels = pred_instances.scores.argmax(dim=1, keepdim=False) - # print("shape of pred_instances.scores, preds_labels", pred_instances.scores.shape, preds_labels.shape) - # ignore_preds_masks = preds_labels.isin(ignore_labels) - # cost[ignore_preds_masks] = 10000 - # print("shape of ignore_preds_masks, cost", ignore_preds_masks.shape, cost.shape) - # 3. 
do Hungarian matching on CPU using linear_sum_assignment cost = cost.detach().cpu() if linear_sum_assignment is None: From 1323d4ed662678fc225ca43ef7baaf5a8b144cc1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:36:38 +0000 Subject: [PATCH 085/183] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/bevfusion_head.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index dd566eab1..c37c5a538 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -70,7 +70,8 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, - partial_ignore_labels=None): + partial_ignore_labels=None, + ): super().__init__() self.class_names = class_names self.num_classes = len(self.class_names) @@ -196,7 +197,7 @@ def __init__( ] else: self.partial_ignore_labels = None - + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") From 585a0b2b068d5c721d657a93e15bbbe6f904cf45 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 7 May 2026 20:15:05 +0900 Subject: [PATCH 086/183] remove unecessary changes --- Dockerfile | 6 +- .../dataset/t4dataset/j6gen2_v2.py | 194 ++++++++++++++++++ ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 2 +- ...n_30e_8xb8_j6gen2_base_120m_t4metric_v2.py | 4 +- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 8 +- ..._30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 4 +- 6 files changed, 207 insertions(+), 11 deletions(-) create mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py diff --git a/Dockerfile b/Dockerfile index 3e9caecb9..2fbcaa620 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,13 +61,15 @@ RUN python3 -m pip --no-cache-dir 
install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@9d8c9773d35177bb0b7f2606f429f58a5fb708ca +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@3c9577dc23fd76a049559b42656ca46c1c32fa66 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ setuptools==60.2.0 \ transformers==4.51.3 \ - polars==1.37.1 + polars==1.37.1 \ + onnx_graphsurgeon==0.5.8 \ + spconv-cu126==2.3.8 # NOTE(knzo25): this patch is needed to use numpy versions over 1.23.5 (version used in mmdet3d 1.4.0) # It can be safely deleted when mmdet3d updates the numpy version diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py new file mode 100644 index 000000000..e4375d576 --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py @@ -0,0 +1,194 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl" +info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl" +info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet" + +# dataset scene setting +dataset_version_list = [ + "db_j6gen2_v2", +] + +dataset_test_groups = { + "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True), +} + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + 
CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle 
rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + "vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + ("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + 
("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", "motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index d32dc9c70..380a4ba81 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 39462b1f6..e3f7d5146 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index 406e87655..eec87a585 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" +experiment_group_name = 
"bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -164,4 +164,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b50b093f7..5190182cc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) From a47646ea6476603518857c0a60cf18b30d5720a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 11:15:34 +0000 Subject: [PATCH 087/183] ci(pre-commit): autofix --- 
...oxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index 5190182cc..213f0041b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) From 16eb517a21911d55513ad85863b47c0a6576a200 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 12:04:10 +0900 Subject: [PATCH 088/183] Add the script --- .../bevfusion/bevfusion_voxel_encoder.py | 66 ++++--- ..._base_120m_sincos_48_channels_32_points.py | 164 ++++++++++++++++++ 2 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 5037113aa..2cde57cc5 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -57,18 +57,19 @@ def __init__(self, super(BEVFusionVoxelEncoder, self).__init__() assert len(feat_channels) > 0 self.legacy = legacy + pfn_in_channels = 0 if with_cluster_center: - 
in_channels += 3 + pfn_in_channels += 3 if with_voxel_center: - in_channels += 3 + pfn_in_channels += 3 if with_distance: - in_channels += 1 + pfn_in_channels += 1 self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center # Create PillarFeatureNet layers self.in_channels = in_channels - feat_channels = [in_channels] + list(feat_channels) + feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] for i in range(len(feat_channels) - 1): in_filters = feat_channels[i] @@ -97,7 +98,8 @@ def __init__(self, self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -112,12 +114,26 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Returns: torch.Tensor: Features of pillars in shape (M, C). 
""" - if self.min_norm_values is not None and self.max_norm_values is not None: - features_norm = (features - self.min_norm_values) / (self.max_norm_values - self.min_norm_values) - else: - features_norm = features + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() - features_ls = [features_norm] + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + features_ls = [] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: points_mean = features[:, :, :3].sum( @@ -125,9 +141,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, -1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Map to [0, 1] if available - if self.min_norm_values is not None and self.max_norm_values is not None: - voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) - f_cluster = f_cluster / voxel_size + # if self.min_norm_values is not None and self.max_norm_values is not None: + # voxel_size = features.new_tensor([self.vx, self.vy, self.vz]) + # f_cluster = f_cluster / voxel_size features_ls.append(f_cluster) # Find distance of x, y, and z from pillar center @@ -156,8 +172,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset) - if self.min_norm_values is not None and self.max_norm_values is 
not None: - f_center = f_center / (voxel_size * 0.5) + # if self.min_norm_values is not None and self.max_norm_values is not None: + # f_center = f_center / (voxel_size * 0.5) features_ls.append(f_center) if self._with_distance: @@ -165,19 +181,23 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, features_ls.append(points_dist) # Combine together feature decorations - features = torch.cat(features_ls, dim=-1) + voxel_feature_offsets = torch.cat(features_ls, dim=-1) + # The feature decorations were calculated without regard to whether # pillar was empty. Need to ensure that # empty pillars remain set to zeros. - voxel_count = features.shape[1] - mask = get_paddings_indicator(num_points, voxel_count, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(features) - features *= mask - + mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) + voxel_feature_offsets *= mask + + # PFN for pfn in self.pfn_layers: - features = pfn(features, num_points) + voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) + + # Concat + features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) - return features.squeeze(1) + return features @MODELS.register_module() diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py new file mode 100644 index 000000000..073249a3e --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py @@ -0,0 +1,164 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + 
"../default/pipelines/default_lidar_120m.py", + "../default/models/default_lidar_second_secfpn_120m.py", + "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type +experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + voxelize_cfg=dict( + max_num_points=32, + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + voxelize_reduce=False, + ), + pts_voxel_encoder=dict( + _delete_=True, + type="BEVFusionVoxelEncoder", + in_channels=4, + with_distance=False, + with_cluster_center=True, + with_voxel_center=True, + feat_channels=[16], + point_cloud_range=_base_.point_cloud_range, + voxel_size=_base_.voxel_size, + norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + pts_middle_encoder=dict( + in_channels=48, + sparse_shape=_base_.grid_size, + # num_aug_features=4, + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here + # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], 
_base_.point_cloud_range[2], 0.0], + # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + ), + bbox_head=dict( + class_names=_base_.class_names, # Use class names to identify the correct class indices + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + 
dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) From 731f6d9fea6db8f90a4e4c36c9dedb541292da4a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 16:58:35 +0900 Subject: [PATCH 089/183] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion.py | 19 +- .../bevfusion/bevfusion_voxel_encoder.py | 6 +- .../BEVFusion/bevfusion/sparse_encoder.py | 20 --- ...second_secfpn_30e_8xb8_j6gen2_base_120m.py | 28 +-- ...econd_secfpn_30e_8xb8_jpntaxi_base_120m.py | 32 ++-- ...oxel_second_secfpn_50e_8xb16_base_120m.py} | 19 +- ..._voxel_second_secfpn_50e_8xb8_base_120m.py | 160 ----------------- ...n_50e_8xb8_base_120m_sincos_34_channels.py | 163 ----------------- 
...b8_base_120m_sincos_timeexp_34_channels.py | 165 ----------------- ...0m_sincos_timeexp_34_channels_32_points.py | 166 ------------------ ...d_secfpn_50e_8xb8_base_120m_t4metric_v2.py | 2 +- .../default_lidar_second_secfpn_120m.py | 15 +- ...fault_lidar_second_secfpn_120m_iou_loss.py | 117 +----------- .../default_camera_lidar_intensity_120m.py | 15 +- .../pipelines/default_lidar_intensity_120m.py | 2 +- ...e.py => default_30e_8xb16_adamw_cosine.py} | 9 +- ...e.py => default_50e_8xb16_adamw_cosine.py} | 9 +- 17 files changed, 59 insertions(+), 888 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py} (85%) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb8_adamw_cosine.py => default_30e_8xb16_adamw_cosine.py} (95%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb8_adamw_cosine.py => default_50e_8xb16_adamw_cosine.py} (95%) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index bc3f1b094..b113bb566 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -56,12 +56,10 @@ def __init__( super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg) if voxelize_cfg is not None: - 
self.voxelize_reduce = voxelize_cfg.pop("voxelize_reduce") self.pts_voxel_layer = Voxelization(**voxelize_cfg) self.pts_voxel_encoder = MODELS.build(pts_voxel_encoder) self.pts_middle_encoder = MODELS.build(pts_middle_encoder) else: - self.voxelize_reduce = False self.pts_voxel_layer = None self.pts_voxel_encoder = None self.pts_middle_encoder = None @@ -207,10 +205,6 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 - - if self.pts_voxel_encoder is not None: - assert not self.voxelize_reduce - feats = self.pts_voxel_encoder(feats, sizes, coords) else: # NOTE(knzo25): onnx inference. Voxelization happens outside the graph with torch.cuda.amp.autocast(enabled=False): @@ -224,12 +218,7 @@ def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: # batch_size = coords[-1, 0] + 1 batch_size = 1 print("Run onnx point_eSpConvst") - if self.pts_voxel_encoder is not None: - feats = self.pts_voxel_encoder(feats, sizes, coords) - else: - assert self.voxelize_reduce - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + feats = self.pts_voxel_encoder(feats, sizes, coords) x = self.pts_middle_encoder(feats, coords, batch_size) return x @@ -255,9 +244,9 @@ def voxelize(self, points): assert len(sizes) > 0, "No points in the voxel" sizes = torch.cat(sizes, dim=0) - if self.voxelize_reduce: - feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) - feats = feats.contiguous() + # if self.voxelize_reduce: + # feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) + # feats = feats.contiguous() return feats, coords, sizes diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 2cde57cc5..6c41234c5 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -376,11 +376,7 @@ class BEVFusionVoxelMeanSinCosEncoder(nn.Module): def __init__(self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], - in_channels: Optional[int] = 4, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - mode: Optional[str] = 'max'): + in_channels: Optional[int] = 4): super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() # Create PillarFeatureNet layers diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 019cb630c..ce45d4536 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -47,9 +47,6 @@ class BEVFusionSparseEncoder(SparseEncoder): def __init__( self, in_channels, - aug_features_min_values, - aug_features_max_values, - num_aug_features, sparse_shape, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), @@ -64,9 +61,6 @@ def __init__( assert block_type in ["conv_module", "basicblock"] self.sparse_shape = sparse_shape self.in_channels = in_channels - self.register_buffer("aug_features_min_values", torch.tensor(aug_features_min_values)) - self.register_buffer("aug_features_max_values", torch.tensor(aug_features_max_values)) - self.num_aug_features = num_aug_features self.order = order self.base_channels = base_channels self.output_channels = output_channels @@ -77,10 +71,6 @@ def __init__( self.return_middle_feats = return_middle_feats # Spconv init all weight on its own - if num_aug_features: - self.in_channels = in_channels * num_aug_features * 2 - self.register_buffer("exponents", (2 ** torch.arange(0, num_aug_features).float())) - assert isinstance(order, tuple) and len(order) == 3 assert set(order) == {"conv", "norm", "act"} @@ -140,16 +130,6 @@ def forward(self, 
voxel_features, coors, batch_size): output features. When self.return_middle_feats is True, the module returns middle features. """ - - if self.num_aug_features: - num_points = voxel_features.shape[0] - x = (voxel_features - self.aug_features_min_values.view(1, -1)) / ( - self.aug_features_max_values - self.aug_features_min_values - ).view(1, -1) - y = x.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - y = y.reshape(num_points, -1) - voxel_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - coors = coors.int() input_sp_tensor = SparseConvTensor(voxel_features, coors, self.sparse_shape, batch_size) x = self.conv_input(input_sp_tensor) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 380a4ba81..4cf51faa5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -25,28 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_voxel_encoder=dict( + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + 
min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + ), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=50, sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index eec87a585..3b7c23b18 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_lidar_intensity_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -15,8 +15,8 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type -experiment_name = 
"lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,28 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=True, ), - pts_voxel_encoder=dict(num_features=_base_.point_use_dim), + pts_voxel_encoder=dict( + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], + max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + ), pts_middle_encoder=dict( - in_channels=_base_.point_use_dim, + in_channels=50, sparse_shape=_base_.grid_size, - num_aug_features=5, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - aug_features_min_values=[ - _base_.point_cloud_range[0], - _base_.point_cloud_range[1], - _base_.point_cloud_range[2], - 0.0, - 0.0, - ], - aug_features_max_values=[ - _base_.point_cloud_range[3], - _base_.point_cloud_range[4], - _base_.point_cloud_range[5], - 255.0, - 0.2, - ], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py similarity index 85% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index d856b1d4b..6d3a1f93b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_sincos" +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -25,23 +25,16 @@ voxelize_cfg=dict( point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=False, ), pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelMeanSinCosEncoder", - in_channels=4, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here + in_channels=len(_base_.lidar_sweep_dims), + # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], 
_base_.point_cloud_range[5], 0.2], ), pts_middle_encoder=dict( in_channels=32, sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py deleted file mode 100644 index 1f52662a4..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ /dev/null @@ -1,160 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m_iou_loss.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - 
voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelMeanSinCosEncoder", - in_channels=4, - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=32, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - partial_ignore_labels=["traffic_cone", "barrier"], - loss_heatmap=dict( - reduction="none", - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - 
data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py deleted file mode 100644 index 54af6be5f..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_34_channels.py +++ /dev/null @@ -1,163 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - 
type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + 
_base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - 
filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py deleted file mode 100644 index d7e61102b..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels.py +++ /dev/null @@ -1,165 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - 
type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - time_lag_channel_index=3, - time_exp_factor=1.0, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - 
data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - 
eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py deleted file mode 100644 index f784b2386..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_timeexp_34_channels_32_points.py +++ /dev/null @@ -1,166 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_34_channels_timeexp_32_points" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - max_num_points=32, - point_cloud_range=_base_.point_cloud_range, - 
voxel_size=_base_.voxel_size, - voxelize_reduce=False, - ), - pts_voxel_encoder=dict( - _delete_=True, - type="BEVFusionVoxelSinCosEncoder", - in_channels=4, - time_lag_channel_index=3, - time_exp_factor=1.0, - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - pts_middle_encoder=dict( - in_channels=34, - sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - 
pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - 
backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py index efcd091f5..98a65a3f9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 11a1b42b7..c097d10bf 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -1,5 +1,5 @@ num_proposals = 500 -max_num_points = 10 +max_num_points = 32 max_voxels = [120000, 160000] model = dict( @@ -7,19 +7,18 @@ voxelize_cfg=dict( max_num_points=max_num_points, max_voxels=max_voxels, - voxelize_reduce=True, ), data_preprocessor=dict( 
type="Det3DDataPreprocessor", pad_size_divisor=32, ), - pts_voxel_encoder=dict(type="HardSimpleVFE"), + pts_voxel_encoder=dict( + type="BEVFusionVoxelMeanSinCosEncoder", + in_channels=4, + ), pts_middle_encoder=dict( type="BEVFusionSparseEncoder", in_channels=5, - aug_features_min_values=[], - aug_features_max_values=[], - num_aug_features=0, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), @@ -112,9 +111,7 @@ reduction="mean", loss_weight=1.0, ), - # loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), - loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), + loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), - partial_ignore_labels=None, ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py index 792392c09..e90687fe3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m_iou_loss.py @@ -1,119 +1,10 @@ -num_proposals = 500 -max_num_points = 10 -max_voxels = [120000, 160000] +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] model = dict( - type="BEVFusion", - voxelize_cfg=dict( - max_num_points=max_num_points, - max_voxels=max_voxels, - voxelize_reduce=True, - ), - data_preprocessor=dict( - type="Det3DDataPreprocessor", - pad_size_divisor=32, - ), - pts_voxel_encoder=dict(type="HardSimpleVFE"), - pts_middle_encoder=dict( - type="BEVFusionSparseEncoder", - in_channels=5, - aug_features_min_values=[], - aug_features_max_values=[], - num_aug_features=0, - order=("conv", "norm", "act"), - 
norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), - encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), - encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), - block_type="basicblock", - ), - pts_backbone=dict( - type="SECOND", - in_channels=256, - out_channels=[128, 256], - layer_nums=[5, 5], - layer_strides=[1, 2], - norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), - conv_cfg=dict(type="Conv2d", bias=False), - ), - pts_neck=dict( - type="SECONDFPN", - in_channels=[128, 256], - out_channels=[256, 256], - upsample_strides=[1, 2], - norm_cfg=dict(type="BN", eps=0.001, momentum=0.01), - upsample_cfg=dict(type="deconv", bias=False), - use_conv_for_no_stride=True, - ), bbox_head=dict( - type="BEVFusionHead", - num_proposals=num_proposals, - auxiliary=True, - in_channels=512, - hidden_channel=128, - nms_kernel_size=3, - bn_momentum=0.1, - num_decoder_layers=1, - decoder_layer=dict( - type="TransformerDecoderLayer", - self_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), - cross_attn_cfg=dict(embed_dims=128, num_heads=8, dropout=0.1), - ffn_cfg=dict( - embed_dims=128, - feedforward_channels=256, - num_fcs=2, - ffn_drop=0.1, - act_cfg=dict(type="ReLU", inplace=True), - ), - norm_cfg=dict(type="LN"), - pos_encoding_cfg=dict(input_channel=2, num_pos_feats=128), - ), - train_cfg=dict( - dataset="t4datasets", - out_size_factor=8, - gaussian_overlap=0.1, - min_radius=2, - pos_weight=-1, - code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], - assigner=dict( - type="HungarianAssigner3D", - iou_calculator=dict(type="BboxOverlaps3D", coordinate="lidar"), - cls_cost=dict(type="mmdet.FocalLossCost", gamma=2.0, alpha=0.25, weight=0.15), - reg_cost=dict(type="BBoxBEVL1Cost", weight=0.25), - iou_cost=dict(type="IoU3DCost", weight=0.25), - ), - ), - test_cfg=dict( - dataset="t4datasets", - out_size_factor=8, - nms_type=None, # Set to "circle" for circle_nms - # Set NMS for different clusters - nms_clusters=[ - 
dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], nms_threshold=0.5), - dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.5), - dict(class_names=["traffic_cone"], nms_threshold=0.175), - ], - ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2], iou=[1, 2]), - bbox_coder=dict( - type="TransFusionBBoxCoder", - post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, - out_size_factor=8, - code_size=10, - ), - loss_cls=dict( - type="mmdet.FocalLoss", - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - reduction="mean", - loss_weight=1.0, - ), - loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), - loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), - loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + loss_iou=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=1.0), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 963a218e1..0b0f44c08 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=True) # range setting @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -74,14 +76,11 @@ classes=[ "car", "truck", - "construction_vehicle", "bus", - "trailer", - "barrier", - 
"motorcycle", "bicycle", "pedestrian", "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), @@ -107,6 +106,9 @@ "img_aug_matrix", "lidar_aug_matrix", "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", ], ), ] @@ -164,6 +166,9 @@ "num_pts_feats", "num_views", "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index e2de195e9..1ce2aa2be 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 32 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index a2cd2d2e9..1e1ce37ea 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -1,13 +1,12 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 -t_max = 8 +lr = 2.0e-4 +t_max = 3 max_epochs = 30 val_interval = 5 train_gpu_size = 8 -test_batch_size = 2 -train_batch_size = 8 +test_batch_size = 4 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py index 87571d0b3..5be98b3d9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py @@ -1,13 +1,12 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1.4141e-4 -t_max = 15 +lr = 2.0e-4 +t_max = 5 max_epochs = 50 val_interval = 5 train_gpu_size = 8 -test_batch_size = 2 -train_batch_size = 8 +test_batch_size = 4 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler From ad4f746d8e4150a826e82009a80beedab991c7ad Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 17:00:43 +0900 Subject: [PATCH 090/183] Resolve conflict --- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 6 +++--- .../default/schedulers/default_50e_8xb16_adamw_cosine.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index 1e1ce37ea..e3975f6eb 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -10,7 +10,7 @@ param_scheduler = [ # learning rate scheduler - # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10 + # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10 # during the next epochs, learning rate decreases from lr * 10 to # lr * 1e-4 dict( @@ -23,7 +23,7 @@ convert_to_iter_based=True, ), 
dict( - type="CosineAnnealingLR", + type="CosineAnnealingLR", T_max=(max_epochs - t_max), eta_min=lr * 1e-4, begin=t_max, @@ -32,7 +32,7 @@ convert_to_iter_based=True, ), # momentum scheduler - # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 + # During the first (max_epochs * 0.10) epochs, momentum increases from 0 to 0.85 / 0.95 # during the next epochs, momentum increases from 0.85 / 0.95 to 1 dict( type="CosineAnnealingMomentum", diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py index 5be98b3d9..d209d0c1b 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_cosine.py @@ -10,7 +10,7 @@ param_scheduler = [ # learning rate scheduler - # During the first (max_epochs * 0.4) epochs, learning rate increases from 0 to lr * 10 + # During the first (max_epochs * 0.10) epochs, learning rate increases from 0 to lr * 10 # during the next epochs, learning rate decreases from lr * 10 to # lr * 1e-4 dict( @@ -32,7 +32,7 @@ convert_to_iter_based=True, ), # momentum scheduler - # During the first (0.4 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 + # During the first (0.10 * max_epochs) epochs, momentum increases from 0 to 0.85 / 0.95 # during the next epochs, momentum increases from 0.85 / 0.95 to 1 dict( type="CosineAnnealingMomentum", From be69b11cb6732c96d5ae185db7b5c6521a65708d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 17:03:29 +0900 Subject: [PATCH 091/183] Resolve conflict --- ...ion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} | 0 ...xel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} | 0 ...on_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} | 0
...el_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} | 0 ..._secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} | 2 +- ...idar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} | 0 6 files changed, 1 insertion(+), 1 deletion(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py} (100%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py} (98%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py} (100%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py index 073249a3e..44acb083c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_sincos_48_channels_32_points.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_lidar_120m.py", "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_cosine.py", + "../default/schedulers/default_50e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m_t4metric_v2.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_t4metric_v2.py From 258e64c8ce89af6e174d9eccb17a45737e63c0cb Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 18:13:14 +0900 Subject: [PATCH 092/183] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_head.py | 5 +++-- .../default/models/default_lidar_second_secfpn_120m.py | 3 +++ 2 files changed, 6 insertions(+), 2 
deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b510eae7..1de3af05f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -782,8 +782,9 @@ def loss_by_feat( for cls_i, class_name in enumerate(self.class_names): loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] - # Prevent loss item to avoid computing gradients twice. This is for logging. - loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() + # Logging-only aggregate. Detach so it does not retain the autograd graph; + # the per-class `loss_heatmap_{class_name}` entries are what drive gradients. + loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum().detach() # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index c097d10bf..d56e6d1a3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -111,7 +111,10 @@ reduction="mean", loss_weight=1.0, ), + loss_iou=None, loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="none", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + # partial_ + partial_ignore_labels=["traffic_cone", "barrier"], ), ) From 75a46d3cfe983dc76fc4e3e478cc82b0ffe02e86 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 19:07:24 +0900 Subject: [PATCH 093/183] Updated --- .../bevfusion/bevfusion_voxel_encoder.py | 2 +- ...ond_secfpn_50e_8xb16_base_120m_48_channels.py} | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) rename 
projects/BEVFusion/configs/t4dataset/BEVFusion-L/{bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py => bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py} (88%) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 6c41234c5..06ca2e434 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -107,7 +107,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, Args: features (torch.Tensor): Point features or raw points in shape - (N, M, C). + (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4. num_points (torch.Tensor): Number of points in each pillar in shape (M). coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py similarity index 88% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 44acb083c..02f9642f2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_sincos_48_channels_32_points.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -13,20 +13,18 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = 
"bevfusion_lidar_2.8.0/base/" + _base_.dataset_type -experiment_name = "lidar_pts_encoder_sin_cos_voxel_second_secfpn_50e_8xb8_base_120m_48_channels_32_points" +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter model = dict( type="BEVFusion", voxelize_cfg=dict( - max_num_points=32, point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, - voxelize_reduce=False, ), pts_voxel_encoder=dict( _delete_=True, @@ -35,21 +33,18 @@ with_distance=False, with_cluster_center=True, with_voxel_center=True, - feat_channels=[16], + feat_channels=[16, 16], point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], + legacy=False ), pts_middle_encoder=dict( in_channels=48, sparse_shape=_base_.grid_size, - # num_aug_features=4, - # min-max normalization for x, y, z, time_lag, where the max of time lag technically is two seeps (200 ms) here - # aug_features_min_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0], - # aug_features_max_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices From 16fe09be0b4acd4bce50017351c816327f11d3fc Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:26:29 +0900 Subject: [PATCH 094/183] Updated --- 
projects/BEVFusion/bevfusion/__init__.py | 2 +- .../bevfusion/bevfusion_voxel_encoder.py | 489 +++++++++--------- 2 files changed, 239 insertions(+), 252 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 2e9822d76..ce9b31aa5 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import BEVFusionVoxelEncoder, BEVFusionVoxelSinCosEncoder, BEVFusionVoxelMeanSinCosEncoder +from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder __all__ = [ "BEVFusion", diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 06ca2e434..843624b56 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -11,32 +11,80 @@ @MODELS.register_module() -class BEVFusionVoxelEncoder(nn.Module): +class HardSimpleVoxelSinCosEncoder(nn.Module): + def __init__(self, + min_norm_values: Tuple[float], + max_norm_values: Tuple[float], + in_channels: Optional[int] = 4) -> None: + """ + Simple voxel encoder that only performs mean pooling on the normalize features, and then + performs sin-cos (fourier encoding) on each voxel channels. + + The output shape of each voxel is (N, feature_channels*2). + Args: + min_norm_values (Tuple[float]): Minimum values for the features. + max_norm_values (Tuple[float]): Maximum values for the features. + in_channels (int): Number of input channels. 
+ """ + super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() + + # Create PillarFeatureNet layers + self.in_channels = in_channels + + self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) + self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) + self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, + *args, **kwargs) -> Tensor: + """Forward function. + + Args: + features (torch.Tensor): Point features or raw points in shape + (N, M, C) in (x, y, z, intensity, time_lag) if C is 5, (x, y, z, time_lag) if C is 4. + num_points (torch.Tensor): Number of points in each pillar in shape (M). + coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + + Returns: + torch.Tensor: Features of pillars in shape (M, C*C*2). + + """ + num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] + + # Mean in the voxel + # (N, M, 3) -> (N, 3) + voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( + -1, 1)).contiguous() + + # min-max normalization, (N, 3) -> (N, 3) + voxel_features_norm = (voxel_features - \ + self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + + # SinCos encoding + # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) + y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) + # (N*3, 3) -> (N, 3*3) + y = y.reshape(num_voxels, -1) + # (N, 3*3) -> (N, 3*3*2) + voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + + return voxel_fourier_features + + +@MODELS.register_module() +class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. - The network is same as pillar featuer net. - The network prepares the pillar features and performs forward pass - through PFNLayers. 
+ The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the + offset features, for example, distances. After that, it concatenates the fourier features and the offset features + along the channel dimension for each voxel. Args: - in_channels (int, optional): Number of input features, - either x, y, z or x, y, z, r. Defaults to 4. + min_norm_values (Tuple[float]): Minimum values for the features. + max_norm_values (Tuple[float]): Maximum values for the features. + in_channels (int): Number of input channels. feat_channels (tuple, optional): Number of features in each of the N PFNLayers. Defaults to (64, ). - with_distance (bool, optional): Whether to include Euclidean distance - to points. Defaults to False. - with_cluster_center (bool, optional): [description]. Defaults to True. - with_voxel_center (bool, optional): [description]. Defaults to True. - voxel_size (tuple[float], optional): Size of voxels, only utilize x - and y size. Defaults to (0.2, 0.2, 4). - point_cloud_range (tuple[float], optional): Point cloud range, only - utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1). - norm_cfg ([type], optional): [description]. - Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). - mode (str, optional): The mode to gather point features. Options are - 'max' or 'avg'. Defaults to 'max'. - legacy (bool, optional): Whether to use the new behavior or - the original behavior. Defaults to True. 
""" def __init__(self, @@ -54,7 +102,11 @@ def __init__(self, type='BN1d', eps=1e-3, momentum=0.01), mode: Optional[str] = 'max', legacy: Optional[bool] = True): - super(BEVFusionVoxelEncoder, self).__init__() + + super(BEVFusionVoxelEncoder, self).__init__( + min_norm_values=min_norm_values, + max_norm_values=max_norm_values, in_channels=in_channels + ) assert len(feat_channels) > 0 self.legacy = legacy pfn_in_channels = 0 @@ -64,11 +116,13 @@ def __init__(self, pfn_in_channels += 3 if with_distance: pfn_in_channels += 1 + + assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0" self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center - # Create PillarFeatureNet layers - self.in_channels = in_channels + + # Create VoxelFeatureNet layers feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] for i in range(len(feat_channels) - 1): @@ -96,11 +150,6 @@ def __init__(self, self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) - # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. @@ -112,26 +161,13 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). Returns: - torch.Tensor: Features of pillars in shape (M, C). + torch.Tensor: Features of pillars in shape (M, C*C*2 + feat_channels[-1]). 
""" - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) + # (M, C*C*2) + voxel_fourier_features = super().forward(features, num_points, coors) - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + # Offset features + max_points_per_voxel = features.shape[1] features_ls = [] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available @@ -200,222 +236,173 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, return features -@MODELS.register_module() -class BEVFusionVoxelSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - time_lag_channel_index: int = 3, - time_exp_factor: Optional[float] = None, - feat_channels: Optional[tuple] = (16, ), - in_channels: Optional[int] = 4, - with_distance: Optional[bool] = False, - with_cluster_center: Optional[bool] = True, - with_voxel_center: Optional[bool] = True, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - norm_cfg: Optional[dict] = dict( - type='BN1d', eps=1e-3, momentum=0.01), - mode: Optional[str] = 'max'): - super(BEVFusionVoxelSinCosEncoder, self).__init__() - - self._with_distance = with_distance - self._with_cluster_center = with_cluster_center - self._with_voxel_center = with_voxel_center - # Create 
PillarFeatureNet layers - self.in_channels = in_channels - - # Need pillar (voxel) size and x/y offset in order to calculate offset - self.vx = voxel_size[0] - self.vy = voxel_size[1] - self.vz = voxel_size[2] - self.x_offset = self.vx / 2 + point_cloud_range[0] - self.y_offset = self.vy / 2 + point_cloud_range[1] - self.z_offset = self.vz / 2 + point_cloud_range[2] - self.point_cloud_range = point_cloud_range +# @MODELS.register_module() +# class BEVFusionVoxelSinCosEncoder(nn.Module): +# def __init__(self, +# min_norm_values: Tuple[float], +# max_norm_values: Tuple[float], +# time_lag_channel_index: int = 3, +# time_exp_factor: Optional[float] = None, +# feat_channels: Optional[tuple] = (16, ), +# in_channels: Optional[int] = 4, +# with_distance: Optional[bool] = False, +# with_cluster_center: Optional[bool] = True, +# with_voxel_center: Optional[bool] = True, +# voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), +# point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, +# 40, 1), +# norm_cfg: Optional[dict] = dict( +# type='BN1d', eps=1e-3, momentum=0.01), +# mode: Optional[str] = 'max'): +# super(BEVFusionVoxelSinCosEncoder, self).__init__() + +# self._with_distance = with_distance +# self._with_cluster_center = with_cluster_center +# self._with_voxel_center = with_voxel_center +# # Create PillarFeatureNet layers +# self.in_channels = in_channels + +# # Need pillar (voxel) size and x/y offset in order to calculate offset +# self.vx = voxel_size[0] +# self.vy = voxel_size[1] +# self.vz = voxel_size[2] +# self.x_offset = self.vx / 2 + point_cloud_range[0] +# self.y_offset = self.vy / 2 + point_cloud_range[1] +# self.z_offset = self.vz / 2 + point_cloud_range[2] +# self.point_cloud_range = point_cloud_range - self.xyz_channels = 3 - feat_offset_channels = in_channels - self.xyz_channels - if with_cluster_center: - feat_offset_channels += 3 - if with_voxel_center: - feat_offset_channels += 3 - if with_distance: - feat_offset_channels += 1 - - feat_channels 
= [feat_offset_channels] + list(feat_channels) - assert len(feat_channels) > 0, "feat_channels must be greater than 0" - pfn_layers = [] - for i in range(len(feat_channels) - 1): - in_filters = feat_channels[i] - out_filters = feat_channels[i + 1] - if i < len(feat_channels) - 2: - last_layer = False - else: - last_layer = True - pfn_layers.append( - PFNLayer( - in_filters, - out_filters, - norm_cfg=norm_cfg, - last_layer=last_layer, - mode=mode)) - self.pfn_layers = nn.ModuleList(pfn_layers) - - self.time_lag_channel_index = time_lag_channel_index - self.time_exp_factor = time_exp_factor +# self.xyz_channels = 3 +# feat_offset_channels = in_channels - self.xyz_channels +# if with_cluster_center: +# feat_offset_channels += 3 +# if with_voxel_center: +# feat_offset_channels += 3 +# if with_distance: +# feat_offset_channels += 1 + +# feat_channels = [feat_offset_channels] + list(feat_channels) +# assert len(feat_channels) > 0, "feat_channels must be greater than 0" +# pfn_layers = [] +# for i in range(len(feat_channels) - 1): +# in_filters = feat_channels[i] +# out_filters = feat_channels[i + 1] +# if i < len(feat_channels) - 2: +# last_layer = False +# else: +# last_layer = True +# pfn_layers.append( +# PFNLayer( +# in_filters, +# out_filters, +# norm_cfg=norm_cfg, +# last_layer=last_layer, +# mode=mode)) +# self.pfn_layers = nn.ModuleList(pfn_layers) + +# self.time_lag_channel_index = time_lag_channel_index +# self.time_exp_factor = time_exp_factor - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) - self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) - - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: - """Forward function. 
- - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - num_points (torch.Tensor): Number of points in each pillar in shape (M). - coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). - - Returns: - torch.Tensor: Features of pillars in shape (M, C). - """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] +# self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) +# self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) +# self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) +# self.register_buffer("exponents", (2 ** torch.arange(0, self.xyz_channels)).float()) + +# def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, +# *args, **kwargs) -> Tensor: +# """Forward function. + +# Args: +# features (torch.Tensor): Point features or raw points in shape +# (N, M, C). +# num_points (torch.Tensor): Number of points in each pillar in shape (M). +# coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). + +# Returns: +# torch.Tensor: Features of pillars in shape (M, C). 
+# """ +# num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) +# # Mean in the voxel +# # (N, M, 3) -> (N, 3) +# voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( +# -1, 1)).contiguous() + +# # min-max normalization, (N, 3) -> (N, 3) +# voxel_features_norm = (voxel_features - \ +# self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - - # PFN - # Other features, for example, intensity or time_lag - other_features = features[:, :, self.xyz_channels:] +# # SinCos encoding +# # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) +# y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) +# # (N*3, 3) -> (N, 3*3) +# y = y.reshape(num_voxels, -1) +# # (N, 3*3) -> (N, 3*3*2) +# voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) + +# # PFN +# # Other features, for example, intensity or time_lag +# other_features = features[:, :, self.xyz_channels:] - # Normalization - other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - 
self.min_norm_values[self.xyz_channels:]) - - time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels - # exponentiate time_lag features, it's higher when the normlized time lag is lower - # (1.0 when time_lag_features is 0.0) - if self.time_exp_factor is not None: - other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) - else: - # Inverse the time_lag feature - other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] +# # Normalization +# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + +# time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels +# # exponentiate time_lag features, it's higher when the normlized time lag is lower +# # (1.0 when time_lag_features is 0.0) +# if self.time_exp_factor is not None: +# other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) +# else: +# # Inverse the time_lag feature +# other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] - # Offsets - voxel_feature_offsets = [other_features_norm] - # Find distance of x, y, and z from cluster center - if self._with_cluster_center: - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_points.type_as(features).view( - -1, 1, 1) +# # Offsets +# voxel_feature_offsets = [other_features_norm] +# # Find distance of x, y, and z from cluster center +# if self._with_cluster_center: +# points_mean = features[:, :, :3].sum( +# dim=1, keepdim=True) / num_points.type_as(features).view( +# -1, 1, 1) - # f_cluster = (features[:, :, :3] - points_mean) - f_cluster = features[:, :, :3] - points_mean - voxel_feature_offsets.append(f_cluster) - - # Find distance of x, y, and 
z from pillar center - dtype = features.dtype - if self._with_voxel_center: - f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, :, 0] - ( - coors[:, 3].to(dtype).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vy + - self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - ( - coors[:, 1].to(dtype).unsqueeze(1) * self.vz + - self.z_offset) +# # f_cluster = (features[:, :, :3] - points_mean) +# f_cluster = features[:, :, :3] - points_mean +# voxel_feature_offsets.append(f_cluster) + +# # Find distance of x, y, and z from pillar center +# dtype = features.dtype +# if self._with_voxel_center: +# f_center = torch.zeros_like(features[:, :, :3]) +# f_center[:, :, 0] = features[:, :, 0] - ( +# coors[:, 3].to(dtype).unsqueeze(1) * self.vx + +# self.x_offset) +# f_center[:, :, 1] = features[:, :, 1] - ( +# coors[:, 2].to(dtype).unsqueeze(1) * self.vy + +# self.y_offset) +# f_center[:, :, 2] = features[:, :, 2] - ( +# coors[:, 1].to(dtype).unsqueeze(1) * self.vz + +# self.z_offset) - # Map to [-1, 1] - # f_center = f_center / (self.voxel_size * 0.5) - voxel_feature_offsets.append(f_center) +# # Map to [-1, 1] +# # f_center = f_center / (self.voxel_size * 0.5) +# voxel_feature_offsets.append(f_center) - if self._with_distance: - points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) - voxel_feature_offsets.append(points_dist) +# if self._with_distance: +# points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) +# voxel_feature_offsets.append(points_dist) - voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) - # The feature decorations were calculated without regard to whether - # pillar was empty. Need to ensure that - # empty pillars remain set to zeros. 
- mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) - mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) - voxel_feature_offsets *= mask +# voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) +# # The feature decorations were calculated without regard to whether +# # pillar was empty. Need to ensure that +# # empty pillars remain set to zeros. +# mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) +# mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) +# voxel_feature_offsets *= mask - # PFN - for pfn in self.pfn_layers: - voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) +# # PFN +# for pfn in self.pfn_layers: +# voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - # Concat - features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) - return features - - - -@MODELS.register_module() -class BEVFusionVoxelMeanSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - in_channels: Optional[int] = 4): - super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() - - # Create PillarFeatureNet layers - self.in_channels = in_channels +# # Concat +# features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) +# return features - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: - """Forward function. - - Args: - features (torch.Tensor): Point features or raw points in shape - (N, M, C). - num_points (torch.Tensor): Number of points in each pillar in shape (M). - coors (torch.Tensor): Coordinates of each voxel in (M, [4]), which is (batch_idx, z_idx, y_idx, x_idx). 
- - Returns: - torch.Tensor: Features of pillars in shape (M, C). - """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() - - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - - # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) - voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - - return voxel_fourier_features From db8e7f8b2193e883ac806923a6f335dc127c8c8b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:41:59 +0900 Subject: [PATCH 095/183] Updated --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 6 +++--- ...r_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 843624b56..6c1955505 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -26,7 +26,7 @@ def __init__(self, max_norm_values (Tuple[float]): Maximum values for the features. in_channels (int): Number of input channels. """ - super(BEVFusionVoxelMeanSinCosEncoder, self).__init__() + super(HardSimpleVoxelSinCosEncoder, self).__init__() # Create PillarFeatureNet layers self.in_channels = in_channels @@ -75,8 +75,8 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. 
- The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers and max-pooling on the - offset features, for example, distances. After that, it concatenates the fourier features and the offset features + The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the + offset features, for example, distances. After that, it concatenates the fourier features and the PFN features along the channel dimension for each voxel. Args: diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 02f9642f2..72e73c036 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -27,7 +27,6 @@ voxel_size=_base_.voxel_size, ), pts_voxel_encoder=dict( - _delete_=True, type="BEVFusionVoxelEncoder", in_channels=4, with_distance=False, From 4c907aa1af5f986eaa21ac91a6664b68b7c7de07 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:43:23 +0900 Subject: [PATCH 096/183] Updated --- projects/BEVFusion/bevfusion/__init__.py | 7 +++---- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 2 +- ..._voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index ce9b31aa5..fa23d120c 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -7,7 +7,7 @@ from .transformer import TransformerDecoderLayer from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D from .utils import BBoxBEVL1Cost, 
HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelEncoder +from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet __all__ = [ "BEVFusion", @@ -28,7 +28,6 @@ "BEVFusionRandomFlip3D", "BEVFusionGlobalRotScaleTrans", "TransFusionBBoxCoder", - "BEVFusionVoxelEncoder", - "BEVFusionVoxelSinCosEncoder", - "BEVFusionVoxelMeanSinCosEncoder", + "HardSimpleVoxelSinCosEncoder", + "BEVFusionVoxelFeatureNet", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 6c1955505..f7a5c481c 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -72,7 +72,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, @MODELS.register_module() -class BEVFusionVoxelEncoder(HardSimpleVoxelSinCosEncoder): +class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. 
The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 72e73c036..b6ad6cac2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -27,7 +27,7 @@ voxel_size=_base_.voxel_size, ), pts_voxel_encoder=dict( - type="BEVFusionVoxelEncoder", + type="BEVFusionVoxelFeatureNet", in_channels=4, with_distance=False, with_cluster_center=True, From 8cb422d5ab8a3c35943ef089169fb8fb89046b3e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:45:03 +0900 Subject: [PATCH 097/183] Updated --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 2 +- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index f7a5c481c..efed0ce5d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -103,7 +103,7 @@ def __init__(self, mode: Optional[str] = 'max', legacy: Optional[bool] = True): - super(BEVFusionVoxelEncoder, self).__init__( + super(BEVFusionVoxelFeatureNet, self).__init__( min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index b6ad6cac2..83a607386 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -28,7 +28,7 @@ ), pts_voxel_encoder=dict( type="BEVFusionVoxelFeatureNet", - in_channels=4, + in_channels=len(_base_.lidar_sweep_dims), with_distance=False, with_cluster_center=True, with_voxel_center=True, diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index d56e6d1a3..5a880d975 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -13,7 +13,7 @@ pad_size_divisor=32, ), pts_voxel_encoder=dict( - type="BEVFusionVoxelMeanSinCosEncoder", + type="HardSimpleVoxelSinCosEncoder", in_channels=4, ), pts_middle_encoder=dict( From 5635a003821391a44d67eed601be99c88a58a84d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 9 May 2026 22:48:59 +0900 Subject: [PATCH 098/183] Updated --- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 83a607386..36c39dd5a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -32,7 +32,7 @@ with_distance=False, with_cluster_center=True, with_voxel_center=True, - feat_channels=[16, 16], + feat_channels=[16], point_cloud_range=_base_.point_cloud_range, voxel_size=_base_.voxel_size, norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), From fb27f498e2312bf60dee8ecb0c1e5c4b489bba39 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 12 May 2026 05:56:51 +0900 Subject: [PATCH 099/183] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index efed0ce5d..83cd70482 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -109,7 +109,7 @@ def __init__(self, ) assert len(feat_channels) > 0 self.legacy = legacy - pfn_in_channels = 0 + pfn_in_channels = in_channels if with_cluster_center: pfn_in_channels += 3 if with_voxel_center: @@ -165,11 +165,14 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, """ # (M, C*C*2) voxel_fourier_features = super().forward(features, num_points, coors) + + # Normalize the features + norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) # Offset features max_points_per_voxel = features.shape[1] - features_ls = [] + features_ls = [norm_features] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: points_mean = features[:, :, :3].sum( From bb5d7579e5d9906bf89e3ec9a88f54802992bd49 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 12 May 2026 22:24:10 +0900 Subject: [PATCH 100/183] Resolve conflict --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 1de3af05f..69417347b 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -387,7 +387,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F batch_size = preds_dict[0]["heatmap"].shape[0] batch_score = preds_dict[0]["heatmap"][..., -self.num_proposals :].sigmoid() if self.loss_iou is not None: - batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].clamp(min=0.0, max=1.0)) # noqa: E501 + batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid()) # noqa: E501 one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1) batch_score = batch_score * preds_dict[0]["query_heatmap_score"] * one_hot From 3f64c2c5323efc1ee5d0283f44f1255bd53dc3e5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 13 May 2026 19:31:59 +0900 Subject: [PATCH 101/183] Resolve conflict --- ..._lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 6 +++--- .../default/schedulers/default_30e_8xb16_adamw_cosine.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 4cf51faa5..71c1829d4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_lidar_intensity_120m.py", 
"../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_cosine.py", + "../default/schedulers/default_30e_8xb16_adamw_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index e3975f6eb..d28468f71 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -23,7 +23,7 @@ convert_to_iter_based=True, ), dict( - type="CosineAnnealingLR + type="CosineAnnealingLR", T_max=(max_epochs - t_max), eta_min=lr * 1e-4, begin=t_max, From ac62b49630918abc492ce81f3274441dc9de1528 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 13 May 2026 20:16:32 +0900 Subject: [PATCH 102/183] Resolve conflict --- ...et50_fpn_lss_30e_8xb8_j6gen2_base_120m.py} | 8 +- ...snet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ++++++++++++++++++ ...ra_resnet50_fpn_lss_50e_8xb8_base_120m.py} | 6 +- ...mera_resnet50_fpn_lss_50e_8xb8_base_50m.py | 137 ++++++++++++++++++ ...swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py | 137 ++++++++++++++++++ ..._swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py | 137 ++++++++++++++++++ 
..._camera_swin_fpn_lss_50e_8xb8_base_120m.py | 2 +- ...second_secfpn_20e_8xb8_j6gen2_base_120m.py | 4 +- ...econd_secfpn_20e_8xb8_jpntaxi_base_120m.py | 4 +- ...ault_camera_resnet50_fpn_depthlss_120m.py} | 39 ++--- ...0_fpn_depthlss_lidar_second_secfpn_120m.py | 53 +++++++ .../default_camera_resnet50_fpn_lss_50m.py | 23 +++ ..._fpn_depthlss_lidar_second_secfpn_120m.py} | 0 .../models/default_camera_swin_fpn_lss_50m.py | 49 +------ .../pipelines/default_camera_base_120m.py | 133 ----------------- ... default_20e_8xb16_adamw_linear_cosine.py} | 5 +- ... default_30e_8xb16_adamw_linear_cosine.py} | 5 +- ... default_50e_8xb16_adamw_linear_cosine.py} | 5 +- 18 files changed, 658 insertions(+), 226 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py => bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py} (95%) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py => bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py} (96%) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lss_120m.py => default_camera_resnet50_fpn_depthlss_120m.py} (57%) create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py 
rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lidar_second_secfpn_120m.py => default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py} (100%) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_20e_8xb8_adamw_linear_cosine.py => default_20e_8xb16_adamw_linear_cosine.py} (96%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_30e_8xb8_adamw_linear_cosine.py => default_30e_8xb16_adamw_linear_cosine.py} (96%) rename projects/BEVFusion/configs/t4dataset/default/schedulers/{default_50e_8xb8_adamw_linear_cosine.py => default_50e_8xb16_adamw_linear_cosine.py} (96%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py similarity index 95% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py index d31630dd0..4c809264e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py @@ -2,7 +2,7 @@ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_depthlss_120m.py", + "../default/models/default_camera_swin_fpn_lss_120m.py", "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -12,11 +12,11 
@@ custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_depthlss_30e_8xb8_j6gen2_base_120m" +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py new file mode 100644 index 000000000..927310e7d --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_base_50m.py", + "../default/models/default_camera_swin_fpn_lss_50m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + 
type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + 
pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py index 4ac46afea..42f93d1b1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py @@ -2,7 +2,7 @@ 
"../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_depthlss_120m.py", + "../default/models/default_camera_swin_fpn_lss_120m.py", "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_6_2/" experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb8_base_120m" +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -135,5 +135,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py new file mode 100644 index 000000000..7c5a5f91f --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", + "../default/pipelines/default_camera_base_50m.py", + "../default/models/default_camera_swin_fpn_lss_50m.py", + "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += 
["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4dataset/" +info_directory_path = "info/kokseang_2_6_2/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + 
test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py new file mode 100644 index 000000000..4c809264e --- /dev/null +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", + "../default/models/default_camera_swin_fpn_lss_120m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + 
data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + 
eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py new file mode 100644 index 000000000..927310e7d --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py @@ -0,0 +1,137 @@ +_base_ = [ + "../../../../../autoware_ml/configs/detection3d/default_runtime.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/default_camera_base_50m.py", + "../default/models/default_camera_swin_fpn_lss_50m.py", + "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", + "../default/default_misc.py", +] + +custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) +custom_imports["imports"] += _base_.custom_imports["imports"] +custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] + +# user setting +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_6_1/" + +experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + 
voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) + +# Dataset parameters +train_dataloader = dict( + batch_size=_base_.train_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=dict( + type=_base_.dataset_type, + pipeline=_base_.train_pipeline, + modality=_base_.input_modality, + backend_args=_base_.backend_args, + data_root=data_root, + ann_file=info_directory_path + _base_.info_train_file_name, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + test_mode=False, + data_prefix=_base_.data_prefix, + box_type_3d="LiDAR", + filter_cfg=_base_.filter_cfg, + ), +) + +val_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_val_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + +test_dataloader = dict( + batch_size=_base_.test_batch_size, + num_workers=_base_.num_workers, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=_base_.dataset_type, + data_root=data_root, + ann_file=info_directory_path + _base_.info_test_file_name, + pipeline=_base_.test_pipeline, + metainfo=_base_.metainfo, + class_names=_base_.class_names, + modality=_base_.input_modality, + data_prefix=_base_.data_prefix, + test_mode=True, + box_type_3d="LiDAR", + backend_args=_base_.backend_args, + ), +) + 
+val_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_val_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, +) + +test_evaluator = dict( + type="T4Metric", + data_root=data_root, + ann_file=data_root + info_directory_path + _base_.info_test_file_name, + metric="bbox", + backend_args=_base_.backend_args, + class_names=_base_.class_names, + name_mapping=_base_.name_mapping, + eval_class_range=_base_.eval_class_range, + filter_attributes=_base_.filter_attributes, + save_csv=True, +) + +default_hooks = dict( + logger=dict(type="LoggerHook", interval=50), + checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), +) +log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py index 1c30d708a..42f93d1b1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py @@ -1,7 +1,7 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_base_120m.py", + "../default/pipelines/default_camera_lidar_intensity_120m.py", "../default/models/default_camera_swin_fpn_lss_120m.py", "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", "../default/default_misc.py", diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py index 4f81af760..a93b1d435 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_j6gen2_base_120m.py @@ -2,8 +2,8 @@ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", + "../default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py", + "../default/schedulers/default_20e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py index 20c85b1d8..b8408956b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_20e_8xb8_jpntaxi_base_120m.py @@ -2,8 +2,8 @@ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py", "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py", - "../default/schedulers/default_20e_8xb8_adamw_linear_cosine.py", + "../default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py", + 
"../default/schedulers/default_20e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py similarity index 57% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py rename to projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py index 2f1d1f3be..0edff4398 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py @@ -18,44 +18,37 @@ bgr_to_rgb=False, rgb_to_bgr=False, ), - img_backbone=dict( - type="mmdet.SwinTransformer", - pretrain_img_size=(256, 704), - embed_dims=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.2, - patch_norm=True, - out_indices=[1, 2, 3], + img_backbone=dict( + pretrained="torchvision://resnet50", + type="ResNet", + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type="BN2d", requires_grad=True), + norm_eval=False, with_cp=False, - convert_weights=True, - init_cfg=dict( + style="pytorch", + init_cfg=dict( type="Pretrained", - # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth - checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 + checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth", # noqa: E251 ), ), img_neck=dict( type="GeneralizedLSSFPN", - in_channels=[192, 384, 768], + in_channels=[512, 1024, 2048], out_channels=256, start_level=0, - num_outs=3, + num_outs=2, norm_cfg=dict(type="BN2d", requires_grad=True), act_cfg=dict(type="ReLU", inplace=True), 
upsample_cfg=dict(mode="bilinear", align_corners=False), ), view_transform=dict( - type="LSSTransform", + type="DepthLSSTransform", in_channels=256, out_channels=80, - feature_size=[48, 96], + feature_size=[24, 48], xbound=[-122.40, 122.40, 0.68], ybound=[-122.40, 122.40, 0.68], zbound=[-10.0, 10.0, 20.0], diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py new file mode 100644 index 000000000..21f746da8 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py @@ -0,0 +1,53 @@ +_base_ = [ + "./default_lidar_second_secfpn_120m.py", +] + +# Image network +model = dict( + data_preprocessor=dict( + type="Det3DDataPreprocessor", + pad_size_divisor=32, + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=False, + rgb_to_bgr=False, + ), + img_backbone=dict( + pretrained="torchvision://resnet50", + type="ResNet", + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type="BN2d", requires_grad=True), + norm_eval=False, + with_cp=False, + style="pytorch", + init_cfg=dict( + type="Pretrained", + checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth", # noqa: E251 + ), + ), + img_neck=dict( + type="GeneralizedLSSFPN", + in_channels=[512, 1024, 2048], + out_channels=256, + start_level=0, + num_outs=2, + norm_cfg=dict(type="BN2d", requires_grad=True), + act_cfg=dict(type="ReLU", inplace=True), + upsample_cfg=dict(mode="bilinear", align_corners=False), + ), + view_transform=dict( + type="DepthLSSTransform", + in_channels=256, + out_channels=80, + feature_size=[48, 96], + xbound=[-122.40, 122.40, 0.68], + ybound=[-122.40, 122.40, 0.68], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 130, 1.0], + downsample=2, + ), + 
fusion_layer=dict(type="ConvFuser", in_channels=[80, 256], out_channels=256, kernel_size=5, stride=2, padding=2), +) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py new file mode 100644 index 000000000..5577723bf --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py @@ -0,0 +1,23 @@ +_base_ = [ + "./default_camera_resnet50_fpn_depthlss_120m.py", +] + +# Image network +model = dict( + view_transform=dict( + type="LSSTransform", + in_channels=256, + out_channels=80, + feature_size=[48, 96], + xbound=[-54.0, 54.0, 0.3], + ybound=[-54.0, 54.0, 0.3], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 60, 0.5], + downsample=2, + ), + bbox_head=dict( + bbox_coder=dict( + post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + ), + ) +) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py similarity index 100% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lidar_second_secfpn_120m.py rename to projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py index a35e3a79a..39a4a637f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py @@ -1,56 +1,9 @@ _base_ = [ - "./default_lidar_second_secfpn_120m.py", + "./default_camera_swin_fpn_depthlss_120m.py", ] # Image network model = dict( - 
# Remove all lidar related configs - voxelize_cfg=None, - pts_voxel_encoder=None, - pts_middle_encoder=None, - pts_neck=None, - pts_backbone=None, - data_preprocessor=dict( - type="Det3DDataPreprocessor", - pad_size_divisor=32, - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=False, - rgb_to_bgr=False, - ), - img_backbone=dict( - type="mmdet.SwinTransformer", - pretrain_img_size=(256, 704), - embed_dims=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.2, - patch_norm=True, - out_indices=[1, 2, 3], - with_cp=False, - convert_weights=True, - init_cfg=dict( - type="Pretrained", - # https://download.openmmlab.com/mmdetection3d/v1.1.0_models/bevfusion/swint-nuimages-pretrained.pth - checkpoint="work_dirs/swin_transformer/swint_nuimages_pretrained.pth", # noqa: E251 - ), - ), - img_neck=dict( - type="GeneralizedLSSFPN", - in_channels=[192, 384, 768], - out_channels=256, - start_level=0, - num_outs=3, - norm_cfg=dict(type="BN2d", requires_grad=True), - act_cfg=dict(type="ReLU", inplace=True), - upsample_cfg=dict(mode="bilinear", align_corners=False), - ), view_transform=dict( type="LSSTransform", in_channels=256, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py deleted file mode 100644 index de8d48263..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_120m.py +++ /dev/null @@ -1,133 +0,0 @@ -## This config is for the camera_base only model, without lidar points - -_base_ = [ - "./default_lidar_120m.py", -] -input_modality = dict(use_lidar=True, use_camera=True) - -# Image parameters -image_size = [384, 768] # Height, Width -camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"] - -train_pipeline = [ - 
dict( - type="BEVLoadMultiViewImageFromFiles", - to_float32=True, - color_type="color", - backend_args=_base_.backend_args, - camera_order=camera_order, - ), - # We keep loading LiDAR points to make downstream BEV augmentation easier - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=_base_.point_load_dim, - use_dim=_base_.point_load_dim, - backend_args=_base_.backend_args, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - dict( - type="ImageAug3D", - final_dim=image_size, - resize_lim=[0.28, 0.40], - bot_pct_lim=[0.0, 0.0], - rot_lim=[0.0, 0.0], - rand_flip=True, - is_train=True, - ), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - # Remove LiDAR points from the data - dict(type="BEVFusionRemoveLiDARPoints"), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "construction_vehicle", - "bus", - "trailer", - "barrier", - "motorcycle", - "bicycle", - "pedestrian", - "traffic_cone", - ], - ), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - ], - ), -] - -test_pipeline = [ - dict( - type="BEVLoadMultiViewImageFromFiles", - to_float32=True, - color_type="color", - backend_args=_base_.backend_args, - camera_order=camera_order, - ), - dict( - type="ImageAug3D", - final_dim=image_size, - # resize_lim=[0.34, 0.34], - resize_lim=0.02, - 
bot_pct_lim=[0.0, 0.0], - rot_lim=[0.0, 0.0], - rand_flip=False, - is_train=False, - ), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - ], - ), -] - -filter_cfg = dict(filter_frames_with_camera_order=camera_order) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py index db5515b46..05740e442 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_20e_8xb16_adamw_linear_cosine.py @@ -1,7 +1,6 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1e-4 -t_max = 6 +lr = 2e-4 +t_max = 2 max_epochs = 20 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 2181cbebb..261246886 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb8_adamw_linear_cosine.py +++ 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,7 +1,6 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1e-4 -t_max = 2 +lr = 2e-4 +t_max = 3 max_epochs = 30 val_interval = 1 diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py rename to projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py index d569900d6..43715fed7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb8_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_50e_8xb16_adamw_linear_cosine.py @@ -1,7 +1,6 @@ # learning rate -# 1e-4 * sqrt(2) = 0.0001414 -lr = 1e-4 -t_max = 3 +lr = 2e-4 +t_max = 5 max_epochs = 50 val_interval = 5 From 45cc9cf3378006f5c50858abe5f547e311763562 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 13 May 2026 20:22:50 +0900 Subject: [PATCH 103/183] Resolve conflict --- .../models/default_camera_resnet50_fpn_depthlss_120m.py | 4 ++-- ...t_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py index 0edff4398..6203da514 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py @@ -23,7 +23,7 @@ type="ResNet", depth=50, num_stages=4, - out_indices=(1, 2, 3), + out_indices=(2, 3), frozen_stages=-1, 
norm_cfg=dict(type="BN2d", requires_grad=True), norm_eval=False, @@ -36,7 +36,7 @@ ), img_neck=dict( type="GeneralizedLSSFPN", - in_channels=[512, 1024, 2048], + in_channels=[1024, 2048], out_channels=256, start_level=0, num_outs=2, diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py index 21f746da8..43e8dd9ac 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py @@ -17,7 +17,7 @@ type="ResNet", depth=50, num_stages=4, - out_indices=(1, 2, 3), + out_indices=(2, 3), frozen_stages=-1, norm_cfg=dict(type="BN2d", requires_grad=True), norm_eval=False, @@ -30,7 +30,7 @@ ), img_neck=dict( type="GeneralizedLSSFPN", - in_channels=[512, 1024, 2048], + in_channels=[1024, 2048], out_channels=256, start_level=0, num_outs=2, From 1a81b03587668e4009a34d80fe050878f895a757 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 14 May 2026 16:23:50 +0900 Subject: [PATCH 104/183] Add local 3d box expand --- .../datasets/transforms/__init__.py | 3 +- .../datasets/transforms/local_3d_bbox.py | 57 +++++++ .../pipelines/default_lidar_120m_width.py | 150 ++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index 6bc932f1a..b20961db6 100644 --- a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,3 +1,4 @@ 
from .object_min_points_filter import ObjectMinPointsFilter +from .local_3d_bbox import Local3DBBoxExpand -__all__ = ["ObjectMinPointsFilter"] +__all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"] diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py new file mode 100644 index 000000000..e417c4bfb --- /dev/null +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -0,0 +1,57 @@ +import numpy as np + +from mmcv.transforms import BaseTransform +from mmdet3d.structures.ops import box_np_ops +from mmengine.registry import TRANSFORMS + + +@TRANSFORMS.register_module() +class Local3DBBoxExpand(BaseTransform): + """Locally expand the 3D bounding boxes by scaling the width, which it doesn't scale the points. + + Args: + expand_widths: (List[float]): Uniformly sampled expand width. + width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D + bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the + 4th dimension. + label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. + """ + + def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: + assert isinstance(expand_widths, list) + assert len(expand_widths) == 2 + assert expand_widths[0] < expand_widths[1] + self.expand_widths = expand_widths + self.width_dim = width_dim + self.label_ids = label_ids + + def transform(self, input_dict: dict) -> dict: + """Call function to locally augment the 3D bounding boxes by scaling the width. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ + key is updated in the result dict. 
+ """ + # Label mask + if self.label_ids is not None: + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + else: + label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) + + for i in range(len(input_dict["gt_bboxes_3d"])): + if not label_masks[i]: + continue + + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) + input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f"(expand_widths={self.expand_widths}, width_dim={self.width_dim}, label_ids={self.label_ids})" + return repr_str diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py new file mode 100644 index 000000000..0b32cc86a --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py @@ -0,0 +1,150 @@ +# Dataset parameters +backend_args = None +num_workers = 16 +input_modality = dict(use_lidar=True, use_camera=False) + +# range setting +point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] +eval_class_range = { + "car": 120, + "truck": 120, + "bus": 120, + "bicycle": 120, + "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, +} + +# LiDAR parameters +point_load_dim = 5 # x, y, z, intensity, ring_id +point_use_dim = 4 +lidar_sweep_dims = [0, 1, 2, 4] # x, y, z, time_lag +sweeps_num = 1 + +train_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + 
remove_close=True, + backend_args=backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + # For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirros + dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]), + # For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70 + dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), + dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", + ], + ), + dict(type="PointShuffle"), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +test_pipeline = [ + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=point_load_dim, + use_dim=point_load_dim, + backend_args=backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=sweeps_num, + load_dim=point_load_dim, + use_dim=lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=backend_args, + test_mode=True, + ), + dict(type="PointsRangeFilter", 
point_cloud_range=point_cloud_range), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + "traffic_cone_barrier_status", + ], + ), +] + +# Filtering configuration +# Note: +# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, +# e.g., dict(filter_frames_with_missing_image=True). +# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so +# image-based filtering does not apply and `filter_cfg` is intentionally None. +filter_cfg = None From 5257c01a90aebb4f6aea4343717073c660089885 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 15 May 2026 00:26:00 +0900 Subject: [PATCH 105/183] Resolve conflict --- .../datasets/transforms/local_3d_bbox.py | 42 ++++++++++--------- ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py index e417c4bfb..96772cf44 100644 --- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -1,3 +1,5 @@ +from typing import List + import numpy as np from mmcv.transforms import BaseTransform @@ -12,20 +14,20 @@ class Local3DBBoxExpand(BaseTransform): Args: expand_widths: (List[float]): Uniformly sampled expand width. width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D - bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the - 4th dimension. - label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. 
- """ + bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the + 4th dimension. + label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. + """ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: assert isinstance(expand_widths, list) assert len(expand_widths) == 2 assert expand_widths[0] < expand_widths[1] self.expand_widths = expand_widths - self.width_dim = width_dim - self.label_ids = label_ids + self.width_dim = width_dim + self.label_ids = label_ids - def transform(self, input_dict: dict) -> dict: + def transform(self, input_dict: dict) -> dict: """Call function to locally augment the 3D bounding boxes by scaling the width. Args: @@ -35,20 +37,20 @@ def transform(self, input_dict: dict) -> dict: dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ key is updated in the result dict. """ - # Label mask - if self.label_ids is not None: - label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] - else: - label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) - - for i in range(len(input_dict["gt_bboxes_3d"])): - if not label_masks[i]: - continue - - expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) - input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width + # Label mask + if self.label_ids is not None: + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + else: + label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) + + for i in range(len(input_dict["gt_bboxes_3d"])): + if not label_masks[i]: + continue + + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) + input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width - return input_dict + return input_dict def __repr__(self) -> str: 
"""str: Return a string that describes the module.""" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 3b7c23b18..02ed7542a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth" +load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" From 14a20e107d5f0f054be24ad1246d0d287098a0c9 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 15 May 2026 22:38:53 +0900 Subject: [PATCH 106/183] Updated --- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 36c39dd5a..3208a592c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels" +experiment_name = 
"lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter From 6dde84dd661ce4f48b23c7c9286d17f1f18be82e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 16 May 2026 18:29:09 +0900 Subject: [PATCH 107/183] Add local 3d box expand --- ..._second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index e3f7d5146..3bdda213e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py", ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs From 5213d864533cb7f879895d9552b7b04a7423b7ab Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 17 May 2026 16:46:20 +0900 Subject: [PATCH 108/183] Add local 3d box expand --- .../detection3d/dataset/t4dataset/base.py | 3 + .../detection3d/dataset/t4dataset/j6gen2.py | 3 + 
.../dataset/t4dataset/j6gen2_base.py | 3 + .../dataset/t4dataset/j6gen2_v2.py | 194 ------------------ ...30e_8xb16_jpntaxi_base_120m_t4metric_v2.py | 6 +- 5 files changed, 12 insertions(+), 197 deletions(-) delete mode 100644 autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 3be587072..8e49f2396 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -29,6 +29,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", "db_largebus_v1", "db_largebus_v2", "db_largebus_v3", diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index 0324e7207..a93bf56af 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -27,6 +27,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", ] dataset_test_groups = { diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index b9ec03f27..170086752 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -27,6 +27,9 @@ "db_j6gen2_v7", "db_j6gen2_v8", "db_j6gen2_v9", + "db_j6gen2_v10", + "db_j6gen2_v11", + "db_j6gen2_v12", "db_largebus_v1", "db_largebus_v2", "db_largebus_v3", diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py deleted file mode 100644 index e4375d576..000000000 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py +++ /dev/null @@ -1,194 +0,0 
@@ -custom_imports = dict( - imports=[ - "autoware_ml.detection3d.datasets.t4dataset", - "autoware_ml.detection3d.evaluation.t4metric.t4metric", - "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", - ] -) - -# dataset type setting -dataset_type = "T4Dataset" -info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl" -info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl" -info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl" - -info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet" -info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet" -info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet" - -# dataset scene setting -dataset_version_list = [ - "db_j6gen2_v2", -] - -dataset_test_groups = { - "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True), -} - -# dataset format setting -data_prefix = dict( - pts="", - CAM_FRONT="", - CAM_FRONT_LEFT="", - CAM_FRONT_RIGHT="", - CAM_BACK="", - CAM_BACK_RIGHT="", - CAM_BACK_LEFT="", - sweeps="", -) -camera_types = { - "CAM_FRONT", - "CAM_FRONT_RIGHT", - "CAM_FRONT_LEFT", - "CAM_BACK", - "CAM_BACK_LEFT", - "CAM_BACK_RIGHT", -} - -# class setting -name_mapping = { - # DBv1.0 - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.emergency (ambulance & police)": "car", - "vehicle.motorcycle": "bicycle", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - "vehicle.bicycle": "bicycle", - "vehicle.bus (bendy & rigid)": "bus", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "movable_object.barrier": "barrier", - "movable_object.debris": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.trafficcone": "traffic_cone", - 
"movable_object.traffic_cone": "traffic_cone", - "animal": "animal", - "static_object.bicycle_rack": "bicycle_rack", - # DBv1.1 and UCv2.0 - "car": "car", - "truck": "truck", - "bus": "bus", - "trailer": "trailer", - "motorcycle": "bicycle", - "bicycle": "bicycle", - "police_car": "car", - "pedestrian": "pedestrian", - "police_officer": "pedestrian", - "forklift": "car", - "construction_worker": "pedestrian", - "stroller": "pedestrian", - # DBv2.0 and DBv3.0 - "animal": "animal", - "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "barrier", - "movable_object.traffic_cone": "traffic_cone", - "pedestrian.adult": "pedestrian", - "pedestrian.child": "pedestrian", - "pedestrian.construction_worker": "pedestrian", - "pedestrian.personal_mobility": "pedestrian", - "pedestrian.police_officer": "pedestrian", - "pedestrian.stroller": "pedestrian", - "pedestrian.wheelchair": "pedestrian", - "static_object.bicycle rack": "bicycle rack", - "static_object.bollard": "bollard", - "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car - "vehicle.bicycle": "bicycle", - "vehicle.bus": "bus", - "vehicle.car": "car", - "vehicle.construction": "truck", - "vehicle.fire": "truck", - "vehicle.motorcycle": "bicycle", - "vehicle.police": "car", - "vehicle.trailer": "trailer", - "vehicle.truck": "truck", - # DBv1.3 - "ambulance": "car", - "kart": "car", - "wheelchair": "pedestrian", - "personal_mobility": "pedestrian", - "fire_truck": "truck", - "semi_trailer": "trailer", - "tractor_unit": "truck", - "construction_vehicle": "truck", - "traffic_cone": "traffic_cone", - "trafficcone": "traffic_cone", - "barrier": "barrier", -} - -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", -] -num_class = len(class_names) -metainfo = dict(classes=class_names) - -merge_objects = [ - ("truck", ["truck", "trailer"]), -] -merge_type = "extend_longer" # One of 
["extend_longer","union", None] - -# visualization -class_colors = { - "car": (30, 144, 255), - "truck": (140, 0, 255), - "construction_vehicle": (255, 255, 0), - "bus": (111, 255, 111), - "trailer": (0, 255, 255), - "barrier": (0, 0, 0), - "motorcycle": (100, 0, 30), - "bicycle": (255, 0, 30), - "pedestrian": (255, 200, 200), - "traffic_cone": (120, 120, 120), -} -camera_panels = [ - "data/CAM_FRONT_LEFT", - "data/CAM_FRONT", - "data/CAM_FRONT_RIGHT", - "data/CAM_BACK_LEFT", - "data/CAM_BACK", - "data/CAM_BACK_RIGHT", -] - -filter_attributes = [ - ("vehicle.bicycle", "vehicle_state.parked"), - ("vehicle.bicycle", "cycle_state.without_rider"), - ("vehicle.bicycle", "motorcycle_state.without_rider"), - ("vehicle.motorcycle", "vehicle_state.parked"), - ("vehicle.motorcycle", "cycle_state.without_rider"), - ("vehicle.motorcycle", "motorcycle_state.without_rider"), - ("bicycle", "vehicle_state.parked"), - ("bicycle", "cycle_state.without_rider"), - ("bicycle", "motorcycle_state.without_rider"), - ("motorcycle", "vehicle_state.parked"), - ("motorcycle", "cycle_state.without_rider"), - ("motorcycle", "motorcycle_state.without_rider"), -] - -evaluator_metric_configs = dict( - evaluation_task="detection", - target_labels=class_names, - center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], - # plane_distance_thresholds is required for the pass fail evaluation - plane_distance_thresholds=[2.0, 4.0], - iou_2d_thresholds=None, - iou_3d_thresholds=None, - label_prefix="autoware", - # bev minimum distance ranges for each range bucket, must be the same length as max_distance, - # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] 
when filtering - min_distance=[0.0, 50.0, 90.0, 0.0], - # bev maximum distance ranges for each range bucket, must be the same length as min_distance - max_distance=[50.0, 90.0, 121.0, 121.0], - min_point_numbers=0, - matching_class_agnostic_fps=False, -) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py index 213f0041b..64d494655 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2.py @@ -1,10 +1,10 @@ _base_ = [ - "./bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py", + "./bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py", ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # Add evaluator configs From e616c02adf9a2451a9e9e26088a2c7469b531435 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 18 May 2026 23:35:01 +0900 Subject: [PATCH 109/183] Add local 3d box expand --- projects/BEVFusion/bevfusion/utils.py | 28 ++-- .../default_lidar_second_secfpn_120m.py | 16 +- .../pipelines/default_lidar_120m_width.py | 150 ------------------ 3 files changed, 25 insertions(+), 169 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py diff --git 
a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index c47604dbd..84797cc51 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -85,26 +85,31 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1) predictions_dicts = [] - for i in range(heatmap.shape[0]): - boxes3d = final_box_preds[i] - scores = final_scores[i] - labels = final_preds[i] - predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} - predictions_dicts.append(predictions_dict) - - if filter is False: + if not filter: + for i in range(heatmap.shape[0]): + boxes3d = final_box_preds[i] + scores = final_scores[i] + labels = final_preds[i] + predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} + predictions_dicts.append(predictions_dict) return predictions_dicts - + # use score threshold if self.score_threshold is not None: - thresh_mask = final_scores > self.score_threshold + if isinstance(self.score_threshold, float): + thresh_mask = final_scores > self.score_threshold + elif isinstance(self.score_threshold, (list, tuple)): + score_threshold = final_scores.new_tensor(self.score_threshold) + thresh_mask = final_scores > score_threshold[final_preds] + else: + raise ValueError("score_threshold must be a float or list") + predictions_dicts = [] if self.post_center_range is not None: self.post_center_range = torch.tensor(self.post_center_range, device=heatmap.device) mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(2) mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(2) - predictions_dicts = [] for i in range(heatmap.shape[0]): cmask = mask[i, :] if self.score_threshold: @@ -114,7 +119,6 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): scores = final_scores[i, cmask] labels = final_preds[i, cmask] predictions_dict = {"bboxes": 
boxes3d, "scores": scores, "labels": labels} - predictions_dicts.append(predictions_dict) else: raise NotImplementedError( diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 5a880d975..f1fa5a90d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -84,14 +84,14 @@ test_cfg=dict( dataset="t4datasets", out_size_factor=8, - nms_type=None, # Set to "circle" for circle_nms + nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], nms_threshold=0.5), - dict(class_names=["pedestrian"], nms_threshold=0.175), - dict(class_names=["barrier"], nms_threshold=0.5), - dict(class_names=["traffic_cone"], nms_threshold=0.175), + dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5), # It's radius if using circle_nms + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0), + dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0), + dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0), ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling @@ -99,7 +99,9 @@ bbox_coder=dict( type="TransFusionBBoxCoder", post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], - score_threshold=0.0, + # score_threshold=0.03, + # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER + score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], out_size_factor=8, code_size=10, ), diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py deleted file mode 100644 index 0b32cc86a..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m_width.py +++ /dev/null @@ -1,150 +0,0 @@ -# Dataset parameters -backend_args = None -num_workers = 16 -input_modality = dict(use_lidar=True, use_camera=False) - -# range setting -point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] -eval_class_range = { - "car": 120, - "truck": 120, - "bus": 120, - "bicycle": 120, - "pedestrian": 120, - "traffic_cone": 120, - "barrier": 120, -} - -# LiDAR parameters -point_load_dim = 5 # x, y, z, intensity, ring_id -point_use_dim = 4 -lidar_sweep_dims = [0, 1, 2, 4] # x, y, z, time_lag -sweeps_num = 1 - -train_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=False, - ), - dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), - # For the vehicle, we expand the width by 0.20 - 0.40 to try to include side mirros - dict(type="Local3DBBoxExpand", expand_widths=[0.20, 0.40], width_dim=4, label_ids=[0]), - # For truck and bus, they are usually huge vehicles, so we expand the width by 0.40 - 0.70 - dict(type="Local3DBBoxExpand", expand_widths=[0.40, 0.70], width_dim=4, label_ids=[1, 2]), - dict( - type="BEVFusionGlobalRotScaleTrans", - scale_ratio_range=[0.95, 1.05], - rot_range=[-0.78539816, 0.78539816], - translation_std=[0.5, 0.5, 0.2], - ), - dict(type="BEVFusionRandomFlip3D"), - dict(type="PointsRangeFilter", 
point_cloud_range=point_cloud_range), - dict(type="ObjectRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="ObjectNameFilter", - classes=[ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", - "traffic_cone", - "barrier", - ], - ), - dict(type="PointShuffle"), - dict( - type="Pack3DDetInputs", - keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "transformation_3d_flow", - "pcd_rotation", - "pcd_scale_factor", - "pcd_trans", - "img_aug_matrix", - "lidar_aug_matrix", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -test_pipeline = [ - dict( - type="LoadPointsFromFile", - coord_type="LIDAR", - load_dim=point_load_dim, - use_dim=point_load_dim, - backend_args=backend_args, - ), - dict( - type="LoadPointsFromMultiSweeps", - sweeps_num=sweeps_num, - load_dim=point_load_dim, - use_dim=lidar_sweep_dims, - pad_empty_sweeps=True, - remove_close=True, - backend_args=backend_args, - test_mode=True, - ), - dict(type="PointsRangeFilter", point_cloud_range=point_cloud_range), - dict( - type="Pack3DDetInputs", - keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], - meta_keys=[ - "cam2img", - "ori_cam2img", - "lidar2cam", - "lidar2img", - "cam2lidar", - "ori_lidar2img", - "img_aug_matrix", - "box_type_3d", - "sample_idx", - "lidar_path", - "img_path", - "num_pts_feats", - "num_views", - "timestamp", - "vehicle_type", - "city", - "traffic_cone_barrier_status", - ], - ), -] - -# Filtering configuration -# Note: -# - In camera–LiDAR configs, `filter_cfg` can enable image-based frame filtering, -# e.g., dict(filter_frames_with_missing_image=True). 
-# - This is a LiDAR-only config (`input_modality['use_camera'] = False`), so -# image-based filtering does not apply and `filter_cfg` is intentionally None. -filter_cfg = None From b5036550910474284c7346d746060eff066d4b95 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 15 May 2026 22:38:53 +0900 Subject: [PATCH 110/183] Updated --- ...lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py index 36c39dd5a..3208a592c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py @@ -16,7 +16,7 @@ info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels" +experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter From 6a9a47578d58018db28cab42b1e5364666bd4302 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 18 May 2026 23:41:31 +0900 Subject: [PATCH 111/183] Updated --- ..._secfpn_50e_8xb16_base_120m_48_channels.py | 158 ------------------ 1 file changed, 158 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py deleted file mode 100644 index 3208a592c..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels.py +++ /dev/null @@ -1,158 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_lidar_120m.py", - "../default/models/default_lidar_second_secfpn_120m.py", - "../default/schedulers/default_50e_8xb16_adamw_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" - -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m_48_channels_normalized" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - voxelize_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - ), - pts_voxel_encoder=dict( - type="BEVFusionVoxelFeatureNet", - in_channels=len(_base_.lidar_sweep_dims), - with_distance=False, - with_cluster_center=True, - with_voxel_center=True, - feat_channels=[16], - point_cloud_range=_base_.point_cloud_range, - voxel_size=_base_.voxel_size, - norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), - # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 
0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 0.2], - legacy=False - ), - pts_middle_encoder=dict( - in_channels=48, - sparse_shape=_base_.grid_size, - ), - bbox_head=dict( - class_names=_base_.class_names, # Use class names to identify the correct class indices - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - 
sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) From f0f4542b29768388482152b5f5ff5b127cd795b7 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:22:48 +0900 Subject: [PATCH 112/183] Updated --- projects/BEVFusion/bevfusion/sparse_encoder.py | 15 +++++++++++---- ...dar_voxel_second_secfpn_50e_8xb16_base_120m.py | 4 ++-- .../models/default_lidar_second_secfpn_120m.py | 9 ++++++--- .../default/pipelines/default_lidar_120m.py | 4 ++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index ce45d4536..6e98a73ab 100644 --- 
a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -1,4 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. + +from typing import Dict, Optional + +import numpy as np +import torch + from mmdet3d.models.layers import make_sparse_convmodule from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE from mmdet3d.models.middle_encoders import SparseEncoder @@ -9,8 +15,6 @@ else: from mmcv.ops import SparseConvTensor -import numpy as np -import torch @MODELS.register_module() @@ -56,6 +60,8 @@ def __init__( encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), block_type="conv_module", return_middle_feats=False, + encoder_strides=(2, 2, 2, -1), + output_stride=2, ): super(SparseEncoder, self).__init__() assert block_type in ["conv_module", "basicblock"] @@ -66,6 +72,7 @@ def __init__( self.output_channels = output_channels self.encoder_channels = encoder_channels self.encoder_paddings = encoder_paddings + self.encoder_strides = encoder_strides self.stage_num = len(self.encoder_channels) self.fp16_enabled = False self.return_middle_feats = return_middle_feats @@ -110,7 +117,7 @@ def __init__( indice_key="spconv_down2", conv_type="SparseConv3d", ) - + def forward(self, voxel_features, coors, batch_size): """Forward of SparseEncoder. 
@@ -138,7 +145,7 @@ def forward(self, voxel_features, coors, batch_size): for encoder_layer in self.encoder_layers: x = encoder_layer(x) encode_features.append(x) - + # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 6d3a1f93b..28499b4f9 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" +info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index f1fa5a90d..e3297de3d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -1,6 +1,7 @@ num_proposals = 500 max_num_points = 32 max_voxels = [120000, 160000] +out_size_factor = 8 model = dict( type="BEVFusion", @@ -23,7 +24,9 @@ norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), + encoder_strides=(2, 2, 2, -1), 
# No stride for the last stage block_type="basicblock", + output_stride=2, # downsample stride ), pts_backbone=dict( type="SECOND", @@ -68,7 +71,7 @@ ), train_cfg=dict( dataset="t4datasets", - out_size_factor=8, + out_size_factor=out_size_factor, gaussian_overlap=0.1, min_radius=2, pos_weight=-1, @@ -83,7 +86,7 @@ ), test_cfg=dict( dataset="t4datasets", - out_size_factor=8, + out_size_factor=out_size_factor, nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ @@ -102,7 +105,7 @@ # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], - out_size_factor=8, + out_size_factor=out_size_factor, code_size=10, ), loss_cls=dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 09b9f7b26..317c594c1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -5,8 +5,8 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] +voxel_size = [0.15, 0.15, 0.2] +grid_size = [1632, 1632, 41] eval_class_range = { "car": 120, "truck": 120, From c9c34bb3c35c39bd452dde6d35e5936072d66fc6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:24:16 +0900 Subject: [PATCH 113/183] Add local 3d box expand --- projects/BEVFusion/bevfusion/bevfusion_head.py | 1 + ..._secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../models/default_lidar_second_secfpn_120m.py | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 69417347b..96c38658b 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -435,6 +435,7 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F circle_nms( boxes_for_nms.detach().cpu().numpy(), nms_cluster["nms_threshold"], + post_max_size=nms_cluster["post_max_size"], ) ) else: diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 3bdda213e..4a32f99a0 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index e3297de3d..8b450d72a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,11 +90,11 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.5), # It's radius if using circle_nms - dict(class_names=["bicycle"], class_indices=[3], 
nms_threshold=0.0), - dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0), - dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0), - dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0), + dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), + dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), + dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], ), dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.02, 0.015, 0.015, 0.01, 0.02, 0.02, 0.015], + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010], out_size_factor=out_size_factor, code_size=10, ), From a8073f8ddf7bf1c876b87c2eb1a489bfc644b3f5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:24:39 +0900 Subject: [PATCH 114/183] Add local 3d box expand --- ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 71c1829d4..b3858fb06 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 8b450d72a..e871fce58 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,8 +104,8 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.010], - out_size_factor=out_size_factor, + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015], + out_size_factor=8, code_size=10, ), loss_cls=dict( From be70f2fbb991e5d1272df5689bdeeb775df3d02f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 17:23:07 +0900 Subject: [PATCH 115/183] Add local 3d box expand --- ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 
b3858fb06..71c1829d4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 4a32f99a0..c77e0332b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v2/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 25d154bd7967b381be76b417d2f7e50c3a3313ca Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 18:52:03 +0900 Subject: [PATCH 116/183] Update camera config structure --- ...net50_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ---------------- ...snet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ---------------- 
..._swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py | 137 ---------------- ...a_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py | 137 ---------------- ...swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py | 137 ---------------- ..._swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py | 137 ---------------- ...sion_camera_30e_8xb16_j6gen2_base_120m.py} | 37 +---- ...usion_camera_30e_8xb16_j6gen2_base_50m.py} | 37 +---- ...t_bevfusion_camera_50e_8xb16_base_120m.py} | 35 +---- ...lt_bevfusion_camera_50e_8xb16_base_50m.py} | 35 +---- ...fpn_depthlss_30e_8xb16_j6gen2_base_120m.py | 32 ++++ ...snet50_fpn_depthlss_50e_8xb16_base_120m.py | 32 ++++ ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 32 ++++ ...era_resnet50_fpn_lss_50e_8xb16_base_50m.py | 32 ++++ ...a_swin_fpn_depthlss_50e_8xb16_base_120m.py | 32 ++++ ..._camera_swin_fpn_lss_50e_8xb16_base_50m.py | 32 ++++ ...swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py | 32 ++++ ..._swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py | 32 ++++ ...0_fpn_depthlss_lidar_second_secfpn_120m.py | 2 +- .../camera_resnet50_fpn_depthlss_120m.py} | 6 +- .../camera_resnet50_fpn_lss_50m.py} | 5 +- .../camera_swin_fpn_depthlss_120m.py} | 2 +- ..._fpn_depthlss_lidar_second_secfpn_120m.py} | 2 +- .../camera_swin_fpn_lss_50m.py} | 2 +- .../pipelines/cameras/default_camera_120m.py | 147 ++++++++++++++++++ .../default_camera_50m.py} | 6 +- .../default/pipelines/default_lidar_50m.py | 6 + 27 files changed, 437 insertions(+), 963 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py delete mode 100644 
projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py delete mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py => default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py} (74%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py => default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py} (76%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py => default_bevfusion_camera_50e_8xb16_base_120m.py} (75%) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/{bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py => default_bevfusion_camera_50e_8xb16_base_50m.py} (74%) create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py create mode 100644 projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py create mode 100644 
projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_resnet50_fpn_depthlss_120m.py => resnet50/camera_resnet50_fpn_depthlss_120m.py} (94%) rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_resnet50_fpn_lss_50m.py => resnet50/camera_resnet50_fpn_lss_50m.py} (83%) rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_depthlss_120m.py => swin_transformer/camera_swin_fpn_depthlss_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py => swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py} (97%) rename projects/BEVFusion/configs/t4dataset/default/models/{default_camera_swin_fpn_lss_50m.py => swin_transformer/camera_swin_fpn_lss_50m.py} (98%) create mode 100644 projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py rename projects/BEVFusion/configs/t4dataset/default/pipelines/{default_camera_base_50m.py => cameras/default_camera_50m.py} (96%) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py deleted file mode 100644 index 4c809264e..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_120m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lss_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - 
"../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - 
data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py deleted file mode 100644 index 927310e7d..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_30e_8xb8_j6gen2_base_50m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_base_50m.py", - "../default/models/default_camera_swin_fpn_lss_50m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", 
shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path 
+ _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py deleted file mode 100644 index 4c809264e..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lss_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - 
bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - 
class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py deleted file mode 100644 index 927310e7d..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_base_50m.py", - "../default/models/default_camera_swin_fpn_lss_50m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = 
dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + 
_base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py deleted file mode 100644 index 4c809264e..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_120m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lss_120m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - 
sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - 
ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py deleted file mode 100644 index 927310e7d..000000000 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_30e_8xb8_jpntaxi_base_50m.py +++ /dev/null @@ -1,137 +0,0 @@ -_base_ = [ - "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", - "../default/pipelines/default_camera_base_50m.py", - "../default/models/default_camera_swin_fpn_lss_50m.py", - "../default/schedulers/default_30e_8xb8_adamw_linear_cosine.py", - "../default/default_misc.py", -] - -custom_imports = dict(imports=["projects.BEVFusion.bevfusion"], allow_failed_imports=False) -custom_imports["imports"] += _base_.custom_imports["imports"] -custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] - -# user setting -data_root = "data/t4datasets/" -info_directory_path = "info/kokseang_2_6_1/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_30e_8xb8_j6gen2_base_50m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - 
view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - -# Dataset parameters -train_dataloader = dict( - batch_size=_base_.train_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=dict( - type=_base_.dataset_type, - pipeline=_base_.train_pipeline, - modality=_base_.input_modality, - backend_args=_base_.backend_args, - data_root=data_root, - ann_file=info_directory_path + _base_.info_train_file_name, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - test_mode=False, - data_prefix=_base_.data_prefix, - box_type_3d="LiDAR", - filter_cfg=_base_.filter_cfg, - ), -) - -val_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_val_file_name, - pipeline=_base_.test_pipeline, - metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -test_dataloader = dict( - batch_size=_base_.test_batch_size, - num_workers=_base_.num_workers, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=_base_.dataset_type, - data_root=data_root, - ann_file=info_directory_path + _base_.info_test_file_name, - pipeline=_base_.test_pipeline, 
- metainfo=_base_.metainfo, - class_names=_base_.class_names, - modality=_base_.input_modality, - data_prefix=_base_.data_prefix, - test_mode=True, - box_type_3d="LiDAR", - backend_args=_base_.backend_args, - ), -) - -val_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_val_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, -) - -test_evaluator = dict( - type="T4Metric", - data_root=data_root, - ann_file=data_root + info_directory_path + _base_.info_test_file_name, - metric="bbox", - backend_args=_base_.backend_args, - class_names=_base_.class_names, - name_mapping=_base_.name_mapping, - eval_class_range=_base_.eval_class_range, - filter_attributes=_base_.filter_attributes, - save_csv=True, -) - -default_hooks = dict( - logger=dict(type="LoggerHook", interval=50), - checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), -) -log_processor = dict(window_size=50) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py similarity index 74% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py index 42f93d1b1..987c13393 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py @@ -1,9 +1,8 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - 
"../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lss_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/cameras/default_camera_120m.py", + "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -13,35 +12,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) +info_directory_path = "info/kokseang_2_8_0/" # Dataset parameters train_dataloader = dict( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py similarity index 76% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py index 7c5a5f91f..ceedda1c9 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py @@ -1,9 +1,8 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", - "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_base_50m.py", - "../default/models/default_camera_swin_fpn_lss_50m.py", - "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py", + "../default/pipelines/cameras/default_camera_50m.py", + "../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -13,36 +12,12 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" +info_directory_path = "info/kokseang_2_8_0/" -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m" +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) - # Dataset parameters train_dataloader = dict( batch_size=_base_.train_batch_size, diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py similarity index 75% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py index 7c5a5f91f..7a81be126 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_resnet50_fpn_lss_50e_8xb8_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_120m.py @@ -1,9 +1,8 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_base_50m.py", - "../default/models/default_camera_swin_fpn_lss_50m.py", - "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/pipelines/cameras/default_camera_120m.py", + "../default/schedulers/default_50e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -13,35 +12,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_50m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - 
pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) +info_directory_path = "info/kokseang_2_8_0/" # Dataset parameters train_dataloader = dict( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py similarity index 74% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py rename to projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py index 42f93d1b1..4b79e2102 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_50e_8xb16_base_50m.py @@ -1,9 +1,8 @@ _base_ = [ "../../../../../autoware_ml/configs/detection3d/default_runtime.py", "../../../../../autoware_ml/configs/detection3d/dataset/t4dataset/base.py", - "../default/pipelines/default_camera_lidar_intensity_120m.py", - "../default/models/default_camera_swin_fpn_lss_120m.py", - "../default/schedulers/default_50e_8xb8_adamw_linear_cosine.py", + "../default/pipelines/cameras/default_camera_50m.py", + "../default/schedulers/default_50e_8xb16_adamw_linear_cosine.py", "../default/default_misc.py", ] @@ -13,35 +12,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_6_2/" - -experiment_group_name = "bevfusion_camera_2_6_0/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb8_base_120m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - -# model parameter -model = dict( - type="BEVFusion", - view_transform=dict(image_size=_base_.image_size), - bbox_head=dict( - class_names=_base_.class_names, - in_channels=80, - train_cfg=dict( - point_cloud_range=_base_.point_cloud_range, - 
grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size, - ), - test_cfg=dict( - grid_size=_base_.grid_size, - voxel_size=_base_.voxel_size[0:2], - pc_range=_base_.point_cloud_range[0:2], - ), - bbox_coder=dict( - pc_range=_base_.point_cloud_range[0:2], - voxel_size=_base_.voxel_size[0:2], - ), - ), -) +info_directory_path = "info/kokseang_2_8_0/" # Dataset parameters train_dataloader = dict( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py new file mode 100644 index 000000000..e73416744 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py", + "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py new file mode 100644 index 000000000..ebdfff437 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_50e_8xb16_base_120m.py", + "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py new file mode 100644 index 000000000..e23efb65a --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py", + "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = 
"bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py new file mode 100644 index 000000000..7bf63010b --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_50e_8xb16_base_50m.py", + "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + 
voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py new file mode 100644 index 000000000..56c2930bb --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_50e_8xb16_base_50m.py", + "../../default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb16_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py new file mode 100644 index 000000000..8d1ff7681 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_50e_8xb16_base_50m.py", + 
"../../default/models/swin_transformer/camera_swin_fpn_lss_50m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb16_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py new file mode 100644 index 000000000..401ac7861 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_120m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_30e_8xb16_j6gen2_base_120m.py", + "../../default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_depthlss_50e_8xb16_j6gen2_base_120m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + 
voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py new file mode 100644 index 000000000..80e81be39 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/swin_transformer/bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m.py @@ -0,0 +1,32 @@ +_base_ = [ + "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py", + "../../default/models/swin_transformer/camera_swin_fpn_lss_50m.py", +] + +experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_name = "bevfusion_camera_swin_fpn_lss_50e_8xb16_j6gen2_base_50m" +work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name + +# model parameter +model = dict( + type="BEVFusion", + view_transform=dict(image_size=_base_.image_size), + bbox_head=dict( + class_names=_base_.class_names, + in_channels=80, + train_cfg=dict( + point_cloud_range=_base_.point_cloud_range, + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size, + ), + test_cfg=dict( + grid_size=_base_.grid_size, + voxel_size=_base_.voxel_size[0:2], + pc_range=_base_.point_cloud_range[0:2], + ), + bbox_coder=dict( + pc_range=_base_.point_cloud_range[0:2], + voxel_size=_base_.voxel_size[0:2], + ), + ), +) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py index 43e8dd9ac..339f3e97e 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_lidar_second_secfpn_120m.py @@ -23,7 +23,7 @@ norm_eval=False, with_cp=False, style="pytorch", - init_cfg=dict( + init_cfg=dict( type="Pretrained", checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth", # noqa: E251 ), diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py similarity index 94% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py rename to projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py index 6203da514..a6ccca5dc 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py @@ -1,5 +1,5 @@ _base_ = [ - "./default_lidar_second_secfpn_120m.py", + "../default_lidar_second_secfpn_120m.py", ] # Image network @@ -18,7 +18,7 @@ bgr_to_rgb=False, rgb_to_bgr=False, ), - img_backbone=dict( + img_backbone=dict( pretrained="torchvision://resnet50", type="ResNet", depth=50, @@ -29,7 +29,7 @@ norm_eval=False, with_cp=False, style="pytorch", - init_cfg=dict( + init_cfg=dict( type="Pretrained", checkpoint="work_dirs/resnet50/mmdet_resnet50-19c8e357.pth", # noqa: E251 ), diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py similarity index 83% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py rename to 
projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 5577723bf..ca3e8f8a2 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,9 +6,6 @@ model = dict( view_transform=dict( type="LSSTransform", - in_channels=256, - out_channels=80, - feature_size=[48, 96], xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], @@ -19,5 +16,5 @@ bbox_coder=dict( post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], ), - ) + ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py index c4b0cd9ab..88e74efc7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_120m.py @@ -1,5 +1,5 @@ _base_ = [ - "./default_lidar_second_secfpn_120m.py", + "../default_lidar_second_secfpn_120m.py", ] # Image network diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py similarity index 97% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py 
index 55c6ca3cd..2ac22b1b6 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_depthlss_lidar_second_secfpn_120m.py @@ -1,5 +1,5 @@ _base_ = [ - "./default_lidar_second_secfpn_120m.py", + "../default_lidar_second_secfpn_120m.py", ] # Image network diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py similarity index 98% rename from projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py rename to projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py index 39a4a637f..1294416ad 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_camera_swin_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/swin_transformer/camera_swin_fpn_lss_50m.py @@ -19,5 +19,5 @@ bbox_coder=dict( post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], ), - ) + ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py new file mode 100644 index 000000000..fc7338699 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -0,0 +1,147 @@ +## This config is for the camera_base only model, without lidar points + +_base_ = [ + "../default_lidar_120m.py", +] +input_modality = dict(use_lidar=True, use_camera=True) + +# Image parameters +image_size = [384, 768] # Height, Width +camera_order = ["CAM_FRONT", "CAM_FRONT_LEFT", "CAM_BACK_LEFT", "CAM_FRONT_RIGHT", "CAM_BACK_RIGHT"] + +train_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + color_type="color", + 
backend_args=_base_.backend_args, + camera_order=camera_order, + ), + # We keep loading LiDAR points to make downstream BEV augmentation easier + dict( + type="LoadPointsFromFile", + coord_type="LIDAR", + load_dim=_base_.point_load_dim, + use_dim=_base_.point_load_dim, + backend_args=_base_.backend_args, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=False, + ), + dict(type="LoadAnnotations3D", with_bbox_3d=True, with_label_3d=True, with_attr_label=False), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.28, 0.40], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=True, + is_train=True, + ), + dict( + type="BEVFusionGlobalRotScaleTrans", + scale_ratio_range=[0.95, 1.05], + rot_range=[-0.78539816, 0.78539816], + translation_std=[0.5, 0.5, 0.2], + ), + dict(type="BEVFusionRandomFlip3D"), + dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict( + type="ObjectNameFilter", + classes=[ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", + ], + ), + dict( + type="Pack3DDetInputs", + keys=["points", "img", "gt_bboxes_3d", "gt_labels_3d", "gt_bboxes", "gt_labels"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "transformation_3d_flow", + "pcd_rotation", + "pcd_scale_factor", + "pcd_trans", + "img_aug_matrix", + "lidar_aug_matrix", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +test_pipeline = [ + dict( + type="BEVLoadMultiViewImageFromFiles", + to_float32=True, + color_type="color", + backend_args=_base_.backend_args, + camera_order=camera_order, + ), + dict( + type="LoadPointsFromMultiSweeps", + sweeps_num=_base_.sweeps_num, + 
load_dim=_base_.point_load_dim, + use_dim=_base_.lidar_sweep_dims, + pad_empty_sweeps=True, + remove_close=True, + backend_args=_base_.backend_args, + test_mode=True, + ), + dict( + type="ImageAug3D", + final_dim=image_size, + resize_lim=[0.34, 0.34], + bot_pct_lim=[0.0, 0.0], + rot_lim=[0.0, 0.0], + rand_flip=False, + is_train=False, + ), + dict( + type="Pack3DDetInputs", + keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"], + meta_keys=[ + "cam2img", + "ori_cam2img", + "lidar2cam", + "lidar2img", + "cam2lidar", + "ori_lidar2img", + "img_aug_matrix", + "box_type_3d", + "sample_idx", + "lidar_path", + "img_path", + "num_pts_feats", + "num_views", + "timestamp", + "vehicle_type", + "city", + ], + ), +] + +filter_cfg = dict(filter_frames_with_camera_order=camera_order) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py similarity index 96% rename from projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py rename to projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 37c17e79a..77470a938 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -1,7 +1,7 @@ ## This config is for the camera_base only model, without lidar points _base_ = [ - "./default_lidar_50m.py", + "../default_lidar_50m.py", ] input_modality = dict(use_lidar=True, use_camera=True) @@ -17,7 +17,7 @@ backend_args=_base_.backend_args, camera_order=camera_order, ), - # We keep loading LiDAR points to make downstream BEV augmentation easier + # We keep loading LiDAR points to make downstream BEV augmentation easier dict( type="LoadPointsFromFile", coord_type="LIDAR", @@ -84,6 +84,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -125,6 +126,7 
@@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index 964d6eef9..80fdac189 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -13,6 +13,8 @@ "bus": 54.0, "bicycle": 54.0, "pedestrian": 54.0, + "traffic_cone": 54.0, + "barrier": 54.0, } # LiDAR parameters @@ -57,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), @@ -84,6 +88,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +132,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] From 842dec226224e26258324b0ef8d19c2f16dcbf6a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 18:57:06 +0900 Subject: [PATCH 117/183] Updated --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 9 + .../bevfusion/ops/bev_pool_v2/__init__.py | 3 + .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 190 ++++++++++++++++++ .../ops/bev_pool_v2/src/bev_pool.cpp | 111 ++++++++++ .../ops/bev_pool_v2/src/bev_pool_cuda.cu | 140 +++++++++++++ projects/BEVFusion/setup.py | 8 + 6 files changed, 461 insertions(+) create mode 100644 projects/BEVFusion/bevfusion/depth_lss_v2.py create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp create mode 100644 projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py new file mode 100644 index 000000000..974a39cce --- /dev/null +++ 
b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -0,0 +1,9 @@ +from typing import Tuple + +import torch +from mmdet3d.registry import MODELS +from torch import nn + +from .depth_lss import DepthLSSNet, DownSampleNet, LidarDepthImageNet, BaseViewTransform +from .ops import bev_pool_v2 + diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py new file mode 100644 index 000000000..549a97e81 --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py @@ -0,0 +1,3 @@ +from .bev_pool_v2 import bev_pool_v2 + +__all__ = ["bev_pool_v2"] \ No newline at end of file diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py new file mode 100644 index 000000000..b1d2f03af --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -0,0 +1,190 @@ +# Copyright (c) Phigent Robotics. All rights reserved. + +import numpy as np +import torch + +from . import bev_pool_v2_ext + + +class QuickCumsumV2TrainingCuda(torch.autograd.Function): + r"""BEVPoolv2 implementation for Lift-Splat-Shoot view transformation. 
+ + Please refer to the `paper `_ + """ + @staticmethod + def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev, + bev_feat_shape, interval_starts, interval_lengths): + ranks_bev = ranks_bev.int() + depth = depth.contiguous().float() + feat = feat.contiguous().float() + ranks_depth = ranks_depth.contiguous().int() + ranks_feat = ranks_feat.contiguous().int() + interval_lengths = interval_lengths.contiguous().int() + interval_starts = interval_starts.contiguous().int() + + out = feat.new_zeros(bev_feat_shape) + + bev_pool_v2_ext.bev_pool_v2_forward( + depth, + feat, + out, + ranks_depth, + ranks_feat, + ranks_bev, + interval_lengths, + interval_starts, + ) + + ctx.save_for_backward(ranks_bev, depth, feat, ranks_feat, ranks_depth) + return out + + @staticmethod + def backward(ctx, out_grad): + ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors + + order = ranks_feat.argsort() + ranks_feat, ranks_depth, ranks_bev = \ + ranks_feat[order], ranks_depth[order], ranks_bev[order] + kept = torch.ones( + ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) + kept[1:] = ranks_feat[1:] != ranks_feat[:-1] + interval_starts_bp = torch.where(kept)[0].int() + interval_lengths_bp = torch.zeros_like(interval_starts_bp) + interval_lengths_bp[:-1] = interval_starts_bp[ + 1:] - interval_starts_bp[:-1] + interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1] + + depth = depth.contiguous() + feat = feat.contiguous() + ranks_depth = ranks_depth.contiguous() + ranks_feat = ranks_feat.contiguous() + ranks_bev = ranks_bev.contiguous() + interval_lengths_bp = interval_lengths_bp.contiguous() + interval_starts_bp = interval_starts_bp.contiguous() + + depth_grad = depth.new_zeros(depth.shape) + feat_grad = feat.new_zeros(feat.shape) + out_grad = out_grad.contiguous() + bev_pool_v2_ext.bev_pool_v2_backward( + out_grad, + depth_grad, + feat_grad, + depth, + feat, + ranks_depth, + ranks_feat, + ranks_bev, + interval_lengths_bp, + 
interval_starts_bp, + ) + return depth_grad, feat_grad, None, None, None, None, None, \ + None, None, None + + +class QuickCumsumV2Cuda(torch.autograd.Function): + + @staticmethod + def symbolic(g, + depth, + feat, + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height=128, + out_width=128): + """symbolic function for creating onnx op.""" + x = g.op( + 'autoware::QuickCumsumV2Cuda', + depth, + feat, + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height_i=out_height, + out_width_i=out_width) + + # features_shape = _get_tensor_sizes(feat) + # if features_shape is not None and hasattr(x.type(), "with_sizes"): + # output_type = x.type().with_sizes([B, D, H, W, _get_tensor_dim_size(x, -1)]) + # output.setType(output_type) + + @staticmethod + def forward(ctx, + depth, # N,D,H,W + feat, # N,H,W,C + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height=128, + out_width=128): + """run forward.""" + feat = feat.unsqueeze(0) + depth = depth.unsqueeze(0) + bev_feat_shape = (depth.shape[0], 1, out_height, out_width, + feat.shape[-1]) # (B, Z, Y, X, C) + bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, + bev_feat_shape, interval_starts, + interval_lengths) + bev_feat = bev_feat.squeeze(2) + bev_feat = bev_feat.permute(0, 2, 3, 1) + return bev_feat + + @staticmethod + def backward(ctx, out_grad): + raise NotImplementedError + + +def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, + bev_feat_shape, interval_starts, interval_lengths, is_training): + + if is_training: + x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, + bev_feat_shape, interval_starts, + interval_lengths) + else: + x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, + bev_feat_shape, interval_starts, + interval_lengths) + + x = x.permute(0, 4, 1, 2, 3).contiguous() + return x + + +def test_bev_pool_v2(): + 
depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9]) + depth = torch.from_numpy(depth).float().cuda() + depth = depth.view(1, 1, 2, 2, 2).requires_grad_() + feat = torch.ones( + size=[1, 1, 2, 2, 2], dtype=torch.float, + device='cuda').requires_grad_() + ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda() + ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda() + ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda() + + kept = torch.ones( + ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) + kept[1:] = ranks_bev[1:] != ranks_bev[:-1] + interval_starts = torch.where(kept)[0].int() + if len(interval_starts) == 0: + return None, None, None, None, None + interval_lengths = torch.zeros_like(interval_starts) + interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1] + interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1] + bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, + (1, 1, 2, 2, 2), interval_starts, interval_lengths) + loss = torch.sum(bev_feat) + loss.backward() + assert loss == 4.4 + grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.]) + grad_depth = torch.from_numpy(grad_depth).float() + grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2) + assert depth.grad.allclose(grad_depth) + grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.]) + grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2) + assert feat.grad.allclose(grad_feat) diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp new file mode 100644 index 000000000..c7c38f695 --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool.cpp @@ -0,0 +1,111 @@ +// Copyright (c) Phigent Robotics. All rights reserved. 
+// Reference https://arxiv.org/abs/2211.17111 +#include +#include + +// CUDA function declarations +void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, + const int* ranks_depth, const int* ranks_feat, const int* ranks_bev, + const int* interval_starts, const int* interval_lengths, float* out); + +void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad, + const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat, + const int* ranks_bev, const int* interval_starts, const int* interval_lengths, + float* depth_grad, float* feat_grad); + + +/* + Function: pillar pooling (forward, cuda) + Args: + depth : input depth, FloatTensor[n, d, h, w] + feat : input features, FloatTensor[n, h, w, c] + out : output features, FloatTensor[b, c, h_out, w_out] + ranks_depth : depth index of points, IntTensor[n_points] + ranks_feat : feat index of points, IntTensor[n_points] + ranks_bev : output index of points, IntTensor[n_points] + interval_lengths : starting position for pooled point, IntTensor[n_intervals] + interval_starts : how many points in each pooled point, IntTensor[n_intervals] + Return: +*/ +void bev_pool_v2_forward( + const at::Tensor _depth, + const at::Tensor _feat, + at::Tensor _out, + const at::Tensor _ranks_depth, + const at::Tensor _ranks_feat, + const at::Tensor _ranks_bev, + const at::Tensor _interval_lengths, + const at::Tensor _interval_starts +) { + int c = _feat.size(4); + int n_intervals = _interval_lengths.size(0); + const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth)); + const float* depth = _depth.data_ptr(); + const float* feat = _feat.data_ptr(); + const int* ranks_depth = _ranks_depth.data_ptr(); + const int* ranks_feat = _ranks_feat.data_ptr(); + const int* ranks_bev = _ranks_bev.data_ptr(); + + const int* interval_lengths = _interval_lengths.data_ptr(); + const int* interval_starts = _interval_starts.data_ptr(); + + float* out = _out.data_ptr(); + bev_pool_v2( + c, 
n_intervals, depth, feat, ranks_depth, ranks_feat, + ranks_bev, interval_starts, interval_lengths, out + ); +} + + +/* + Function: pillar pooling (backward, cuda) + Args: + out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out] + depth_grad : grad of input depth, FloatTensor[n, d, h, w] + feat_grad : grad of input feature, FloatTensor[n, h, w, c] + depth : input depth, FloatTensor[n, d, h, w] + feat : input features, FloatTensor[n, h, w, c] + ranks_depth : depth index of points, IntTensor[n_points] + ranks_feat : feat index of points, IntTensor[n_points] + ranks_bev : output index of points, IntTensor[n_points] + interval_lengths : starting position for pooled point, IntTensor[n_intervals] + interval_starts : how many points in each pooled point, IntTensor[n_intervals] +*/ +void bev_pool_v2_backward( + const at::Tensor _out_grad, + at::Tensor _depth_grad, + at::Tensor _feat_grad, + const at::Tensor _depth, + const at::Tensor _feat, + const at::Tensor _ranks_depth, + const at::Tensor _ranks_feat, + const at::Tensor _ranks_bev, + const at::Tensor _interval_lengths, + const at::Tensor _interval_starts +) { + int c = _out_grad.size(4); + int n_intervals = _interval_lengths.size(0); + const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad)); + const float* out_grad = _out_grad.data_ptr(); + float* depth_grad = _depth_grad.data_ptr(); + float* feat_grad = _feat_grad.data_ptr(); + const float* depth = _depth.data_ptr(); + const float* feat = _feat.data_ptr(); + const int* ranks_depth = _ranks_depth.data_ptr(); + const int* ranks_feat = _ranks_feat.data_ptr(); + const int* ranks_bev = _ranks_bev.data_ptr(); + const int* interval_lengths = _interval_lengths.data_ptr(); + const int* interval_starts = _interval_starts.data_ptr(); + + bev_pool_v2_grad( + c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat, + ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad + ); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + 
m.def("bev_pool_v2_forward", &bev_pool_v2_forward, + "bev_pool_v2_forward"); + m.def("bev_pool_v2_backward", &bev_pool_v2_backward, + "bev_pool_v2_backward"); +} diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu new file mode 100644 index 000000000..7fa3179b7 --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/src/bev_pool_cuda.cu @@ -0,0 +1,140 @@ +// Copyright (c) Phigent Robotics. All rights reserved. +// Reference https://arxiv.org/abs/2211.17111 + +#include +#include + +/* + Function: pillar pooling + Args: + c : number of channels + n_intervals : number of unique points + depth : input depth, FloatTensor[b,n,d,h,w] + feat : input feat, FloatTensor[b,n,h,w,c] + ranks_depth : input index of depth, IntTensor[n] + ranks_feat : input index of feat, IntTensor[n] + ranks_bev : output index, IntTensor[n] + interval_lengths : starting position for pooled point, IntTensor[n_intervals] + interval_starts : how many points in each pooled point, IntTensor[n_intervals] + out : output features, FloatTensor[b, d, h, w, c] +*/ +__global__ void bev_pool_v2_kernel(int c, int n_intervals, + const float *__restrict__ depth, + const float *__restrict__ feat, + const int *__restrict__ ranks_depth, + const int *__restrict__ ranks_feat, + const int *__restrict__ ranks_bev, + const int *__restrict__ interval_starts, + const int *__restrict__ interval_lengths, + float* __restrict__ out) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int index = idx / c; + int cur_c = idx % c; + if (index >= n_intervals) return; + int interval_start = interval_starts[index]; + int interval_length = interval_lengths[index]; + float psum = 0; + const float* cur_depth; + const float* cur_feat; + for(int i = 0; i < interval_length; i++){ + cur_depth = depth + ranks_depth[interval_start+i]; + cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c; + psum += *cur_feat * *cur_depth; + } + + 
const int* cur_rank = ranks_bev + interval_start; + float* cur_out = out + *cur_rank * c + cur_c; + *cur_out = psum; +} + + +/* + Function: pillar pooling backward + Args: + c : number of channels + n_intervals : number of unique points + out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c] + depth : input depth, FloatTensor[b,n,d,h,w] + feat : input feat, FloatTensor[b,n,h,w,c] + ranks_depth : input index of depth, IntTensor[n] + ranks_feat : input index of feat, IntTensor[n] + ranks_bev : output index, IntTensor[n] + interval_lengths : starting position for pooled point, IntTensor[n_intervals] + interval_starts : how many points in each pooled point, IntTensor[n_intervals] + depth_grad : gradient of the depth fmap, FloatTensor + feat_grad : gradient of the feature fmap, FloatTensor +*/ +__global__ void bev_pool_grad_kernel(int c, int n_intervals, + const float *__restrict__ out_grad, + const float *__restrict__ depth, + const float *__restrict__ feat, + const int *__restrict__ ranks_depth, + const int *__restrict__ ranks_feat, + const int *__restrict__ ranks_bev, + const int *__restrict__ interval_starts, + const int *__restrict__ interval_lengths, + float* __restrict__ depth_grad, + float* __restrict__ feat_grad) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n_intervals) return; + int interval_start = interval_starts[idx]; + int interval_length = interval_lengths[idx]; + + const int* cur_rank; + const float* cur_out_grad; + const float* cur_out_grad_start; + + const float* cur_feat; + const float* cur_feat_start; + float* cur_depth_grad; + float grad_sum; + for(int i = 0; i < interval_length; i++){ + cur_rank = ranks_bev + interval_start + i; + cur_out_grad_start = out_grad + * cur_rank * c; + cur_feat_start = feat + ranks_feat[interval_start+i] * c; + + grad_sum = 0; + for(int cur_c = 0; cur_c < c; cur_c++){ + cur_out_grad = cur_out_grad_start + cur_c; + cur_feat = cur_feat_start + cur_c; + grad_sum += *cur_out_grad * 
*cur_feat; + } + + cur_depth_grad = depth_grad + ranks_depth[interval_start+i]; + *cur_depth_grad = grad_sum; + } + + float* cur_feat_grad; + const float* cur_depth; + for(int cur_c = 0; cur_c < c; cur_c++){ + grad_sum = 0; + for(int i = 0; i < interval_length; i++){ + cur_rank = ranks_bev + interval_start + i; + cur_out_grad = out_grad + *cur_rank * c + cur_c; + + cur_depth = depth + ranks_depth[interval_start+i]; + grad_sum += *cur_out_grad * *cur_depth; + } + cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ; + * cur_feat_grad = grad_sum; + } +} + + + +void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth, + const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) { + bev_pool_v2_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>( + c, n_intervals, depth, feat, ranks_depth, ranks_feat, + ranks_bev, interval_starts, interval_lengths, out + ); +} + +void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad, + const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat, + const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) { + bev_pool_grad_kernel<<<(int)ceil(((double)n_intervals / 256)), 256>>>( + c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat, + ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad + ); +} diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py index 38f588b20..02fe93524 100644 --- a/projects/BEVFusion/setup.py +++ b/projects/BEVFusion/setup.py @@ -54,6 +54,14 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i "src/bev_pool_cuda.cu", ], ), + make_cuda_ext( + name="bev_pool_v2_ext", + module="projects.BEVFusion.bevfusion.ops.bev_pool_v2", + sources=[ + "src/bev_pool_v2.cpp", + "src/bev_pool_v2_cuda.cu", + ], + ), make_cuda_ext( 
name="voxel_layer", module="projects.BEVFusion.bevfusion.ops.voxel", From 1499a91a4c7854793a87c4a943fba22bf3e9234b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 20:00:32 +0900 Subject: [PATCH 118/183] Update camera config structure --- projects/BEVFusion/bevfusion/depth_lss.py | 2 +- projects/BEVFusion/bevfusion/depth_lss_v2.py | 215 ++++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index f202c3777..ac7c5b503 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -567,7 +567,7 @@ def __init__( zbound=zbound, dbound=dbound, ) - + self.dtransform = LidarDepthImageNet(in_channels=1, out_channels=64, last_stride=lidar_depth_image_last_stride) self.depthnet = DepthLSSNet( in_channels=in_channels + self.dtransform.out_channels, out_channels=self.D + self.C diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 974a39cce..4305b1fe6 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -4,6 +4,219 @@ from mmdet3d.registry import MODELS from torch import nn -from .depth_lss import DepthLSSNet, DownSampleNet, LidarDepthImageNet, BaseViewTransform +from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet from .ops import bev_pool_v2 + +class BaseViewTransformV2(BaseViewTransform): + + def __init__( + self, + in_channels: int, + out_channels: int, + image_size: Tuple[int, int], + feature_size: Tuple[int, int], + xbound: Tuple[float, float, float], + ybound: Tuple[float, float, float], + zbound: Tuple[float, float, float], + dbound: Tuple[float, float, float], + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + image_size=image_size, + feature_size=feature_size, + xbound=xbound, + ybound=ybound, + zbound=zbound, + 
dbound=dbound, + ) + + def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]: + raise NotImplementedError + + def forward( + self, + img, + points, + lidar2image, + camera_intrinsics, + camera2lidar, + img_aug_matrix, + lidar_aug_matrix, + metas, + camera_intrinsics_inverse, + img_aug_matrix_inverse, + lidar_aug_matrix_inverse, + geom_feats_precomputed, + ): + if geom_feats_precomputed is not None: + geom_feats, kept, ranks, indices = geom_feats_precomputed + x, depth_softmax = self.get_cam_feats(img) + x = self.bev_pool_precomputed(x, depth_softmax, geom_feats, kept, ranks, indices) + + else: + intrins = camera_intrinsics[..., :3, :3] + post_rots = img_aug_matrix[..., :3, :3] + post_trans = img_aug_matrix[..., :3, 3] + camera2lidar_rots = camera2lidar[..., :3, :3] + camera2lidar_trans = camera2lidar[..., :3, 3] + + extra_rots = lidar_aug_matrix[..., :3, :3] + extra_trans = lidar_aug_matrix[..., :3, 3] + + geom = self.get_geometry( + camera2lidar_rots, + camera2lidar_trans, + torch.inverse(intrins), + torch.inverse(post_rots), + post_trans, + extra_rots=extra_rots, + extra_trans=extra_trans, + ) + + # depth is not connected to the calibration + # on_img is + # is also flattened_indices + ( + view_feats, + depth_softmax, + ) = self.get_cam_feats(img) + x = self.bev_pool(view_feats, depth_softmax, geom) + + return x + + def bev_pool_aux(self, geom_feats): + B, N, D, H, W, C = geom_feats.shape + Nprime = B * N * D * H * W + assert C == 3 + + # record the index of selected points for acceleration purpose + ranks_depth = torch.range(0, Nprime - 1, dtype=torch.int, device=geom_feats.device) + ranks_feat = torch.range(0, Nprime // D - 1, dtype=torch.int, device=geom_feats.device) + ranks_feat = ranks_feat.reshape(B, N, 1, H, W) + ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten() + + # flatten indices + geom_feats = ((geom_feats - (self.bx - self.dx / 2.0)) / self.dx).long() + geom_feats = geom_feats.view(Nprime, 3) + batch_ix = torch.cat( + 
[torch.full([Nprime // B, 1], ix, device=geom_feats.device, dtype=torch.long) for ix in range(B)] + ) + geom_feats = torch.cat((geom_feats, batch_ix), 1) + + # filter out points that are outside box + kept = ( + (geom_feats[:, 0] >= 0) + & (geom_feats[:, 0] < self.nx[0]) + & (geom_feats[:, 1] >= 0) + & (geom_feats[:, 1] < self.nx[1]) + & (geom_feats[:, 2] >= 0) + & (geom_feats[:, 2] < self.nx[2]) + ) + + if len(kept) == 0: + return None, None, None, None + + geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] + + # nx is the total number of voxels/cells in the BEV grid + # nx[0] is x, nx[1] is y, nx[2] is z + ranks_bev = ( + geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B) + + geom_feats[:, 1] * (self.nx[2] * B) + + geom_feats[:, 2] * B + + geom_feats[:, 3] + ) + indices = ranks_bev.argsort() + ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] + + intervals = self.compute_intervals(ranks_bev) + if intervals is None: + return None, None, None, None, None + + interval_starts, interval_lengths = intervals + return ( + ranks_bev.int().contiguous(), + ranks_depth.int().contiguous(), + ranks_feat.int().contiguous(), + interval_starts.int().contiguous(), + interval_lengths.int().contiguous(), + ) + + def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) + kept[1:] = ranks_bev[1:] != ranks_bev[:-1] + interval_starts = torch.where(kept)[0].int() + if len(interval_starts) == 0: + return None + + interval_lengths = torch.zeros_like(interval_starts) + interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1] + interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1] + return interval_starts.int().contiguous(), interval_lengths.int().contiguous() + + def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor: + """ """ + B, N, D, H, W, _ = 
geom.shape + num_points = B * N * D * H * W + + # record the index of selected points for acceleration purpose + ranks_depth = torch.range(0, num_points - 1, dtype=torch.int, device=geom.device) + ranks_feat = torch.range(0, num_points // D - 1, dtype=torch.int, device=geom.device) + ranks_feat = ranks_feat.reshape(B, N, 1, H, W) + ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten() + + B, N, C, fH, fW = view_feats.shape + + bev_feat = bev_pool_v2( + depth_softmax, + x, + ranks_depth, + ranks_feat, + ranks_bev, + bev_feat_shape, + interval_starts, + interval_lengths, + is_training, + ) + return bev_feat + + +class LSSTransformV2(BaseViewTransformV2): + + def __init__( + self, + in_channels: int, + out_channels: int, + image_size: Tuple[int, int], + feature_size: Tuple[int, int], + xbound: Tuple[float, float, float], + ybound: Tuple[float, float, float], + zbound: Tuple[float, float, float], + dbound: Tuple[float, float, float], + downsample: int = 1, + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + image_size=image_size, + feature_size=feature_size, + xbound=xbound, + ybound=ybound, + zbound=zbound, + dbound=dbound, + ) + self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1) + self.downsample = DownSampleNet(downsample, self.out_channels, self.out_channels) + + def get_cam_feats(self, x): + B, N, C, fH, fW = x.shape + x = x.view(B * N, C, fH, fW) + x = self.depthnet(x) + + depth_softmax = x[:, : self.D].softmax(dim=1) + depth_softmax = depth_softmax.view(B, N, self.D, fH, fW) + view_feats = x[:, self.D : (self.D + self.C)] + view_feats = view_feats.view(B, N, self.C, fH, fW) + return view_feats, depth_softmax From a5b406540c1495081c62ed8e4ddf338914fbee42 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 20:14:30 +0900 Subject: [PATCH 119/183] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index e871fce58..4e4e7dde7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.015, 0.015, 0.015], + score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015], out_size_factor=8, code_size=10, ), From 0b547bf528d8ec474856a40e0697137c75bab6b6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 20:27:56 +0900 Subject: [PATCH 120/183] Add local 3d box expand --- ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index c77e0332b..288cb4d1b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v3/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" 
work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 4e4e7dde7..796fef3e8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -104,7 +104,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.010, 0.010, 0.010, 0.020, 0.020, 0.015], + score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020], out_size_factor=8, code_size=10, ), From 6949e4154493164b950e56b2c85d38abbca7c29c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 22:37:21 +0900 Subject: [PATCH 121/183] Add local 3d box expand --- ...l_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- .../default/models/default_lidar_second_secfpn_120m.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 288cb4d1b..71a60c0d5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v4/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type 
experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 796fef3e8..6b5f28e31 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,9 +90,10 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ + # Sqrt(0.25) = 0.5 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), - dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), + # Sqrt(0.04) = 0.2 + dict(class_names=["bicycle". 
"pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], From bc6d024bc2b5f35dddc08ad5d582e279c358e19c Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 19 May 2026 22:45:08 +0900 Subject: [PATCH 122/183] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 6b5f28e31..44744c1dd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -93,7 +93,7 @@ # Sqrt(0.25) = 0.5 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms # Sqrt(0.04) = 0.2 - dict(class_names=["bicycle". 
"pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), + dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], @@ -105,7 +105,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.030, 0.020], + score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025], out_size_factor=8, code_size=10, ), From 366e7a4975688ecc6f8069684dd3c9a772b53960 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Wed, 20 May 2026 08:34:03 +0900 Subject: [PATCH 123/183] Updated --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 56 ++++++++++++------- .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 7 ++- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 4305b1fe6..b4276834d 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -3,6 +3,7 @@ import torch from mmdet3d.registry import MODELS from torch import nn +from mmengine.logging import print_log from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet from .ops import bev_pool_v2 @@ -20,6 +21,7 @@ def __init__( ybound: Tuple[float, float, float], zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], + collapse_z: bool = True, ): super().__init__( in_channels=in_channels, @@ -31,7 +33,8 @@ def __init__( zbound=zbound, dbound=dbound, ) - + self.collapse_z = collapse_z + def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError @@ -161,25 +164,40 @@ def bev_pool(self, view_feats, 
depth_softmax, geom) -> torch.Tensor: B, N, D, H, W, _ = geom.shape num_points = B * N * D * H * W - # record the index of selected points for acceleration purpose - ranks_depth = torch.range(0, num_points - 1, dtype=torch.int, device=geom.device) - ranks_feat = torch.range(0, num_points // D - 1, dtype=torch.int, device=geom.device) - ranks_feat = ranks_feat.reshape(B, N, 1, H, W) - ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten() - - B, N, C, fH, fW = view_feats.shape - + ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom) + + if ranks_feat is None: + print_log('warning ---> no points within the predefined bev receptive field') + dummy = torch.zeros(size=[ + view_feats.shape[0], view_feats.shape[2], + int(self.nx[2]), + int(self.nx[1]), + int(self.nx[0]) + ]).to(view_feats) + dummy = torch.cat(dummy.unbind(dim=2), 1) + return dummy + + # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C) + view_feats = view_feats.permute(0, 1, 3, 4, 2) + bev_feat_shape = (depth_softmax.shape[0], int(self.nx[2]), + int(self.nx[1]), int(self.nx[0]), + view_feats.shape[-1]) # (B, Z, Y, X, C) + bev_feat = bev_pool_v2( - depth_softmax, - x, - ranks_depth, - ranks_feat, - ranks_bev, - bev_feat_shape, - interval_starts, - interval_lengths, - is_training, - ) + depth=depth_softmax, + feat=view_feats, + ranks_depth=ranks_depth, + ranks_feat=ranks_feat, + ranks_bev=ranks_bev, + interval_starts=interval_starts, + interval_lengths=interval_lengths, + bev_feat_shape=bev_feat_shape, + is_training=self.training) + + # collapse Z + if self.collapse_z: + bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) + return bev_feat diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py index b1d2f03af..a40717503 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -141,16 +141,17 
@@ def backward(ctx, out_grad): def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, - bev_feat_shape, interval_starts, interval_lengths, is_training): + interval_starts, interval_lengths, bev_feat_shape, is_training): if is_training: x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths) else: + # BEV Shape is (B, Z, Y, X, C) + out_height, out_width = bev_feat_shape[2], bev_feat_shape[2] x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, - bev_feat_shape, interval_starts, - interval_lengths) + interval_starts, interval_lengths) x = x.permute(0, 4, 1, 2, 3).contiguous() return x From a61c859ec61b5f2639ed742fa7850648e72afba0 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Wed, 20 May 2026 08:35:41 +0900 Subject: [PATCH 124/183] Updated --- projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py index a40717503..5a479924d 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -151,8 +151,9 @@ def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, # BEV Shape is (B, Z, Y, X, C) out_height, out_width = bev_feat_shape[2], bev_feat_shape[2] x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, - interval_starts, interval_lengths) + out_height, out_width, interval_starts, interval_lengths) + # Final shape: (B, C, Z, Y, X) x = x.permute(0, 4, 1, 2, 3).contiguous() return x From 81e26bbc0fca82c9174f593c2877e48a31436d55 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 20 May 2026 16:55:27 +0900 Subject: [PATCH 125/183] Add local 3d box expand --- ...cond_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- 
.../default/models/default_lidar_second_secfpn_120m.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 71a60c0d5..245ae0814 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v5/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 44744c1dd..979dd31f2 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -90,10 +90,11 @@ nms_type="circle", # Set to "circle" for circle_nms # Set NMS for different clusters nms_clusters=[ - # Sqrt(0.25) = 0.5 + # Sqrt(0.25) = 0.50 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - # Sqrt(0.04) = 0.2 - dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.04, post_max_size=200), + # Sqrt(0.001) = 0.0316 + dict(class_names=["bicycle", "pedestrian"], 
class_indices=[3, 4], nms_threshold=0.001, post_max_size=200), + # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], @@ -105,7 +106,7 @@ post_center_range=[-200.0, -200.0, -10.0, 200.0, 200.0, 10.0], # score_threshold=0.03, # CAR, TRUCK, BUS, BICYCLE, PEDESTRIAN, TRAFFIC_CONE, BARRIER - score_threshold=[0.015, 0.015, 0.010, 0.020, 0.030, 0.040, 0.025], + score_threshold=[0.015, 0.010, 0.010, 0.020, 0.030, 0.040, 0.020], out_size_factor=8, code_size=10, ), From 770be09224b0536ab40505abe3aa1a0622bbb1cb Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Wed, 20 May 2026 18:52:18 +0900 Subject: [PATCH 126/183] Add local 3d box expand --- .../default/models/default_lidar_second_secfpn_120m.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index 979dd31f2..bad602cb7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -92,9 +92,8 @@ nms_clusters=[ # Sqrt(0.25) = 0.50 dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms - # Sqrt(0.001) = 0.0316 - dict(class_names=["bicycle", "pedestrian"], class_indices=[3, 4], nms_threshold=0.001, post_max_size=200), - # dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=200), + dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), + dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), 
dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), dict(class_names=["barrier"], class_indices=[6], nms_threshold=0.0, post_max_size=50), ], From 89ab9c329e29afc3f8ae15f217664c3ac3421aa2 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 21 May 2026 01:22:59 +0900 Subject: [PATCH 127/183] Updated --- autoware_ml/configs/detection3d/dataset/t4dataset/base.py | 2 ++ autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py | 2 ++ .../configs/detection3d/dataset/t4dataset/j6gen2_base.py | 2 ++ .../configs/detection3d/dataset/t4dataset/jpntaxi_base.py | 2 ++ .../configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py | 2 ++ autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py | 2 ++ ...oxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py | 2 +- 7 files changed, 13 insertions(+), 1 deletion(-) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index 8e49f2396..7f4be6293 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -149,6 +149,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"] diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index a93bf56af..a87166019 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -130,6 +130,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py 
b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index 170086752..ef0141a5b 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -136,6 +136,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index c08decfa1..0f00a651d 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -123,6 +123,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index dbd6e2813..9995cd9b7 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -120,6 +120,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index 2212b8e56..cd42362b5 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -122,6 +122,8 @@ "traffic_cone": "traffic_cone", "trafficcone": "traffic_cone", "barrier": "barrier", + "other_vehicle": "car", + "other_pedestrian": "pedestrian", } class_names = [ diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py index 245ae0814..3bdda213e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base_nms_v9/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From e6f64520a3fe5a28d60ac09213c6e4361ce6780b Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 21 May 2026 17:51:44 +0900 Subject: [PATCH 128/183] Update camera config structure --- projects/BEVFusion/bevfusion/__init__.py | 12 +- projects/BEVFusion/bevfusion/depth_lss_v2.py | 59 ++++---- .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 127 +++++++++--------- .../resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- 4 files changed, 111 insertions(+), 89 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 6a1a32ecc..385ccd89e 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -1,13 +1,20 @@ from .bevfusion import BEVFusion from .bevfusion_head import BEVFusionHead, ConvFuser from .bevfusion_necks import GeneralizedLSSFPN +from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder from .depth_lss import DepthLSSTransform, LSSTransform +from .depth_lss_v2 import LSSTransformV2 from .loading import 
BEVLoadMultiViewImageFromFiles from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer -from .transforms_3d import BEVFusionGlobalRotScaleTrans, BEVFusionRandomFlip3D, GridMask, ImageAug3D, BEVFusionRemoveLiDARPoints +from .transforms_3d import ( + BEVFusionGlobalRotScaleTrans, + BEVFusionRandomFlip3D, + BEVFusionRemoveLiDARPoints, + GridMask, + ImageAug3D, +) from .utils import BBoxBEVL1Cost, HeuristicAssigner3D, HungarianAssigner3D, IoU3DCost, TransFusionBBoxCoder -from .bevfusion_voxel_encoder import HardSimpleVoxelSinCosEncoder, BEVFusionVoxelFeatureNet __all__ = [ "BEVFusion", @@ -31,4 +38,5 @@ "TransFusionBBoxCoder", "HardSimpleVoxelSinCosEncoder", "BEVFusionVoxelFeatureNet", + "LSSTransformV2", ] diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index b4276834d..e3213eb6e 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -2,8 +2,8 @@ import torch from mmdet3d.registry import MODELS -from torch import nn from mmengine.logging import print_log +from torch import nn from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet from .ops import bev_pool_v2 @@ -22,7 +22,13 @@ def __init__( zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], collapse_z: bool = True, + expand_batch_axis: bool = False, ): + """ + Args: + collapse_z: collapse the Z axis of the BEV grid + expand_batch_axis: expand the batch axis of the inputs to bev pool if this is set to True. 
+ """ super().__init__( in_channels=in_channels, out_channels=out_channels, @@ -34,7 +40,8 @@ def __init__( dbound=dbound, ) self.collapse_z = collapse_z - + self.expand_batch_axis = expand_batch_axis + def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError @@ -161,39 +168,41 @@ def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torc def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor: """ """ - B, N, D, H, W, _ = geom.shape - num_points = B * N * D * H * W - ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom) + if self.expand_batch_axis: + view_feats = view_feats.unsqueeze(0) + depth_softmax = depth_softmax.unsqueeze(0) if ranks_feat is None: - print_log('warning ---> no points within the predefined bev receptive field') - dummy = torch.zeros(size=[ - view_feats.shape[0], view_feats.shape[2], - int(self.nx[2]), - int(self.nx[1]), - int(self.nx[0]) - ]).to(view_feats) + print_log("warning ---> no points within the predefined bev receptive field") + dummy = torch.zeros( + size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])] + ).to(view_feats) dummy = torch.cat(dummy.unbind(dim=2), 1) return dummy - + # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C) view_feats = view_feats.permute(0, 1, 3, 4, 2) - bev_feat_shape = (depth_softmax.shape[0], int(self.nx[2]), - int(self.nx[1]), int(self.nx[0]), - view_feats.shape[-1]) # (B, Z, Y, X, C) - + bev_feat_shape = ( + depth_softmax.shape[0], + int(self.nx[2]), + int(self.nx[1]), + int(self.nx[0]), + view_feats.shape[-1], + ) # (B, Z, Y, X, C) + bev_feat = bev_pool_v2( - depth=depth_softmax, - feat=view_feats, - ranks_depth=ranks_depth, - ranks_feat=ranks_feat, + depth=depth_softmax, + feat=view_feats, + ranks_depth=ranks_depth, + ranks_feat=ranks_feat, ranks_bev=ranks_bev, - interval_starts=interval_starts, - interval_lengths=interval_lengths, + 
interval_starts=interval_starts, + interval_lengths=interval_lengths, bev_feat_shape=bev_feat_shape, - is_training=self.training) - + is_training=self.training, + ) + # collapse Z if self.collapse_z: bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py index 5a479924d..d9408d078 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -11,9 +11,11 @@ class QuickCumsumV2TrainingCuda(torch.autograd.Function): Please refer to the `paper `_ """ + @staticmethod - def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev, - bev_feat_shape, interval_starts, interval_lengths): + def forward( + ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths + ): ranks_bev = ranks_bev.int() depth = depth.contiguous().float() feat = feat.contiguous().float() @@ -43,15 +45,12 @@ def backward(ctx, out_grad): ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors order = ranks_feat.argsort() - ranks_feat, ranks_depth, ranks_bev = \ - ranks_feat[order], ranks_depth[order], ranks_bev[order] - kept = torch.ones( - ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) + ranks_feat, ranks_depth, ranks_bev = ranks_feat[order], ranks_depth[order], ranks_bev[order] + kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) kept[1:] = ranks_feat[1:] != ranks_feat[:-1] interval_starts_bp = torch.where(kept)[0].int() interval_lengths_bp = torch.zeros_like(interval_starts_bp) - interval_lengths_bp[:-1] = interval_starts_bp[ - 1:] - interval_starts_bp[:-1] + interval_lengths_bp[:-1] = interval_starts_bp[1:] - interval_starts_bp[:-1] interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1] depth = depth.contiguous() @@ -77,26 +76,27 @@ def backward(ctx, out_grad): 
interval_lengths_bp, interval_starts_bp, ) - return depth_grad, feat_grad, None, None, None, None, None, \ - None, None, None + return depth_grad, feat_grad, None, None, None, None, None, None, None, None class QuickCumsumV2Cuda(torch.autograd.Function): @staticmethod - def symbolic(g, - depth, - feat, - ranks_depth, - ranks_feat, - ranks_bev, - interval_starts, - interval_lengths, - out_height=128, - out_width=128): + def symbolic( + g, + depth, + feat, + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height=128, + out_width=128, + ): """symbolic function for creating onnx op.""" x = g.op( - 'autoware::QuickCumsumV2Cuda', + "autoware::QuickCumsumV2Cuda", depth, feat, ranks_depth, @@ -105,54 +105,61 @@ def symbolic(g, interval_starts, interval_lengths, out_height_i=out_height, - out_width_i=out_width) - + out_width_i=out_width, + ) + # features_shape = _get_tensor_sizes(feat) # if features_shape is not None and hasattr(x.type(), "with_sizes"): # output_type = x.type().with_sizes([B, D, H, W, _get_tensor_dim_size(x, -1)]) # output.setType(output_type) @staticmethod - def forward(ctx, - depth, # N,D,H,W - feat, # N,H,W,C - ranks_depth, - ranks_feat, - ranks_bev, - interval_starts, - interval_lengths, - out_height=128, - out_width=128): + def forward( + ctx, + depth, # B,N,D,H,W + feat, # B,N,H,W,C + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height=128, + out_width=128, + ): """run forward.""" - feat = feat.unsqueeze(0) - depth = depth.unsqueeze(0) - bev_feat_shape = (depth.shape[0], 1, out_height, out_width, - feat.shape[-1]) # (B, Z, Y, X, C) - bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, - bev_feat_shape, interval_starts, - interval_lengths) - bev_feat = bev_feat.squeeze(2) - bev_feat = bev_feat.permute(0, 2, 3, 1) + out = feat.new_zeros(depth.shape[0], 1, out_height, out_width, feat.shape[-1]) + bev_feat = bev_pool_v2_ext.bev_pool_v2_forward( + depth, + 
feat, + out, + ranks_depth, + ranks_feat, + ranks_bev, + interval_lengths, + interval_starts, + ) return bev_feat - + @staticmethod def backward(ctx, out_grad): raise NotImplementedError -def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, - interval_starts, interval_lengths, bev_feat_shape, is_training): - +def bev_pool_v2( + depth, feat, ranks_depth, ranks_feat, ranks_bev, interval_starts, interval_lengths, bev_feat_shape, is_training +): + if is_training: - x = QuickCumsumV2TrainingCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, - bev_feat_shape, interval_starts, - interval_lengths) + x = QuickCumsumV2TrainingCuda.apply( + depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths + ) else: # BEV Shape is (B, Z, Y, X, C) out_height, out_width = bev_feat_shape[2], bev_feat_shape[2] - x = QuickCumsumV2Cuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev, - out_height, out_width, interval_starts, interval_lengths) - + x = QuickCumsumV2Cuda.apply( + depth, feat, ranks_depth, ranks_feat, ranks_bev, out_height, out_width, interval_starts, interval_lengths + ) + # Final shape: (B, C, Z, Y, X) x = x.permute(0, 4, 1, 2, 3).contiguous() return x @@ -162,15 +169,12 @@ def test_bev_pool_v2(): depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9]) depth = torch.from_numpy(depth).float().cuda() depth = depth.view(1, 1, 2, 2, 2).requires_grad_() - feat = torch.ones( - size=[1, 1, 2, 2, 2], dtype=torch.float, - device='cuda').requires_grad_() + feat = torch.ones(size=[1, 1, 2, 2, 2], dtype=torch.float, device="cuda").requires_grad_() ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda() ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda() ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda() - kept = torch.ones( - ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) + kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) 
kept[1:] = ranks_bev[1:] != ranks_bev[:-1] interval_starts = torch.where(kept)[0].int() if len(interval_starts) == 0: @@ -178,15 +182,16 @@ def test_bev_pool_v2(): interval_lengths = torch.zeros_like(interval_starts) interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1] interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1] - bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, - (1, 1, 2, 2, 2), interval_starts, interval_lengths) + bev_feat = bev_pool_v2( + depth, feat, ranks_depth, ranks_feat, ranks_bev, (1, 1, 2, 2, 2), interval_starts, interval_lengths + ) loss = torch.sum(bev_feat) loss.backward() assert loss == 4.4 - grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.]) + grad_depth = np.array([2.0, 2.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0]) grad_depth = torch.from_numpy(grad_depth).float() grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2) assert depth.grad.allclose(grad_depth) - grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.]) + grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0.0, 0.0]) grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2) assert feat.grad.allclose(grad_feat) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index ca3e8f8a2..7a6420d51 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -5,7 +5,7 @@ # Image network model = dict( view_transform=dict( - type="LSSTransform", + type="LSSTransformV2", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], From 7ecba750481a60ea4626c45fdd357c517403cd28 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 21 May 2026 18:41:09 +0900 Subject: [PATCH 129/183] Update camera config structure --- 
projects/BEVFusion/bevfusion/depth_lss_v2.py | 49 +++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index e3213eb6e..d00958b3c 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Optional, Tuple import torch from mmdet3d.registry import MODELS @@ -61,9 +61,9 @@ def forward( geom_feats_precomputed, ): if geom_feats_precomputed is not None: - geom_feats, kept, ranks, indices = geom_feats_precomputed + ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed x, depth_softmax = self.get_cam_feats(img) - x = self.bev_pool_precomputed(x, depth_softmax, geom_feats, kept, ranks, indices) + x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat) else: intrins = camera_intrinsics[..., :3, :3] @@ -126,7 +126,7 @@ def bev_pool_aux(self, geom_feats): ) if len(kept) == 0: - return None, None, None, None + return None, None, None geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] @@ -141,25 +141,21 @@ def bev_pool_aux(self, geom_feats): indices = ranks_bev.argsort() ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] - intervals = self.compute_intervals(ranks_bev) - if intervals is None: - return None, None, None, None, None - - interval_starts, interval_lengths = intervals return ( ranks_bev.int().contiguous(), ranks_depth.int().contiguous(), ranks_feat.int().contiguous(), - interval_starts.int().contiguous(), - interval_lengths.int().contiguous(), ) - def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def compute_intervals(self, ranks_bev: Optional[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: + if ranks_bev is None: + return None, None + kept = 
torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool) kept[1:] = ranks_bev[1:] != ranks_bev[:-1] interval_starts = torch.where(kept)[0].int() if len(interval_starts) == 0: - return None + return None, None interval_lengths = torch.zeros_like(interval_starts) interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1] @@ -168,12 +164,18 @@ def compute_intervals(self, ranks_bev: torch.Tensor) -> Tuple[torch.Tensor, torc def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor: """ """ - ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths = self.bev_pool_aux(geom) - if self.expand_batch_axis: - view_feats = view_feats.unsqueeze(0) - depth_softmax = depth_softmax.unsqueeze(0) + ranks_bev, ranks_depth, ranks_feat = self.bev_pool_aux(geom) + interval_starts, interval_lengths = self.compute_intervals(ranks_bev) + bev_feat = self.compute_bev_pool( + view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths + ) + return bev_feat - if ranks_feat is None: + def compute_bev_pool( + self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths + ): + """Compute the BEV pool for the given view features, depth softmax, ranks, and intervals.""" + if interval_starts is None: print_log("warning ---> no points within the predefined bev receptive field") dummy = torch.zeros( size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])] @@ -181,6 +183,10 @@ def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor: dummy = torch.cat(dummy.unbind(dim=2), 1) return dummy + if self.expand_batch_axis: + view_feats = view_feats.unsqueeze(0) + depth_softmax = depth_softmax.unsqueeze(0) + # permute view_feats from (B, N, C, fH, fW) to (B, N, fH, fW, C) view_feats = view_feats.permute(0, 1, 3, 4, 2) bev_feat_shape = ( @@ -209,6 +215,13 @@ def bev_pool(self, view_feats, depth_softmax, geom) -> torch.Tensor: return 
bev_feat + def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat): + interval_starts, interval_lengths = self.compute_intervals(ranks_bev) + bev_feat = self.compute_bev_pool( + view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths + ) + return bev_feat + class LSSTransformV2(BaseViewTransformV2): From aa0fdda9043f060173eddc8d3ffce22c61a42f1f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 21 May 2026 18:47:13 +0900 Subject: [PATCH 130/183] Update camera config structure --- projects/BEVFusion/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/setup.py b/projects/BEVFusion/setup.py index 02fe93524..52d397c12 100644 --- a/projects/BEVFusion/setup.py +++ b/projects/BEVFusion/setup.py @@ -58,8 +58,8 @@ def make_cuda_ext(name, module, sources, sources_cuda=[], extra_args=[], extra_i name="bev_pool_v2_ext", module="projects.BEVFusion.bevfusion.ops.bev_pool_v2", sources=[ - "src/bev_pool_v2.cpp", - "src/bev_pool_v2_cuda.cu", + "src/bev_pool.cpp", + "src/bev_pool_cuda.cu", ], ), make_cuda_ext( From 178c0e0a0724247e0aa84f6e488be43c9226aadc Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Fri, 22 May 2026 00:49:30 +0900 Subject: [PATCH 131/183] Added --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 3 ++- .../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py | 4 ++-- .../models/resnet50/camera_resnet50_fpn_depthlss_120m.py | 3 +-- .../default/models/resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- .../schedulers/default_30e_8xb16_adamw_linear_cosine.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index d00958b3c..c6383e329 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -223,6 +223,7 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, 
ranks_depth return bev_feat +@MODELS.register_module() class LSSTransformV2(BaseViewTransformV2): def __init__( @@ -248,7 +249,7 @@ def __init__( dbound=dbound, ) self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1) - self.downsample = DownSampleNet(downsample, self.out_channels, self.out_channels) + self.downsample = DownSampleNet(downsample, out_channels, out_channels) def get_cam_feats(self, x): B, N, C, fH, fW = x.shape diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py index ceedda1c9..81859eed7 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py @@ -11,8 +11,8 @@ custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] # user setting -data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8_0/" +data_root = "data/t4datasets/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m" diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py index a6ccca5dc..90aa87210 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py @@ -19,8 +19,7 @@ rgb_to_bgr=False, ), img_backbone=dict( - pretrained="torchvision://resnet50", - type="ResNet", + type="mmdet.ResNet", depth=50, num_stages=4, out_indices=(2, 3), diff --git 
a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 7a6420d51..95ab35f85 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -1,5 +1,5 @@ _base_ = [ - "./default_camera_resnet50_fpn_depthlss_120m.py", + "./camera_resnet50_fpn_depthlss_120m.py", ] # Image network diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 261246886..b56cef0d2 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -4,7 +4,7 @@ max_epochs = 30 val_interval = 1 -train_gpu_size = 8 +train_gpu_size = 2 test_batch_size = 2 train_batch_size = 8 From fb6935dea96af8eab3c06ba9eaa22eb77a10eaf1 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Fri, 22 May 2026 00:55:38 +0900 Subject: [PATCH 132/183] Added --- projects/BEVFusion/bevfusion/ops/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/ops/__init__.py b/projects/BEVFusion/bevfusion/ops/__init__.py index e08abbc6d..f74f0edbb 100644 --- a/projects/BEVFusion/bevfusion/ops/__init__.py +++ b/projects/BEVFusion/bevfusion/ops/__init__.py @@ -1,4 +1,12 @@ from .bev_pool import bev_pool +from .bev_pool_v2 import bev_pool_v2 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization -__all__ = ["bev_pool", "Voxelization", "voxelization", "dynamic_scatter", "DynamicScatter"] +__all__ = [ + "bev_pool", + "bev_pool_v2", + "Voxelization", + "voxelization", + 
"dynamic_scatter", + "DynamicScatter", +] From 4a9557864a6aeb47356142b87dbc322f74669887 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Fri, 22 May 2026 01:18:59 +0900 Subject: [PATCH 133/183] Added --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 6 +++++- ...mera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py | 1 - ...sion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py | 1 - ...ion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 1 - .../bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py | 1 - .../default/models/resnet50/camera_resnet50_fpn_lss_50m.py | 3 +++ 6 files changed, 8 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index c6383e329..df0740331 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -212,7 +212,6 @@ def compute_bev_pool( # collapse Z if self.collapse_z: bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) - return bev_feat def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat): @@ -261,3 +260,8 @@ def get_cam_feats(self, x): view_feats = x[:, self.D : (self.D + self.C)] view_feats = view_feats.view(B, N, self.C, fH, fW) return view_feats, depth_softmax + + def forward(self, *args, **kwargs): + x = super().forward(*args, **kwargs) + x = self.downsample(x) + return x \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py index e73416744..9473ceb0f 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py @@ -13,7 +13,6 @@ view_transform=dict(image_size=_base_.image_size), bbox_head=dict( class_names=_base_.class_names, - in_channels=80, train_cfg=dict( point_cloud_range=_base_.point_cloud_range, grid_size=_base_.grid_size, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py index ebdfff437..47c91cfb3 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_50e_8xb16_base_120m.py @@ -13,7 +13,6 @@ view_transform=dict(image_size=_base_.image_size), bbox_head=dict( class_names=_base_.class_names, - in_channels=80, train_cfg=dict( point_cloud_range=_base_.point_cloud_range, grid_size=_base_.grid_size, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py index e23efb65a..d7bd79913 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py @@ -13,7 +13,6 @@ view_transform=dict(image_size=_base_.image_size), bbox_head=dict( class_names=_base_.class_names, - in_channels=80, train_cfg=dict( point_cloud_range=_base_.point_cloud_range, grid_size=_base_.grid_size, diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py index 7bf63010b..5215dc9f3 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py @@ -13,7 +13,6 @@ view_transform=dict(image_size=_base_.image_size), bbox_head=dict( class_names=_base_.class_names, - in_channels=80, train_cfg=dict( point_cloud_range=_base_.point_cloud_range, grid_size=_base_.grid_size, diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 95ab35f85..5381ea708 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -1,6 +1,7 @@ _base_ = [ "./camera_resnet50_fpn_depthlss_120m.py", ] +num_proposals = 200 # Image network model = dict( @@ -13,6 +14,8 @@ downsample=2, ), bbox_head=dict( + in_channels=80, + num_proposals=num_proposals, bbox_coder=dict( post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], ), From 237e0aae0a4d7b692f710e4f10014f35dd56f01b Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Fri, 22 May 2026 11:32:04 +0900 Subject: [PATCH 134/183] Added --- projects/BEVFusion/bevfusion/depth_lss.py | 37 +++++++++++++++++++ projects/BEVFusion/bevfusion/depth_lss_v2.py | 23 ++++++++---- .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 12 +++++- ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 2 +- .../camera_resnet50_fpn_depthlss_120m.py | 3 ++ .../resnet50/camera_resnet50_fpn_lss_50m.py | 3 +- 
.../default_30e_8xb16_adamw_linear_cosine.py | 2 +- 7 files changed, 69 insertions(+), 13 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index ac7c5b503..f363a8e76 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -164,6 +164,7 @@ def __init__( ybound: Tuple[float, float, float], zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], + visualize_bev_feat: bool = False, ) -> None: super().__init__() self.in_channels = in_channels @@ -183,6 +184,7 @@ def __init__( self.frustum = self.create_frustum() self.D = self.frustum.shape[0] self.fp16_enabled = False + self.visualize_bev_feat = visualize_bev_feat def create_frustum(self): iH, iW = self.image_size @@ -319,8 +321,43 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices): # collapse Z final = torch.cat(x.unbind(dim=2), 1) + if self.visualize_bev_feat: + self.visualize_bev_feat(final) + return final + def visualize_bev_feat(self, bev_feat): + """Visualize the BEV feat for the given batch index.""" + batch_idx = 0 + # save first 10 raw channel maps for one batch sample (B, C, Y, X) + num_channels = 10 + feat = bev_feat[batch_idx].detach().float().cpu().numpy() + channel_indices = np.arange(min(num_channels, feat.shape[0])) + ncols = min(5, len(channel_indices)) + nrows = math.ceil(len(channel_indices) / ncols) + fig, axes = plt.subplots(nrows, ncols, figsize=(3 * ncols, 3 * nrows), squeeze=False) + for ax, ch_idx in zip(axes.ravel(), channel_indices): + ch_map = feat[ch_idx] + im = ax.imshow(ch_map, cmap="viridis", origin="lower", aspect="equal") + ax.set_title(f"ch {ch_idx}", fontsize=9) + ax.set_xlabel("X") + ax.set_ylabel("Y") + fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04) + for ax in axes.ravel()[len(channel_indices) :]: + ax.axis("off") + fig.suptitle(f"bev_feat channels 0-{len(channel_indices) - 1} (batch={batch_idx})") + fig.tight_layout() + + save_dir 
= Path("work_dirs/bev_feat_vis") + save_dir.mkdir(parents=True, exist_ok=True) + if not hasattr(self, "_bev_feat_vis_count"): + self._bev_feat_vis_count = 0 + self._bev_feat_vis_count += 1 + save_path = save_dir / f"bev_feat_batch{batch_idx}_{self._bev_feat_vis_count:06d}.png" + fig.savefig(save_path, dpi=150, bbox_inches="tight") + plt.close(fig) + print_log(f"Saved BEV feat visualization to {save_path.resolve()}") + def forward( self, img, diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index df0740331..e50992b46 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -1,5 +1,9 @@ +import math +from pathlib import Path from typing import Optional, Tuple +import matplotlib.pyplot as plt +import numpy as np import torch from mmdet3d.registry import MODELS from mmengine.logging import print_log @@ -23,6 +27,7 @@ def __init__( dbound: Tuple[float, float, float], collapse_z: bool = True, expand_batch_axis: bool = False, + visualize_bev_feat: bool = False, ): """ Args: @@ -38,6 +43,7 @@ def __init__( ybound=ybound, zbound=zbound, dbound=dbound, + visualize_bev_feat=visualize_bev_feat, ) self.collapse_z = collapse_z self.expand_batch_axis = expand_batch_axis @@ -130,13 +136,12 @@ def bev_pool_aux(self, geom_feats): geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] - # nx is the total number of voxels/cells in the BEV grid - # nx[0] is x, nx[1] is y, nx[2] is z + # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C) ranks_bev = ( - geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B) - + geom_feats[:, 1] * (self.nx[2] * B) - + geom_feats[:, 2] * B - + geom_feats[:, 3] + geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) + + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) + + geom_feats[:, 1] * (self.nx[0]) + + geom_feats[:, 0] ) indices = ranks_bev.argsort() ranks_bev, ranks_depth, ranks_feat = 
ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] @@ -212,8 +217,12 @@ def compute_bev_pool( # collapse Z if self.collapse_z: bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) - return bev_feat + if self.visualize_bev_feat: + self.visualize_bev_feat(bev_feat) + + return bev_feat + def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat): interval_starts, interval_lengths = self.compute_intervals(ranks_bev) bev_feat = self.compute_bev_pool( diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py index d9408d078..57b18a69e 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -155,9 +155,17 @@ def bev_pool_v2( ) else: # BEV Shape is (B, Z, Y, X, C) - out_height, out_width = bev_feat_shape[2], bev_feat_shape[2] + out_height, out_width = bev_feat_shape[2], bev_feat_shape[3] x = QuickCumsumV2Cuda.apply( - depth, feat, ranks_depth, ranks_feat, ranks_bev, out_height, out_width, interval_starts, interval_lengths + depth, + feat, + ranks_depth, + ranks_feat, + ranks_bev, + interval_starts, + interval_lengths, + out_height, + out_width, ) # Final shape: (B, C, Z, Y, X) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py index d7bd79913..751b24438 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py @@ -4,7 +4,7 @@ ] experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type -experiment_name = 
"bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_view_lss_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py index 90aa87210..4a1f33040 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py @@ -54,4 +54,7 @@ dbound=[1.0, 130, 1.0], downsample=2, ), + bbox_head=dict( + in_channels=80, + ) ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 5381ea708..2f556a122 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,7 +6,7 @@ # Image network model = dict( view_transform=dict( - type="LSSTransformV2", + type="LSSTransform", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], @@ -14,7 +14,6 @@ downsample=2, ), bbox_head=dict( - in_channels=80, num_proposals=num_proposals, bbox_coder=dict( post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index b56cef0d2..18dd6126c 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ 
b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 2e-4 +lr = 1e-4 t_max = 3 max_epochs = 30 val_interval = 1 From 2749ef586d35e4f908d438585501a9ed3b3453ae Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Fri, 22 May 2026 14:20:24 +0900 Subject: [PATCH 135/183] Added --- projects/BEVFusion/bevfusion/depth_lss.py | 27 +++++++++++++---- projects/BEVFusion/bevfusion/depth_lss_v2.py | 30 ++++++++++++------- .../bevfusion/ops/bev_pool_v2/bev_pool_v2.py | 26 ++++------------ ...net50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 2 +- .../resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- 5 files changed, 50 insertions(+), 37 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index f363a8e76..2e68a3772 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -1,8 +1,13 @@ # modify from https://github.com/mit-han-lab/bevfusion +import math +from pathlib import Path from typing import Tuple +import matplotlib.pyplot as plt +import numpy as np import torch from mmdet3d.registry import MODELS +from mmengine.logging import print_log from torch import nn from .ops import bev_pool @@ -322,16 +327,28 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices): # collapse Z final = torch.cat(x.unbind(dim=2), 1) if self.visualize_bev_feat: - self.visualize_bev_feat(final) + self.plot_bev_feat(final) return final - def visualize_bev_feat(self, bev_feat): + def plot_bev_feat(self, bev_feat): """Visualize the BEV feat for the given batch index.""" + try: + import torch.distributed as dist + + if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0: + return + except ImportError: + pass + batch_idx = 0 - # save first 10 raw channel maps for one batch sample (B, C, Y, X) + if bev_feat.shape[0] <= batch_idx: + return + + # save first 10 raw channel maps for one batch sample (B, 
C, Y, X) num_channels = 10 - feat = bev_feat[batch_idx].detach().float().cpu().numpy() + with torch.no_grad(): + feat = bev_feat[batch_idx].detach().float().cpu().numpy() channel_indices = np.arange(min(num_channels, feat.shape[0])) ncols = min(5, len(channel_indices)) nrows = math.ceil(len(channel_indices) / ncols) @@ -348,7 +365,7 @@ def visualize_bev_feat(self, bev_feat): fig.suptitle(f"bev_feat channels 0-{len(channel_indices) - 1} (batch={batch_idx})") fig.tight_layout() - save_dir = Path("work_dirs/bev_feat_vis") + save_dir = Path("work_dirs/bev_feat_vis_2") save_dir.mkdir(parents=True, exist_ok=True) if not hasattr(self, "_bev_feat_vis_count"): self._bev_feat_vis_count = 0 diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index e50992b46..e30a5534a 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -27,7 +27,7 @@ def __init__( dbound: Tuple[float, float, float], collapse_z: bool = True, expand_batch_axis: bool = False, - visualize_bev_feat: bool = False, + visualize_bev_feat: bool = True, ): """ Args: @@ -104,6 +104,7 @@ def forward( def bev_pool_aux(self, geom_feats): B, N, D, H, W, C = geom_feats.shape + print("geom_feats:", geom_feats.shape) Nprime = B * N * D * H * W assert C == 3 @@ -137,15 +138,23 @@ def bev_pool_aux(self, geom_feats): geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C) + print("ranks_depth, ranks_feat, geom_feats:", ranks_depth.shape, ranks_feat.shape, geom_feats.shape) + # ranks_bev = ( + # geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) + # + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) + # + geom_feats[:, 1] * (self.nx[0]) + # + geom_feats[:, 0] + # ) ranks_bev = ( - geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) - + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) - + geom_feats[:, 1] * 
(self.nx[0]) - + geom_feats[:, 0] + geom_feats[:, 0] * (self.nx[2] * self.nx[1] * B) + + geom_feats[:, 1] * (self.nx[2] * B) + + geom_feats[:, 2] * (B) + + geom_feats[:, 3] ) indices = ranks_bev.argsort() + print("indices:", indices[:10]) ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] - + print("ranks_bev, ranks_depth, ranks_feat:", ranks_bev.shape, ranks_depth.shape, ranks_feat.shape) return ( ranks_bev.int().contiguous(), ranks_depth.int().contiguous(), @@ -183,7 +192,7 @@ def compute_bev_pool( if interval_starts is None: print_log("warning ---> no points within the predefined bev receptive field") dummy = torch.zeros( - size=[view_feats.shape[0], view_feats.shape[2], int(self.nx[2]), int(self.nx[1]), int(self.nx[0])] + size=[view_feats.shape[0], view_feats.shape[2], self.nx[0], self.nx[1], self.nx[2]] ).to(view_feats) dummy = torch.cat(dummy.unbind(dim=2), 1) return dummy @@ -197,11 +206,12 @@ def compute_bev_pool( bev_feat_shape = ( depth_softmax.shape[0], int(self.nx[2]), - int(self.nx[1]), int(self.nx[0]), + int(self.nx[1]), view_feats.shape[-1], ) # (B, Z, Y, X, C) - + print("bev_feat_shape:", bev_feat_shape) + print("nx[0], nx[1], nx[2]:", self.nx[0], self.nx[1], self.nx[2]) bev_feat = bev_pool_v2( depth=depth_softmax, feat=view_feats, @@ -219,7 +229,7 @@ def compute_bev_pool( bev_feat = torch.cat(bev_feat.unbind(dim=2), 1) if self.visualize_bev_feat: - self.visualize_bev_feat(bev_feat) + self.plot_bev_feat(bev_feat) return bev_feat diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py index 57b18a69e..af1ba15de 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/bev_pool_v2.py @@ -148,27 +148,13 @@ def backward(ctx, out_grad): def bev_pool_v2( depth, feat, ranks_depth, ranks_feat, ranks_bev, interval_starts, interval_lengths, bev_feat_shape, is_training 
): + # Always use full (B, Z, H, W, C) buffer; QuickCumsumV2Cuda (Z=1) is ONNX-only. + del is_training + x = QuickCumsumV2TrainingCuda.apply( + depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths + ) - if is_training: - x = QuickCumsumV2TrainingCuda.apply( - depth, feat, ranks_depth, ranks_feat, ranks_bev, bev_feat_shape, interval_starts, interval_lengths - ) - else: - # BEV Shape is (B, Z, Y, X, C) - out_height, out_width = bev_feat_shape[2], bev_feat_shape[3] - x = QuickCumsumV2Cuda.apply( - depth, - feat, - ranks_depth, - ranks_feat, - ranks_bev, - interval_starts, - interval_lengths, - out_height, - out_width, - ) - - # Final shape: (B, C, Z, Y, X) + # Final shape: (B, C, Z, H, W) — matches LSSTransform v1 after permute x = x.permute(0, 4, 1, 2, 3).contiguous() return x diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py index 751b24438..e47714f50 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py @@ -4,7 +4,7 @@ ] experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_view_lss_30e_8xb16_j6gen2_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 2f556a122..4a0770971 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,7 +6,7 @@ # Image network model = dict( view_transform=dict( - type="LSSTransform", + type="LSSTransformV2", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], From 5da147b044da421caf82853698487929a7d7a829 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 22 May 2026 18:01:25 +0900 Subject: [PATCH 136/183] Add local 3d box expand --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 38 +++++++++---------- ...fusion_camera_30e_8xb16_j6gen2_base_50m.py | 2 +- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index e30a5534a..59585cb49 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -27,7 +27,7 @@ def __init__( dbound: Tuple[float, float, float], collapse_z: bool = True, expand_batch_axis: bool = False, - visualize_bev_feat: bool = True, + visualize_bev_feat: bool = False, ): """ Args: @@ -104,13 +104,12 @@ def forward( def bev_pool_aux(self, geom_feats): B, N, D, H, W, C = geom_feats.shape - print("geom_feats:", geom_feats.shape) Nprime = B * N * D * H * W assert C == 3 # record the index of selected points for acceleration purpose - ranks_depth = torch.range(0, Nprime - 1, dtype=torch.int, device=geom_feats.device) - ranks_feat = torch.range(0, Nprime // D - 1, dtype=torch.int, device=geom_feats.device) + ranks_depth = torch.arange(0, Nprime, dtype=torch.int, device=geom_feats.device) + ranks_feat = torch.arange(0, Nprime // D, dtype=torch.int, device=geom_feats.device) ranks_feat = ranks_feat.reshape(B, N, 1, H, W) ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten() @@ -137,24 +136,20 @@ def bev_pool_aux(self, geom_feats): geom_feats, ranks_depth, ranks_feat = 
geom_feats[kept], ranks_depth[kept], ranks_feat[kept] - # nx[0]=x, nx[1]=y, nx[2]=z; flat index for out shape (B, Z, Y, X, C) - print("ranks_depth, ranks_feat, geom_feats:", ranks_depth.shape, ranks_feat.shape, geom_feats.shape) + ranks_bev = ( + geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) + + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) + + geom_feats[:, 0] * self.nx[1] + + geom_feats[:, 1] + ) # ranks_bev = ( # geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) # + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) - # + geom_feats[:, 1] * (self.nx[0]) + # + geom_feats[:, 1] * self.nx[0] # + geom_feats[:, 0] # ) - ranks_bev = ( - geom_feats[:, 0] * (self.nx[2] * self.nx[1] * B) - + geom_feats[:, 1] * (self.nx[2] * B) - + geom_feats[:, 2] * (B) - + geom_feats[:, 3] - ) indices = ranks_bev.argsort() - print("indices:", indices[:10]) ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] - print("ranks_bev, ranks_depth, ranks_feat:", ranks_bev.shape, ranks_depth.shape, ranks_feat.shape) return ( ranks_bev.int().contiguous(), ranks_depth.int().contiguous(), @@ -192,9 +187,12 @@ def compute_bev_pool( if interval_starts is None: print_log("warning ---> no points within the predefined bev receptive field") dummy = torch.zeros( - size=[view_feats.shape[0], view_feats.shape[2], self.nx[0], self.nx[1], self.nx[2]] - ).to(view_feats) - dummy = torch.cat(dummy.unbind(dim=2), 1) + size=[view_feats.shape[0], view_feats.shape[2], self.nx[2], self.nx[1], self.nx[0]], + dtype=view_feats.dtype, + device=view_feats.device, + ) + if self.collapse_z: + dummy = torch.cat(dummy.unbind(dim=2), 1) return dummy if self.expand_batch_axis: @@ -206,12 +204,10 @@ def compute_bev_pool( bev_feat_shape = ( depth_softmax.shape[0], int(self.nx[2]), - int(self.nx[0]), int(self.nx[1]), + int(self.nx[0]), view_feats.shape[-1], ) # (B, Z, Y, X, C) - print("bev_feat_shape:", bev_feat_shape) - print("nx[0], nx[1], nx[2]:", self.nx[0], 
self.nx[1], self.nx[2]) bev_feat = bev_pool_v2( depth=depth_softmax, feat=view_feats, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py index 81859eed7..ce26b4905 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py @@ -11,7 +11,7 @@ custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] # user setting -data_root = "data/t4datasets/" +data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type From ef44141130a8bb9fc4b648caef474ae0cde59d0f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 22 May 2026 19:09:28 +0900 Subject: [PATCH 137/183] Add local 3d box expand --- ...mera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py | 2 +- ...ion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py | 2 +- .../schedulers/default_30e_8xb16_adamw_linear_cosine.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py index 9473ceb0f..92501d169 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m.py @@ -3,7 +3,7 @@ "../../default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py", ] -experiment_group_name = "bevfusion_camera/base/" + 
_base_.dataset_type +experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type experiment_name = "bevfusion_camera_resnet50_fpn_depthlss_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py index e47714f50..d559d40b1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py @@ -3,7 +3,7 @@ "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", ] -experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 18dd6126c..1430b10ba 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,12 +1,12 @@ # learning rate -lr = 1e-4 +lr = 2e-4 t_max = 3 max_epochs = 30 val_interval = 1 -train_gpu_size = 2 +train_gpu_size = 8 test_batch_size = 2 -train_batch_size = 8 +train_batch_size = 16 param_scheduler = [ # learning rate scheduler From 9594f3b46afc2a94f59664e3439926ea7e3bac32 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 
2026 11:24:43 +0900 Subject: [PATCH 138/183] Updated --- ...on_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 4 ++-- ...n_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py | 4 ++-- ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 71c1829d4..08df461a4 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" +info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 02ed7542a..44785da25 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -13,9 +13,9 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/kokseang_2_8/" +info_directory_path = 
"info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_1/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_2/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 28499b4f9..3499885e5 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -147,3 +147,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file From 096f44389f0f359b9bf4be0ff7ee4da86dc5c888 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 2026 15:41:52 +0900 Subject: [PATCH 139/183] Updated --- projects/BEVFusion/bevfusion/bevfusion.py | 8 +- .../BEVFusion/bevfusion/bevfusion_head.py | 94 ++++++++++++++----- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 3 files changed, 78 insertions(+), 26 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index b113bb566..aeddc09fa 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -179,7 +179,7 @@ def extract_img_feat( if not using_image_features: x = self.get_image_backbone_features(x) - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda",enabled=False): # with torch.autocast(device_type='cuda', dtype=torch.float32): x = self.view_transform( x, @@ -200,14 
+200,14 @@ def extract_img_feat( def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: if points is not None: # NOTE(knzo25): training and normal inference - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): points = [point.float() for point in points] feats, coords, sizes = self.voxelize(points) batch_size = coords[-1, 0] + 1 else: - # NOTE(knzo25): onnx inference. Voxelization happens outside the graph - with torch.cuda.amp.autocast(enabled=False): + # NOTE: (knzo25): onnx inference. Voxelization happens outside the graph + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): # NOTE(knzo25): onnx demmands this diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 96c38658b..df61ff629 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -175,12 +175,22 @@ def __init__( self.dense_heatmap_exclude_pooling_classes = sorted( list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices)) - ) + ) + # Pre-compute the correct order of the classes for the final local_max + heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes + local_concat_class_remapping = [ + heatmap_concat_order.index(i) + for i in range(self.num_classes) + ] else: self.dense_heatmap_pooling_class_indices = None self.dense_heatmap_exclude_pooling_classes = None - + local_concat_class_remapping = [i for i in range(self.num_classes)] + + # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. 
+ self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping)) self.local_heatmap_padding = self.nms_kernel_size // 2 + # NMS clusters self.nms_clusters = self.test_cfg.get("nms_clusters", []) # Add class indices for nms @@ -201,7 +211,8 @@ def __init__( self.partial_ignore_labels = None print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ - {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}, \ + local_concat_class_remapping: {self.local_concat_class_remapping}", logger="current") def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] @@ -261,14 +272,12 @@ def forward_single(self, inputs, metas): ################################# # query initialization ################################# - with torch.cuda.amp.autocast(enabled=False): + with torch.amp.autocast("cuda", enabled=False): # with torch.autocast('cuda', enabled=False): dense_heatmap = self.heatmap_head(fusion_feat.float()) heatmap = dense_heatmap.detach().sigmoid() - local_max = torch.zeros_like(heatmap) - # equals to nms radius = voxel_size * out_size_factor * kenel_size if self.dense_heatmap_pooling_class_indices is not None: - # Pooling + # Pooling selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] local_max_inner = F.max_pool2d( selected_heatmap, @@ -276,31 +285,74 @@ def forward_single(self, inputs, metas): stride=1, padding=0, ) - local_max[ - :, - self.dense_heatmap_pooling_class_indices, - self.local_heatmap_padding : (-self.local_heatmap_padding), - self.local_heatmap_padding : (-self.local_heatmap_padding), - ] = local_max_inner - # Non-pooling classes + + # 2. 
Restore spatial size using F.pad instead of slice mutation + local_max = F.pad( + local_max_inner, + (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, + self.local_heatmap_padding), + mode="constant", + value=0.0 + ) + + # 3. Any non-pooling classes if self.dense_heatmap_exclude_pooling_classes: - local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ - :, self.dense_heatmap_exclude_pooling_classes - ] + excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :] + local_max = torch.cat([local_max, excluded_local_max], dim=1) + local_max = local_max[:, self.local_concat_class_remapping, :, :] else: - local_max = heatmap + local_max = heatmap + + # local_max = torch.zeros_like(heatmap) + # # equals to nms radius = voxel_size * out_size_factor * kenel_size + # if self.dense_heatmap_pooling_class_indices is not None: + # # Pooling + # selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] + # local_max_inner = F.max_pool2d( + # selected_heatmap, + # kernel_size=self.nms_kernel_size, + # stride=1, + # padding=0, + # ) + # local_max[ + # :, + # self.dense_heatmap_pooling_class_indices, + # self.local_heatmap_padding : (-self.local_heatmap_padding), + # self.local_heatmap_padding : (-self.local_heatmap_padding), + # ] = local_max_inner + # # Non-pooling classes + # if self.dense_heatmap_exclude_pooling_classes: + # local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ + # :, self.dense_heatmap_exclude_pooling_classes + # ] + # else: + # local_max = heatmap heatmap = heatmap * (heatmap == local_max) heatmap = heatmap.view(batch_size, heatmap.shape[1], -1) # top num_proposals among all classes - top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] - top_proposals_class = top_proposals // heatmap.shape[-1] - top_proposals_index = top_proposals % heatmap.shape[-1] + flattened_heatmap = heatmap.view(batch_size, -1) + + 
# Use topk instead or argsort to avoid sorting the entire flattened heatmap. + _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) + + # 2. Calculate class and spatial indices + # Use shape[-1] dynamically to handle grid sizes safely. + spatial_dim = heatmap.shape[-1] + top_proposals_class = top_proposals // spatial_dim + top_proposals_index = top_proposals % spatial_dim query_feat = fusion_feat_flatten.gather( index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), dim=-1, ) + # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] + # top_proposals_class = top_proposals // heatmap.shape[-1] + # top_proposals_index = top_proposals % heatmap.shape[-1] + # query_feat = fusion_feat_flatten.gather( + # index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), + # dim=-1, + # ) self.query_labels = top_proposals_class # add category embedding diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 08df461a4..5a4bc00fc 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -152,4 +152,4 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = "work_dirs/bevfusion_lidar_2_8_2/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/epoch_50.pth" From c6c79f80d723bd0550baccbc0d1a896bfedbf089 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 23 May 2026 16:49:15 +0900 Subject: [PATCH 140/183] Updated --- 
projects/BEVFusion/bevfusion/bevfusion_head.py | 4 ++-- .../default/pipelines/default_lidar_intensity_120m.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index df61ff629..0b59f274d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -188,7 +188,7 @@ def __init__( local_concat_class_remapping = [i for i in range(self.num_classes)] # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. - self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping)) + self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False) self.local_heatmap_padding = self.nms_kernel_size // 2 # NMS clusters @@ -334,7 +334,7 @@ def forward_single(self, inputs, metas): # top num_proposals among all classes flattened_heatmap = heatmap.view(batch_size, -1) - # Use topk instead or argsort to avoid sorting the entire flattened heatmap. + # Use topk instead of argsort to avoid sorting the entire flattened heatmap. _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) # 2. 
Calculate class and spatial indices diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 1ce2aa2be..ecf983c37 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -5,8 +5,8 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.17, 0.17, 0.2] -grid_size = [1440, 1440, 41] +voxel_size = [0.15, 0.15, 0.2] +grid_size = [1632, 1632, 41] eval_class_range = { "car": 120, "truck": 120, From 1e639c70d0e3cd9e92bdf226e0e0906c3e9714b0 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sun, 24 May 2026 01:46:53 +0900 Subject: [PATCH 141/183] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 62 +++++-------------- .../bevfusion/bevfusion_voxel_encoder.py | 48 +++++++------- 2 files changed, 43 insertions(+), 67 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 0b59f274d..8af826d47 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -92,6 +92,7 @@ def __init__( self.loss_iou = MODELS.build(loss_iou) if loss_iou is not None else None self.loss_bbox = MODELS.build(loss_bbox) self.loss_heatmap = MODELS.build(loss_heatmap) + self.share_conv_out_channels = hidden_channel self.bbox_coder = build_bbox_coder(bbox_coder) self.sampling = False @@ -157,7 +158,11 @@ def __init__( # Position Embedding for Cross-Attention, which is re-used during training # noqa: E501 x_size = self.test_cfg["grid_size"][0] // self.test_cfg["out_size_factor"] y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"] - self.bev_pos = self.create_2D_grid(x_size, y_size) + self.spatial_dim = x_size * y_size + bev_pos = 
self.create_2D_grid(x_size, y_size) + + # Register the bev_pos as a buffer so it moves to the GPU automatically. + self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) self.img_feat_pos = None self.img_feat_collapsed_pos = None @@ -266,8 +271,7 @@ def forward_single(self, inputs, metas): ################################# # image to BEV ################################# - fusion_feat_flatten = fusion_feat.view(batch_size, fusion_feat.shape[1], -1) # [BS, C, H*W] - bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(fusion_feat.device) + fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1) # [BS, C, H*W] ################################# # query initialization @@ -302,34 +306,10 @@ def forward_single(self, inputs, metas): local_max = local_max[:, self.local_concat_class_remapping, :, :] else: local_max = heatmap - - # local_max = torch.zeros_like(heatmap) - # # equals to nms radius = voxel_size * out_size_factor * kenel_size - # if self.dense_heatmap_pooling_class_indices is not None: - # # Pooling - # selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] - # local_max_inner = F.max_pool2d( - # selected_heatmap, - # kernel_size=self.nms_kernel_size, - # stride=1, - # padding=0, - # ) - # local_max[ - # :, - # self.dense_heatmap_pooling_class_indices, - # self.local_heatmap_padding : (-self.local_heatmap_padding), - # self.local_heatmap_padding : (-self.local_heatmap_padding), - # ] = local_max_inner - # # Non-pooling classes - # if self.dense_heatmap_exclude_pooling_classes: - # local_max[:, self.dense_heatmap_exclude_pooling_classes] = heatmap[ - # :, self.dense_heatmap_exclude_pooling_classes - # ] - # else: - # local_max = heatmap heatmap = heatmap * (heatmap == local_max) - heatmap = heatmap.view(batch_size, heatmap.shape[1], -1) + # (BS, num_classes, H*W) + heatmap = heatmap.view(batch_size, self.num_classes, -1) # top num_proposals among all classes flattened_heatmap = 
heatmap.view(batch_size, -1) @@ -339,31 +319,21 @@ def forward_single(self, inputs, metas): # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. - spatial_dim = heatmap.shape[-1] - top_proposals_class = top_proposals // spatial_dim - top_proposals_index = top_proposals % spatial_dim + top_proposals_class = top_proposals // self.spatial_dim + top_proposals_index = top_proposals % self.spatial_dim query_feat = fusion_feat_flatten.gather( - index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), + index=top_proposals_index[:, None, :].expand(-1, self.share_conv_out_channels, -1), dim=-1, ) - # top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., : self.num_proposals] - # top_proposals_class = top_proposals // heatmap.shape[-1] - # top_proposals_index = top_proposals % heatmap.shape[-1] - # query_feat = fusion_feat_flatten.gather( - # index=top_proposals_index[:, None, :].expand(-1, fusion_feat_flatten.shape[1], -1), - # dim=-1, - # ) self.query_labels = top_proposals_class # add category embedding one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1) query_cat_encoding = self.class_encoding(one_hot.float()) query_feat += query_cat_encoding - - query_pos = bev_pos.gather( - index=top_proposals_index[:, None, :].permute(0, 2, 1).expand(-1, -1, bev_pos.shape[-1]), - dim=1, - ) + + # (B, N, 2) + query_pos = self.bev_pos.squeeze(0)[top_proposals_index] ################################# # transformer decoder layer (Fusion feature as K,V) ################################# @@ -371,7 +341,7 @@ def forward_single(self, inputs, metas): for i in range(self.num_decoder_layers): # Transformer Decoder Layer # :param query: B C Pq :param query_pos: B Pq 3/6 - query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=bev_pos) + query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, 
key_pos=self.bev_pos) # Prediction res_layer = self.prediction_heads[i](query_feat) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 83cd70482..30afdc41d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -2,8 +2,6 @@ import torch import numpy as np -from mmcv.cnn import build_norm_layer -from mmcv.ops import DynamicScatter from torch import Tensor, nn from mmdet3d.registry import MODELS @@ -26,14 +24,28 @@ def __init__(self, max_norm_values (Tuple[float]): Maximum values for the features. in_channels (int): Number of input channels. """ - super(HardSimpleVoxelSinCosEncoder, self).__init__() + super().__init__() # Create PillarFeatureNet layers self.in_channels = in_channels - - self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) - self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) - self.register_buffer("exponents", (2 ** torch.arange(0, self.in_channels)).float()) + + # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP + min_norm_values = torch.tensor(min_norm_values) + max_norm_values = torch.tensor(max_norm_values) + # Let alpha = pi * exponents, beta = max - min + # y = ((x - min) / beta) * alpha + # y = alpha / beta * (x - min) + # y = (alpha / beta) * x - (alpha / beta) * min + # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta + # y = scale * x + bias + exponents = (2 ** torch.arange(0, self.in_channels)).float() + alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) + beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) + scale = alpha / beta + bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) + + self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) + self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # 
(1, C, C) def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: @@ -49,23 +61,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, torch.Tensor: Features of pillars in shape (M, C*C*2). """ - num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - # Mean in the voxel - # (N, M, 3) -> (N, 3) - voxel_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view( - -1, 1)).contiguous() + # (N, M, C) -> (N, C) + voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous() - # min-max normalization, (N, 3) -> (N, 3) - voxel_features_norm = (voxel_features - \ - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - + # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C) + # FMA (fused multiply-add): y = bias + scale * voxel_mean_features + y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1)) # SinCos encoding - # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) - y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) - # (N*3, 3) -> (N, 3*3) - y = y.reshape(num_voxels, -1) - # (N, 3*3) -> (N, 3*3*2) + # (N*C, C) -> (N, C*C) + y = y.reshape(-1, self.in_channels*self.in_channels) + # (N, C*C) -> (N, C*C*2) voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) return voxel_fourier_features From 03d94164b806d5596e423b2592c10feef50b8d85 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 24 May 2026 19:51:48 +0900 Subject: [PATCH 142/183] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 8 +++--- .../BEVFusion/bevfusion/sparse_encoder.py | 27 ++++++++++++------- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 1 + ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 1 + ...voxel_second_secfpn_50e_8xb16_base_120m.py | 1 + .../default_lidar_second_secfpn_120m.py | 2 -- 
.../default/pipelines/default_lidar_120m.py | 3 +++ .../pipelines/default_lidar_intensity_120m.py | 3 +++ 8 files changed, 31 insertions(+), 15 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 8af826d47..e3f5610c8 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -265,13 +265,13 @@ def forward_single(self, inputs, metas): Returns: list[dict]: Output results for tasks. """ - batch_size = inputs.shape[0] + # batch_size = inputs.shape[0] fusion_feat = self.shared_conv(inputs) ################################# # image to BEV ################################# - fusion_feat_flatten = fusion_feat.view(batch_size, self.share_conv_out_channels, -1) # [BS, C, H*W] + fusion_feat_flatten = fusion_feat.view(-1, self.share_conv_out_channels, self.spatial_dim) # [BS, C, H*W] ################################# # query initialization @@ -309,10 +309,10 @@ def forward_single(self, inputs, metas): heatmap = heatmap * (heatmap == local_max) # (BS, num_classes, H*W) - heatmap = heatmap.view(batch_size, self.num_classes, -1) + heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim) # top num_proposals among all classes - flattened_heatmap = heatmap.view(batch_size, -1) + flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. 
_, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 6e98a73ab..cd2ffb50b 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -11,7 +11,7 @@ from mmdet3d.registry import MODELS if IS_SPCONV2_AVAILABLE: - from spconv.pytorch import SparseConvTensor + from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor else: from mmcv.ops import SparseConvTensor @@ -28,6 +28,7 @@ class BEVFusionSparseEncoder(SparseEncoder): Args: in_channels (int): The number of input channels. sparse_shape (list[int]): The sparse shape of input tensor. + dense_output_shape (list[int]): The final shape of the dense output tensor. order (list[str], optional): Order of conv module. Defaults to ('conv', 'norm', 'act'). norm_cfg (dict, optional): Config of normalization layer. 
Defaults to @@ -52,6 +53,7 @@ def __init__( self, in_channels, sparse_shape, + dense_output_shapes, order=("conv", "norm", "act"), norm_cfg=dict(type="BN1d", eps=1e-3, momentum=0.01), base_channels=16, @@ -60,19 +62,17 @@ def __init__( encoder_paddings=((1,), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)), block_type="conv_module", return_middle_feats=False, - encoder_strides=(2, 2, 2, -1), - output_stride=2, ): super(SparseEncoder, self).__init__() assert block_type in ["conv_module", "basicblock"] self.sparse_shape = sparse_shape + self.dense_output_shapes = dense_output_shapes self.in_channels = in_channels self.order = order self.base_channels = base_channels self.output_channels = output_channels self.encoder_channels = encoder_channels self.encoder_paddings = encoder_paddings - self.encoder_strides = encoder_strides self.stage_num = len(self.encoder_channels) self.fp16_enabled = False self.return_middle_feats = return_middle_feats @@ -149,11 +149,20 @@ def forward(self, voxel_features, coors, batch_size): # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - spatial_features = out.dense() - - N, C, H, W, D = spatial_features.shape - spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() - spatial_features = spatial_features.view(N, C * D, H, W) + # Return (N, H, W, D, C) instead of (N, C, H, W, D) + spatial_features = out.dense(channels_first=False) + + # Reshape to (N, C, D, H, W) + spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() + spatial_features = spatial_features.view( + batch_size, + self.output_channels * self.dense_output_shapes[2], + self.dense_output_shapes[0], + self.dense_output_shapes[1], + ) + # N, C, H, W, D = spatial_features.shape + # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() + # spatial_features = spatial_features.view(N, C * D, H, W) if self.return_middle_feats: return spatial_features, encode_features diff --git 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 5a4bc00fc..269a0f00e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=50, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index 44785da25..d3c5154c6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=50, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 3499885e5..70c27f0a7 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ 
b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -35,6 +35,7 @@ pts_middle_encoder=dict( in_channels=32, sparse_shape=_base_.grid_size, + dense_output_shapes=_base_.sparse_dense_output_shapes, ), bbox_head=dict( class_names=_base_.class_names, # Use class names to identify the correct class indices diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index bad602cb7..aa275f558 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -24,9 +24,7 @@ norm_cfg=dict(type="BN1d", eps=0.001, momentum=0.01), encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)), encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, (1, 1, 0)), (0, 0)), - encoder_strides=(2, 2, 2, -1), # No stride for the last stage block_type="basicblock", - output_stride=2, # downsample stride ), pts_backbone=dict( type="SECOND", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 317c594c1..613ff8d0c 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -7,6 +7,9 @@ point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] voxel_size = [0.15, 0.15, 0.2] grid_size = [1632, 1632, 41] +# Sparse dense output shapes +sparse_dense_output_shapes = [204, 204, 2] + eval_class_range = { "car": 120, "truck": 120, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 
ecf983c37..e7b78955a 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -7,6 +7,9 @@ point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] voxel_size = [0.15, 0.15, 0.2] grid_size = [1632, 1632, 41] +# Sparse dense output shapes +sparse_dense_output_shapes = [204, 204, 2] + eval_class_range = { "car": 120, "truck": 120, From 94e175279116c6e34cb570b9b003f0b339691f4d Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sun, 24 May 2026 21:51:41 +0900 Subject: [PATCH 143/183] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 68 +++++++++++++++++++ ...y_lidar_only_intensity_tensorrt_dynamic.py | 1 + .../BEVFusion/deploy/rewriters/__init__.py | 3 + .../BEVFusion/deploy/rewriters/layer_norm.py | 15 ++++ 4 files changed, 87 insertions(+) create mode 100644 projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py create mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py create mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py new file mode 100644 index 000000000..c6841f64c --- /dev/null +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -0,0 +1,68 @@ +""" +Custom SparseConvTensor for BEVFusion. +This customiztion is used to support cleaner ONNX export of sparse convolutions. 
+""" + +from typing import Union, List, Optional + +import torch +from spconv.pytorch import SparseConvTensor +from spconv.core import ConvAlgo + + +class CustomSparseConvTensor(SparseConvTensor): + def __init__(self, + features: torch.Tensor, + indices: torch.Tensor, + spatial_shape: Union[List[int], np.ndarray], + batch_size: int, + grid: Optional[torch.Tensor] = None, + voxel_num: Optional[torch.Tensor] = None, + indice_dict: Optional[dict] = None, + benchmark: bool = False, + permanent_thrust_allocator: bool = False, + enable_timer: bool = False, + force_algo: Optional[ConvAlgo] = None): + """ + Check the superclass documentation for more details. + """ + + super().__init__( + features=features, + indices=indices, + spatial_shape=spatial_shape, + batch_size=batch_size, + grid=grid, + voxel_num=voxel_num, + indice_dict=indice_dict, + benchmark=benchmark, + permanent_thrust_allocator=permanent_thrust_allocator, + enable_timer=enable_timer, + force_algo=force_algo) + + # Precomputation for dense output shape. + self.spatial_shape_list = list(self.spatial_shape) + self.spatial_ndim = len(self.spatial_shape_list) + self.trans_params = list(range(0, self.spatial_ndim + 1)) + self.trans_params.insert(1, self.spatial_ndim + 1) + + def dense(self, channels_first: bool = True): + """ + Convert the sparse tensor to a dense tensor. 
+ """ + C = self.features.shape[1] + out = self.features.zeros( + [ + self.batch_size, + *self.spatial_shape_list, + C, + ] + ) + idx = self.indices.to(self.features.device).long() # [N, 1+D] + out.index_put_(idx.unbind(1), self.features) + if not channels_first: + return out + + out = out.permute(*self.trans_params).contiguous() + return out + \ No newline at end of file diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index e22e0f41b..0936cf8a0 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -5,6 +5,7 @@ "projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", "projects.SparseConvolution", + "projects.BEVFusion.deploy.rewriters", ], allow_failed_imports=False, ) diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py new file mode 100644 index 000000000..1eb59b5c1 --- /dev/null +++ b/projects/BEVFusion/deploy/rewriters/__init__.py @@ -0,0 +1,3 @@ +from .layer_norm import layer_norm__passthrough + +__all__ = ["layer_norm__passthrough"] \ No newline at end of file diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py new file mode 100644 index 000000000..0b8cc09c7 --- /dev/null +++ b/projects/BEVFusion/deploy/rewriters/layer_norm.py @@ -0,0 +1,15 @@ +import torch.nn.functional as F +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name="torch.nn.functional.layer_norm", backend="tensorrt" +) +@FUNCTION_REWRITER.register_rewriter( + func_name="torch.nn.functional.layer_norm", backend="default" +) +def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5): + # 
Call the *original* op so the ONNX exporter sees aten::layer_norm + # and emits a single LayerNormalization node at opset >= 17. + ctx = FUNCTION_REWRITER.get_context() + return ctx.origin_func(input, normalized_shape, weight, bias, eps) \ No newline at end of file From 36bfd4ed24898d13deb977a926d36329f8fc9b22 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 08:15:29 +0900 Subject: [PATCH 144/183] Updated --- ...sion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 269a0f00e..238690492 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_2/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From 26e6bb0d8337d3b12a0fc94cfad66442ced9eca6 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 09:43:58 +0900 Subject: [PATCH 145/183] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 2 ++ ...y_lidar_only_intensity_tensorrt_dynamic.py | 7 ++-- projects/BEVFusion/deploy/exporter.py | 32 +++++++++++++++++-- .../BEVFusion/deploy/rewriters/__init__.py | 3 -- .../BEVFusion/deploy/rewriters/layer_norm.py | 15 --------- 5 files changed, 35 insertions(+), 24 deletions(-) delete 
mode 100644 projects/BEVFusion/deploy/rewriters/__init__.py delete mode 100644 projects/BEVFusion/deploy/rewriters/layer_norm.py diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index c6841f64c..8481e4853 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -6,6 +6,7 @@ from typing import Union, List, Optional import torch +import numpy as np from spconv.pytorch import SparseConvTensor from spconv.core import ConvAlgo @@ -58,6 +59,7 @@ def dense(self, channels_first: bool = True): C, ] ) + print("out.shape: ", out.shape) idx = self.indices.to(self.features.device).long() # [N, 1+D] out.index_put_(idx.unbind(1), self.features) if not channels_first: diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index 0936cf8a0..0863889bb 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -4,8 +4,7 @@ imports=[ "projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", - "projects.SparseConvolution", - "projects.BEVFusion.deploy.rewriters", + "projects.SparseConvolution" ], allow_failed_imports=False, ) @@ -30,7 +29,7 @@ type="onnx", export_params=True, keep_initializers_as_inputs=False, - opset_version=17, + opset_version=18, save_file="bevfusion_lidar_intensity.onnx", input_names=["voxels", "coors", "num_points_per_voxel"], output_names=["bbox_pred", "score", "label_pred"], @@ -46,5 +45,5 @@ }, }, input_shape=None, - verbose=True, + verbose=False, ) diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index b1a430eb9..ab5489a7e 100644 --- 
a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -2,7 +2,7 @@ import logging import os.path as osp -from typing import Optional +from typing import Optional, Any import numpy as np import onnx @@ -11,12 +11,33 @@ from builder import ExportBuilder from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer from data_classes import ModelData, SetupConfigs -from mmdeploy.core import RewriterContext +from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER from mmdeploy.utils import ( get_root_logger, ) +def purge_mmdeploy_symbolics(op_names: list[str]) -> dict: + """Delete mmdeploy's symbolic records for the given op names. + Both the op-name key (e.g. `"layer_norm"`) and the function-path + bookkeeping key (e.g. `"mmdeploy.pytorch.symbolics.layer_norm.layer_norm__default"`) + are removed. Returns a snapshot of what was deleted for optional restore. + """ + records = SYMBOLIC_REWRITER._registry._rewrite_records + removed: dict = {} + for key in list(records.keys()): + # Primary key: the aten op name itself. + if key in op_names: + removed[key] = records.pop(key) + continue + # Bookkeeping key: full Python path of an implementer function. + # Match by "...symbolics.." or "...symbolics.__" + if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key + for op in op_names): + removed[key] = records.pop(key) + return removed + + class Torch2OnnxExporter: def __init__(self, setup_configs: SetupConfigs, log_level: str): @@ -62,7 +83,14 @@ def _export_model( patched_model (torch.nn.Module): Patched Pytorch model. ir_configs (dict): Configs for intermediate representations in ONNX. 
""" + # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported + # in the tensorrt version + removed = purge_mmdeploy_symbolics(["layer_norm"]) + self.logger.info( + f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" + ) with RewriterContext(**context_info), torch.no_grad(): + list_layer_norm_rewriters() image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: image_feats = self._export_image_backbone(model_data, ir_configs, patched_model) diff --git a/projects/BEVFusion/deploy/rewriters/__init__.py b/projects/BEVFusion/deploy/rewriters/__init__.py deleted file mode 100644 index 1eb59b5c1..000000000 --- a/projects/BEVFusion/deploy/rewriters/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .layer_norm import layer_norm__passthrough - -__all__ = ["layer_norm__passthrough"] \ No newline at end of file diff --git a/projects/BEVFusion/deploy/rewriters/layer_norm.py b/projects/BEVFusion/deploy/rewriters/layer_norm.py deleted file mode 100644 index 0b8cc09c7..000000000 --- a/projects/BEVFusion/deploy/rewriters/layer_norm.py +++ /dev/null @@ -1,15 +0,0 @@ -import torch.nn.functional as F -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - func_name="torch.nn.functional.layer_norm", backend="tensorrt" -) -@FUNCTION_REWRITER.register_rewriter( - func_name="torch.nn.functional.layer_norm", backend="default" -) -def layer_norm__passthrough(input, normalized_shape, weight=None, bias=None, eps=1e-5): - # Call the *original* op so the ONNX exporter sees aten::layer_norm - # and emits a single LayerNormalization node at opset >= 17. 
- ctx = FUNCTION_REWRITER.get_context() - return ctx.origin_func(input, normalized_shape, weight, bias, eps) \ No newline at end of file From b096792a9e87f1307c5fe6b5d9f99ad6bde25438 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 12:42:26 +0900 Subject: [PATCH 146/183] Updated --- .../bevfusion/custom_sparse_conv_tensor.py | 92 +++++++------------ .../BEVFusion/bevfusion/sparse_encoder.py | 7 +- projects/BEVFusion/deploy/exporter.py | 1 - 3 files changed, 35 insertions(+), 65 deletions(-) diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 8481e4853..b16191ca8 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -2,69 +2,39 @@ Custom SparseConvTensor for BEVFusion. This customiztion is used to support cleaner ONNX export of sparse convolutions. """ +import torch -from typing import Union, List, Optional +from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE -import torch -import numpy as np -from spconv.pytorch import SparseConvTensor -from spconv.core import ConvAlgo +if IS_SPCONV2_AVAILABLE: + from spconv.pytorch import SparseConvTensor +else: + from mmcv.ops import SparseConvTensor -class CustomSparseConvTensor(SparseConvTensor): - def __init__(self, - features: torch.Tensor, - indices: torch.Tensor, - spatial_shape: Union[List[int], np.ndarray], - batch_size: int, - grid: Optional[torch.Tensor] = None, - voxel_num: Optional[torch.Tensor] = None, - indice_dict: Optional[dict] = None, - benchmark: bool = False, - permanent_thrust_allocator: bool = False, - enable_timer: bool = False, - force_algo: Optional[ConvAlgo] = None): - """ - Check the superclass documentation for more details. 
- """ - - super().__init__( - features=features, - indices=indices, - spatial_shape=spatial_shape, - batch_size=batch_size, - grid=grid, - voxel_num=voxel_num, - indice_dict=indice_dict, - benchmark=benchmark, - permanent_thrust_allocator=permanent_thrust_allocator, - enable_timer=enable_timer, - force_algo=force_algo) - - # Precomputation for dense output shape. - self.spatial_shape_list = list(self.spatial_shape) - self.spatial_ndim = len(self.spatial_shape_list) - self.trans_params = list(range(0, self.spatial_ndim + 1)) - self.trans_params.insert(1, self.spatial_ndim + 1) +def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_shapes: list[int], out_channels: int): + """ + Convert the sparse tensor to a dense tensor. + """ + H, W, D = spatial_shapes + num_cells = batch_size * H * W * D + idx = sparse_tensor.indices.to(sparse_tensor.features.device).long() # [N, 1+D] + b, h, w, d = idx.unbind(1) + # b * (H * W * D) + h*(W*D) + w*D + d + # Factor out the common terms D and W + # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d + linear_idx = ((b * H + h) * W + w) * D + d # [N] - def dense(self, channels_first: bool = True): - """ - Convert the sparse tensor to a dense tensor. 
- """ - C = self.features.shape[1] - out = self.features.zeros( - [ - self.batch_size, - *self.spatial_shape_list, - C, - ] - ) - print("out.shape: ", out.shape) - idx = self.indices.to(self.features.device).long() # [N, 1+D] - out.index_put_(idx.unbind(1), self.features) - if not channels_first: - return out - - out = out.permute(*self.trans_params).contiguous() - return out - \ No newline at end of file + out = torch.zeros( + [ + num_cells, + out_channels + ], + device=sparse_tensor.features.device, + dtype=sparse_tensor.features.dtype, + ) + # out = out.index_copy(0, linear_idx, sparse_tensor.features) + # out = out.scatter(0, linear_idx, sparse_tensor.features) + scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] + out = out.scatter(0, scatter_idx, sparse_tensor.features) + return out.view(batch_size, H, W, D, out_channels) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index cd2ffb50b..f46ca621b 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -11,10 +11,11 @@ from mmdet3d.registry import MODELS if IS_SPCONV2_AVAILABLE: - from .custom_sparse_conv_tensor import CustomSparseConvTensor as SparseConvTensor + from spconv.pytorch import SparseConvTensor else: from mmcv.ops import SparseConvTensor +from .custom_sparse_conv_tensor import sparse_to_dense @MODELS.register_module() @@ -150,8 +151,8 @@ def forward(self, voxel_features, coors, batch_size): # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) # Return (N, H, W, D, C) instead of (N, C, H, W, D) - spatial_features = out.dense(channels_first=False) - + # spatial_features = out.dense(channels_first=False) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # Reshape to (N, C, D, H, W) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = 
spatial_features.view( diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index ab5489a7e..4c5e72ac7 100644 --- a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -90,7 +90,6 @@ def _export_model( f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" ) with RewriterContext(**context_info), torch.no_grad(): - list_layer_norm_rewriters() image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: image_feats = self._export_image_backbone(model_data, ir_configs, patched_model) From 0af40fc3ad622b7dbe1198d77329072512127264 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 18:59:46 +0900 Subject: [PATCH 147/183] Updated --- .../BEVFusion/bevfusion/sparse_encoder.py | 38 +++++++++++++++---- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 2 +- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index f46ca621b..e046d8917 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import os from typing import Dict, Optional import numpy as np @@ -152,18 +153,39 @@ def forward(self, voxel_features, coors, batch_size): out = self.conv_out(encode_features[-1]) # Return (N, H, W, D, C) instead of (N, C, H, W, D) # spatial_features = out.dense(channels_first=False) - spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) + # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # Reshape to (N, C, D, H, W) + # spatial_features = out.dense(channels_first=False) + + # with torch.no_grad(): + # ref = spatial_features + # cand = sparse_to_dense( + # out, batch_size, self.dense_output_shapes, self.output_channels + # ) + # assert ref.shape == cand.shape, ( + # f"shape mismatch: dense={tuple(ref.shape)} " + # f"sparse_to_dense={tuple(cand.shape)}" + # ) + # max_abs = (ref - cand).abs().max().item() + # num_mismatch = (ref != cand).sum().item() + # allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6) + # print( + # f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: " + # f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} " + # f"num_mismatch={num_mismatch} allclose={allclose}" + # ) + # assert allclose, ( + # "sparse_to_dense disagrees with out.dense(channels_first=False) " + # "-- index/order mismatch in BEVFusionSparseEncoder." 
+ # ) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = spatial_features.view( - batch_size, - self.output_channels * self.dense_output_shapes[2], - self.dense_output_shapes[0], - self.dense_output_shapes[1], + batch_size, + self.output_channels * self.dense_output_shapes[2], + self.dense_output_shapes[0], + self.dense_output_shapes[1], ) - # N, C, H, W, D = spatial_features.shape - # spatial_features = spatial_features.permute(0, 1, 4, 2, 3).contiguous() - # spatial_features = spatial_features.view(N, C * D, H, W) if self.return_middle_feats: return spatial_features, encode_features diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 238690492..7a8afad3e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_2_8_3/j6gen2_base_normal_dense/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name From f871a14e2cb15b9c9a8a9c0f96f53b612ae6585e Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Mon, 25 May 2026 22:08:46 +0900 Subject: [PATCH 148/183] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 2 +- .../bevfusion/custom_sparse_conv_tensor.py | 5 ++-- 
.../BEVFusion/bevfusion/sparse_encoder.py | 29 ++----------------- .../default/pipelines/default_lidar_120m.py | 6 ++-- .../pipelines/default_lidar_intensity_120m.py | 6 ++-- projects/BEVFusion/deploy/containers.py | 9 ++++-- 6 files changed, 19 insertions(+), 38 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index e3f5610c8..d06305db9 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -315,7 +315,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=True) + _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. 
diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index b16191ca8..054b0609b 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -24,11 +24,12 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # Factor out the common terms D and W # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d linear_idx = ((b * H + h) * W + w) * D + d # [N] - + + # out_channels = sparse_tensor.features.shape[1] out = torch.zeros( [ num_cells, - out_channels + sparse_tensor.features.shape[1] ], device=sparse_tensor.features.device, dtype=sparse_tensor.features.dtype, diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index e046d8917..0fc20cd19 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -151,34 +151,9 @@ def forward(self, voxel_features, coors, batch_size): # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - # Return (N, H, W, D, C) instead of (N, C, H, W, D) - # spatial_features = out.dense(channels_first=False) - # spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) - # Reshape to (N, C, D, H, W) - # spatial_features = out.dense(channels_first=False) - - # with torch.no_grad(): - # ref = spatial_features - # cand = sparse_to_dense( - # out, batch_size, self.dense_output_shapes, self.output_channels - # ) - # assert ref.shape == cand.shape, ( - # f"shape mismatch: dense={tuple(ref.shape)} " - # f"sparse_to_dense={tuple(cand.shape)}" - # ) - # max_abs = (ref - cand).abs().max().item() - # num_mismatch = (ref != cand).sum().item() - # allclose = torch.allclose(ref, cand, rtol=1e-5, atol=1e-6) - # print( - # f"[BEVFusionSparseEncoder] dense vs sparse_to_dense: " - # 
f"shape={tuple(ref.shape)} max_abs_diff={max_abs:.3e} " - # f"num_mismatch={num_mismatch} allclose={allclose}" - # ) - # assert allclose, ( - # "sparse_to_dense disagrees with out.dense(channels_first=False) " - # "-- index/order mismatch in BEVFusionSparseEncoder." - # ) + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) + # spatial_features = out.dense(channels_first=False) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() spatial_features = spatial_features.view( batch_size, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 613ff8d0c..d384bb4e0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -5,10 +5,10 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.15, 0.15, 0.2] -grid_size = [1632, 1632, 41] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] # Sparse dense output shapes -sparse_dense_output_shapes = [204, 204, 2] +sparse_dense_output_shapes = [180, 180, 2] eval_class_range = { "car": 120, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index e7b78955a..394647684 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -5,10 +5,10 @@ # range setting point_cloud_range = [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] -voxel_size = [0.15, 0.15, 0.2] -grid_size = [1632, 1632, 41] +voxel_size = [0.17, 0.17, 0.2] +grid_size = [1440, 1440, 41] # Sparse dense output shapes -sparse_dense_output_shapes = [204, 204, 2] 
+sparse_dense_output_shapes = [180, 180, 2] eval_class_range = { "car": 120, diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 51f2316df..018b5db7e 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -2,7 +2,7 @@ import torch.nn.functional as F # Wrapper Classes for onnx conversion - +import numpy as np class TrtBevFusionImageBackboneContainer(torch.nn.Module): def __init__(self, mod, mean, std) -> None: @@ -49,8 +49,13 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, + } - + + voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") + coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") + num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") + print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape) if points is not None: batch_inputs_dict["points"] = [points] From 9a4642a59d8f869cb389d7317adbdc7267e069da Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:12:53 +0900 Subject: [PATCH 149/183] Updated --- .../BEVFusion/bevfusion/bevfusion_head.py | 4 +- .../bevfusion/custom_sparse_conv_tensor.py | 1 - .../BEVFusion/bevfusion/ops/topk/__init__.py | 0 projects/BEVFusion/bevfusion/ops/topk/topk.py | 45 +++++++++++++++++++ ...voxel_second_secfpn_50e_8xb16_base_120m.py | 4 +- .../default_camera_lidar_intensity_120m.py | 2 + .../default/pipelines/default_lidar_120m.py | 2 + .../pipelines/default_lidar_intensity_120m.py | 2 + 8 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 projects/BEVFusion/bevfusion/ops/topk/__init__.py create mode 100644 projects/BEVFusion/bevfusion/ops/topk/topk.py diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py 
b/projects/BEVFusion/bevfusion/bevfusion_head.py index d06305db9..5b6c5ad92 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -17,6 +17,8 @@ from mmengine.structures import InstanceData from torch import nn +from .ops.topk.topk import topk + def clip_sigmoid(x, eps=1e-4): y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) @@ -315,7 +317,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - _, top_proposals = flattened_heatmap.topk(k=self.num_proposals, dim=-1, largest=True, sorted=False) + top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 054b0609b..175c08bed 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -25,7 +25,6 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d linear_idx = ((b * H + h) * W + w) * D + d # [N] - # out_channels = sparse_tensor.features.shape[1] out = torch.zeros( [ num_cells, diff --git a/projects/BEVFusion/bevfusion/ops/topk/__init__.py b/projects/BEVFusion/bevfusion/ops/topk/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py new file mode 100644 index 000000000..020e677bf --- /dev/null +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -0,0 +1,45 @@ +""" +This file is used to write functions to deploy custom plugins to support Autoware, for example, TopK. 
+""" + +import torch +from torch.autograd import Function +from torch.onnx.symbolic_helper import _get_tensor_sizes + + +class TopK(Function): + + @staticmethod + def symbolic( + g, + x: torch.Tensor, + k: int, + dim: int, + sorted: bool = False, + ): + + output = g.op( + "autoware::Argsort", + x, + outputs=1, + ) + x_shape = _get_tensor_sizes(x) + if x_shape is not None and hasattr(output.type(), "with_sizes"): + output_type = x.type().with_sizes(x_shape) + output.setType(output_type) + # Argsort from Autoware is in ascending order, so we need to return the last k elements. + return output[-k:] + + @staticmethod + def forward( + ctx, + x: torch.Tensor, + k: int, + dim: int, + sorted: bool = False, + ): + indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) + return indices + +def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): + return TopK.apply(x, k, dim, sorted) \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 70c27f0a7..22762bed1 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_2/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -148,5 +148,3 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) - -resume = 
True \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index 0b0f44c08..1b0f91c3d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -83,6 +83,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index d384bb4e0..929c81c81 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,6 +66,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 394647684..2c95445ea 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,6 +66,8 @@ "barrier", ], ), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), 
dict( type="Pack3DDetInputs", From 3cfe9870cef009e69e7e860cfb2c105b6f8e0662 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:35:19 +0900 Subject: [PATCH 150/183] Updated --- projects/BEVFusion/bevfusion/bevfusion_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 5b6c5ad92..2f0a99a9f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -317,7 +317,7 @@ def forward_single(self, inputs, metas): flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) # Use topk instead of argsort to avoid sorting the entire flattened heatmap. - top_proposals_indices = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) + top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. 
From 798021d22468eb10d0b89e8b73265807d4871bff Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Tue, 26 May 2026 17:48:40 +0900 Subject: [PATCH 151/183] Updated --- projects/BEVFusion/bevfusion/ops/topk/topk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py index 020e677bf..a767bb720 100644 --- a/projects/BEVFusion/bevfusion/ops/topk/topk.py +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -38,7 +38,7 @@ def forward( dim: int, sorted: bool = False, ): - indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) + _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) return indices def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): From b7a32d0b5d1431c67427e5ee0657b78e4d316cfb Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Wed, 27 May 2026 13:58:22 +0900 Subject: [PATCH 152/183] Added --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 9 ++------- ...default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py | 6 +----- ...camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py} | 2 +- .../models/resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- .../schedulers/default_30e_8xb16_adamw_linear_cosine.py | 4 ++-- 5 files changed, 7 insertions(+), 16 deletions(-) rename projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/{bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py => bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py} (92%) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 59585cb49..81d1fb3c6 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -135,19 +135,14 @@ def bev_pool_aux(self, geom_feats): return None, None, None geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] - + + # Switch x and y to match the order of the BEV grid 
ranks_bev = ( geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) + geom_feats[:, 0] * self.nx[1] + geom_feats[:, 1] ) - # ranks_bev = ( - # geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) - # + geom_feats[:, 2] * (self.nx[1] * self.nx[0]) - # + geom_feats[:, 1] * self.nx[0] - # + geom_feats[:, 0] - # ) indices = ranks_bev.argsort() ranks_bev, ranks_depth, ranks_feat = ranks_bev[indices], ranks_depth[indices], ranks_feat[indices] return ( diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py index ce26b4905..591399a4e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py @@ -11,13 +11,9 @@ custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] # user setting -data_root = "data/t4dataset/" +data_root = "data/t4datasets/" info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_30e_8xb16_j6gen2_base_50m" -work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name - # Dataset parameters train_dataloader = dict( batch_size=_base_.train_batch_size, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py similarity index 92% rename from projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py rename to 
projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index d559d40b1..c39ca365d 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -4,7 +4,7 @@ ] experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_lssV2_30e_8xb16_j6gen2_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_lss_v1_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 4a0770971..2f556a122 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,7 +6,7 @@ # Image network model = dict( view_transform=dict( - type="LSSTransformV2", + type="LSSTransform", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 1430b10ba..f4ec3e0db 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,10 +1,10 @@ # learning rate -lr = 2e-4 +lr = 1e-4 t_max = 3 max_epochs = 30 val_interval = 1 -train_gpu_size = 8 +train_gpu_size = 2 test_batch_size = 2 
train_batch_size = 16 From 088c1b3fa22c8578983d9933499ff194102154d8 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Wed, 27 May 2026 15:24:57 +0900 Subject: [PATCH 153/183] Added --- ...n_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +- .../models/resnet50/camera_resnet50_fpn_depthlss_120m.py | 8 ++++---- .../models/resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index c39ca365d..69df79b51 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -4,7 +4,7 @@ ] experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_lss_v1_30e_8xb16_j6gen2_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py index 4a1f33040..1059ce9e8 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py @@ -22,11 +22,11 @@ type="mmdet.ResNet", depth=50, num_stages=4, - out_indices=(2, 3), + out_indices=(1, 2, 3), frozen_stages=-1, norm_cfg=dict(type="BN2d", requires_grad=True), norm_eval=False, - with_cp=False, + 
with_cp=True, style="pytorch", init_cfg=dict( type="Pretrained", @@ -35,7 +35,7 @@ ), img_neck=dict( type="GeneralizedLSSFPN", - in_channels=[1024, 2048], + in_channels=[512, 1024, 2048], out_channels=256, start_level=0, num_outs=2, @@ -47,7 +47,7 @@ type="DepthLSSTransform", in_channels=256, out_channels=80, - feature_size=[24, 48], + feature_size=[48, 96], xbound=[-122.40, 122.40, 0.68], ybound=[-122.40, 122.40, 0.68], zbound=[-10.0, 10.0, 20.0], diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 2f556a122..4a0770971 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,7 +6,7 @@ # Image network model = dict( view_transform=dict( - type="LSSTransform", + type="LSSTransformV2", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], zbound=[-10.0, 10.0, 20.0], From 40f3ea943da2d862c7bf42d0fd54bde90f7e1d9d Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 29 May 2026 16:56:32 +0900 Subject: [PATCH 154/183] Updated --- ...bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 +- .../configs/t4dataset/default/pipelines/default_lidar_120m.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 22762bed1..ea4f75569 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -15,7 +15,7 @@ data_root = "data/t4dataset/" info_directory_path = 
"info/kokseang_2_8_1/" -experiment_group_name = "bevfusion_lidar_2_8_0/base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_2_8_0/base_more_filters/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_50e_8xb16_base_120m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 929c81c81..6472bb582 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,8 +66,8 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict(type="PointShuffle"), dict( type="Pack3DDetInputs", From 7053c2d6942aa0b2ed306602ac606d7186b9501a Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Sat, 30 May 2026 00:20:35 +0900 Subject: [PATCH 155/183] Updated --- .../bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index ea4f75569..171c3076e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -148,3 +148,5 @@ checkpoint=dict(type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="NuScenes 
metric/T4Metric/mAP"), ) log_processor = dict(window_size=50) + +resume = True \ No newline at end of file From 49e214068b46c0339d4a2ce00b36ffab3d9988c6 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sat, 30 May 2026 12:14:17 +0900 Subject: [PATCH 156/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 77470a938..a8098ed8e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -29,7 +29,7 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.28, 0.40], + resize_lim=[0.29, 0.35], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=True, @@ -100,7 +100,7 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.34, 0.34], + resize_lim=[0.32, 0.32], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=False, From bfb3d2e1cbbb37b3865f254c11f18d7c3b8ed4fe Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sat, 30 May 2026 22:21:30 +0900 Subject: [PATCH 157/183] Updated --- projects/BEVFusion/bevfusion/bevfusion.py | 75 +++++++-- projects/BEVFusion/bevfusion/depth_lss_v2.py | 18 ++- projects/BEVFusion/bevfusion/loading.py | 161 ++++++++++++++++++- 3 files changed, 238 insertions(+), 16 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index aeddc09fa..bda80ed0d 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -34,6 +34,8 @@ def __init__( bbox_head: Optional[dict] = None, init_cfg: OptMultiConfig = None, seg_head: Optional[dict] = None, + loss_depth_weight: float = 3.0, + depth_gt_downsample: int = 1, **kwargs, ) -> None: 
"""Initialize BEVFusion model. @@ -76,6 +78,8 @@ def __init__( self.bbox_head = MODELS.build(bbox_head) self.init_weights() + self.loss_depth_weight = loss_depth_weight + self.depth_gt_downsample = depth_gt_downsample def _forward( self, batch_inputs_dict: Tensor, batch_data_samples: OptSampleList = [], using_image_features=False, **kwargs @@ -174,14 +178,14 @@ def extract_img_feat( lidar_aug_matrix_inverse=None, geom_feats=None, using_image_features=False, - ) -> torch.Tensor: + ) -> Tuple[torch.Tensor, torch.Tensor]: if not using_image_features: x = self.get_image_backbone_features(x) with torch.amp.autocast("cuda",enabled=False): # with torch.autocast(device_type='cuda', dtype=torch.float32): - x = self.view_transform( + x, pred_depths = self.view_transform( x, points, lidar2image, @@ -195,7 +199,7 @@ def extract_img_feat( lidar_aug_matrix_inverse, geom_feats, ) - return x + return x, pred_depths def extract_pts_feat(self, feats, coords, sizes, points=None) -> torch.Tensor: if points is not None: @@ -320,7 +324,7 @@ def extract_feat( camera2lidar = imgs.new_tensor(np.asarray(camera2lidar)) img_aug_matrix = imgs.new_tensor(np.asarray(img_aug_matrix)) lidar_aug_matrix = imgs.new_tensor(np.asarray(lidar_aug_matrix)) - img_feature = self.extract_img_feat( + img_feature, pred_depths = self.extract_img_feat( imgs, deepcopy(points), lidar2image, @@ -342,7 +346,7 @@ def extract_feat( lidar_aug_matrix = batch_inputs_dict["lidar_aug_matrix"] geom_feats = batch_inputs_dict["geom_feats"] - img_feature = self.extract_img_feat( + img_feature, pred_depths = self.extract_img_feat( imgs, points, lidar2image, @@ -377,7 +381,7 @@ def extract_feat( if self.pts_neck is not None: x = self.pts_neck(x) - return x + return x, pred_depths def loss( self, @@ -387,12 +391,65 @@ def loss( **kwargs, ) -> List[Det3DDataSample]: batch_input_metas = [item.metainfo for item in batch_data_samples] - feats = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features) + feats, 
pred_depths = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features) losses = dict() + if self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict: + with torch.amp.autocast("cuda", enabled=False): + gt_depths = batch_inputs_dict["gt_depths"] + depth_loss = self.get_depth_loss(gt_depths, pred_depths) + losses["loss_depth"] = depth_loss + if self.with_bbox_head: bbox_loss = self.bbox_head.loss(feats, batch_data_samples) - - losses.update(bbox_loss) + losses.update(bbox_loss) return losses + + def get_downsampled_gt_depth(self, gt_depths): + """ + Input: + gt_depths: [B, N, H, W] + Output: + gt_depths: [B*N*h*w, d] + """ + B, N, H, W = gt_depths.shape + D = self.view_transform.D + dbounds = self.view_transform.dbound + gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, + self.depth_gt_downsample, W // self.depth_gt_downsample, + self.depth_gt_downsample, 1) + gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous() + gt_depths = gt_depths.view(-1, self.depth_gt_downsample * self.depth_gt_downsample) + gt_depths_tmp = torch.where(gt_depths == 0.0, + 1e5 * torch.ones_like(gt_depths), + gt_depths) + gt_depths = torch.min(gt_depths_tmp, dim=-1).values + gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, + W // self.downsample) + + gt_depths = torch.log(gt_depths) - torch.log( + torch.tensor(dbounds[0]).float()) + gt_depths = gt_depths * (D - 1) / torch.log( + torch.tensor(dbounds[1] - 1.).float() / + dbounds[0]) + gt_depths = gt_depths + 1. 
+ gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0), + gt_depths, torch.zeros_like(gt_depths)) + gt_depths = F.one_hot( + gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:] + return gt_depths.float() + + def get_depth_loss(self, depth_labels, depth_preds): + depth_labels = self.get_downsampled_gt_depth(depth_labels) + depth_preds = depth_preds.permute(0, 2, 3, + 1).contiguous().view(-1, self.D) + fg_mask = torch.max(depth_labels, dim=1).values > 0.0 + depth_labels = depth_labels[fg_mask] + depth_preds = depth_preds[fg_mask] + depth_loss = F.binary_cross_entropy( + depth_preds, + depth_labels, + reduction='none', + ).sum() / max(1.0, fg_mask.sum()) + return self.loss_depth_weight * depth_loss \ No newline at end of file diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 81d1fb3c6..66c58add8 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -70,7 +70,10 @@ def forward( ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed x, depth_softmax = self.get_cam_feats(img) x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat) - + + # No return depth predictions when precomputed geometry features are used + depth_softmax = None + else: intrins = camera_intrinsics[..., :3, :3] post_rots = img_aug_matrix[..., :3, :3] @@ -99,8 +102,8 @@ def forward( depth_softmax, ) = self.get_cam_feats(img) x = self.bev_pool(view_feats, depth_softmax, geom) - - return x + + return x, depth_softmax def bev_pool_aux(self, geom_feats): B, N, D, H, W, C = geom_feats.shape @@ -229,7 +232,7 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth bev_feat = self.compute_bev_pool( view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths ) - return bev_feat + return bev_feat @MODELS.register_module() @@ -245,6 +248,7 @@ def __init__( ybound: Tuple[float, 
float, float], zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], + loss_depth_weight: float = 3.0, downsample: int = 1, ): super().__init__( @@ -259,6 +263,7 @@ def __init__( ) self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1) self.downsample = DownSampleNet(downsample, out_channels, out_channels) + self.loss_depth_weight = loss_depth_weight def get_cam_feats(self, x): B, N, C, fH, fW = x.shape @@ -272,6 +277,7 @@ def get_cam_feats(self, x): return view_feats, depth_softmax def forward(self, *args, **kwargs): - x = super().forward(*args, **kwargs) + x, depth_softmax = super().forward(*args, **kwargs) x = self.downsample(x) - return x \ No newline at end of file + return x + \ No newline at end of file diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 0478d67a3..5cdc499b9 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -1,14 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy -import os +from pathlib import Path from typing import List, Optional +import matplotlib.pyplot as plt + import mmcv import numpy as np from mmdet3d.datasets.transforms import LoadMultiViewImageFromFiles from mmdet3d.registry import TRANSFORMS from mmengine.fileio import get from mmengine.logging import print_log +from mmcv.transforms import BaseTransform + @TRANSFORMS.register_module() @@ -217,3 +221,158 @@ def transform(self, results: dict) -> Optional[dict]: results["num_views"] = self.num_views results["num_ref_frames"] = self.num_ref_frames return results + + +@TRANSFORMS.register_module() +class PointsToMultiViewImageDepths(BaseTransform): + """Convert points to multi-view image depths. + + Args: + points (np.ndarray): Points in the world coordinate system. + img_shape (tuple): Shape of the image. + cam2img (np.ndarray): Camera to image transformation matrix. + lidar2cam (np.ndarray): LiDAR to camera transformation matrix. 
+ visualize_dir (str, optional): If set, saves a per-sample subplot + of `gt_depths` (one panel per camera) to this directory. + Useful for debugging the projection. Defaults to None. + max_depth (float): Upper clip for the depth color scale (m). + Defaults to 80. + """ + def __init__( + self, + img_shape, + num_cameras: int, + visualize_dir: Optional[str] = None, + max_depth: float = 80.0, + ): + self.img_shape = img_shape + self.num_cameras = num_cameras + self.visualize_dir = visualize_dir + self.max_depth = max_depth + self.visualize_dir = Path(visualize_dir) if visualize_dir is not None else None + if self.visualize_dir is not None: + self.visualize_dir.mkdir(parents=True, exist_ok=True) + self._depth_idx = 0 + + def transform(self, results: dict) -> Optional[dict]: + """Call function to load multi-view image from files. + + Args: + results (dict): Result dict containing multi-view image filenames. + + Returns: + dict: The result dict containing the multi-view image data. + Added keys: + - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width). 
+ """ + lidar2image = results["lidar2img"] + img_aug_matrix = results.get("img_aug_matrix", np.eye(4)) + lidar_aug_matrix = results.get("lidar_aug_matrix", np.eye(4)) + + lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) + depth = np.zeros((self.num_cameras, self.img_shape[0], self.img_shape[1]), dtype=np.float32) + + cur_coords = results["points"][:,:3] + # inverse aug + cur_coords -= lidar_aug_matrix[:3, 3] + cur_coords = lidar_aug_matrix_inverse[:3, :3].matmul(cur_coords.transpose(1, 0)) + + # lidar2image + cur_coords = lidar2image[:, :3, :3].matmul(cur_coords) + cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1) + + # get 2d coords + dist = cur_coords[:, 2, :] + valid_dist_mask = dist > 0 + + cur_coords[:, 2, :] = np.clip(cur_coords[:, 2, :], 1e-5, 1e5) + cur_coords[:, :2, :] /= cur_coords[:, 2:3, :] + + # imgaug + cur_coords = img_aug_matrix[:, :3, :3].matmul(cur_coords) + cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1) + cur_coords = cur_coords[:, :2, :].transpose(1, 2) + + # normalize coords for grid sample + cur_coords = cur_coords[..., [1, 0]] + on_img = ( + (cur_coords[..., 0] < self.img_shape[0]) + & (cur_coords[..., 0] >= 0) + & (cur_coords[..., 1] < self.img_shape[1]) + & (cur_coords[..., 1] >= 0) + & valid_dist_mask + ) + for c in range(self.num_cameras): + masked_coords = cur_coords[c, on_img[c]].astype(np.int64) + masked_dist = dist[c, on_img[c]] + depth[c, masked_coords[:, 0], masked_coords[:, 1]] = masked_dist + + results["gt_depths"] = depth + + if self.visualize_dir is not None: + self._save_depth_subplot(depth, results) + + return results + + def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: + """Save `gt_depths` as a subplot with one panel per camera. + + Each panel shows the camera image (if available) with the projected + LiDAR depth points overlaid, color-coded by distance. A standalone + depth-only figure is also saved alongside it. 
+ + Args: + depth (np.ndarray): (num_cameras, H, W) ground-truth depth map. + results (dict): The pipeline result dict; used for the underlay + image and to derive a unique filename. + """ + imgs = results.get("img", None) + + # Layout: keep it a single row up to 6 cameras, otherwise wrap to a + # roughly-square grid. + if self.num_cameras <= 6: + rows, cols = 1, self.num_cameras + else: + cols = int(np.ceil(np.sqrt(self.num_cameras))) + rows = int(np.ceil(self.num_cameras / cols)) + + fig, axes = plt.subplots( + rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False + ) + + for c in range(self.num_cameras): + ax = axes[c // cols, c % cols] + d = depth[c] + ys, xs = np.nonzero(d) + vals = d[ys, xs] + + if imgs is not None and c < len(imgs): + ax.imshow(imgs[c].astype(np.uint8)) + if vals.size > 0: + ax.scatter( + xs, ys, c=vals, cmap="turbo", + vmin=0, vmax=self.max_depth, s=1, + ) + else: + ax.imshow( + d, cmap="turbo", vmin=0, vmax=self.max_depth, + interpolation="nearest", + ) + + ax.set_title(f"cam {c} ({vals.size} pts)") + ax.set_xticks([]) + ax.set_yticks([]) + + # Hide any unused subplots when n doesn't fill the grid. 
+ for c in range(self.num_cameras, rows * cols): + axes[c // cols, c % cols].axis("off") + + fig.suptitle(f"gt_depths — {self._depth_idx}") + fig.tight_layout() + + self._depth_idx += 1 + out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png" + fig.savefig(out_path, dpi=120, bbox_inches="tight") + plt.close(fig) + print(f"Saved gt_depths visualization to {out_path}") + \ No newline at end of file From 029dd7655d537c85cfefa061c0c1563f78c07c38 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sat, 30 May 2026 22:50:24 +0900 Subject: [PATCH 158/183] Updated --- projects/BEVFusion/bevfusion/bevfusion.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index bda80ed0d..a047c08ca 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -426,14 +426,9 @@ def get_downsampled_gt_depth(self, gt_depths): gt_depths) gt_depths = torch.min(gt_depths_tmp, dim=-1).values gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, - W // self.downsample) - - gt_depths = torch.log(gt_depths) - torch.log( - torch.tensor(dbounds[0]).float()) - gt_depths = gt_depths * (D - 1) / torch.log( - torch.tensor(dbounds[1] - 1.).float() / - dbounds[0]) - gt_depths = gt_depths + 1. 
+ W // self.depth_gt_downsample) + + gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2] gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0), gt_depths, torch.zeros_like(gt_depths)) gt_depths = F.one_hot( From 28e6af83947d610609e3490bd70775be60b9fe4c Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sat, 30 May 2026 23:06:14 +0900 Subject: [PATCH 159/183] Added --- projects/BEVFusion/bevfusion/__init__.py | 3 ++- projects/BEVFusion/bevfusion/bevfusion.py | 2 ++ projects/BEVFusion/bevfusion/depth_lss_v2.py | 4 +--- projects/BEVFusion/bevfusion/loading.py | 6 +++--- ..._camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +- .../models/resnet50/camera_resnet50_fpn_lss_50m.py | 2 ++ .../default/pipelines/cameras/default_camera_120m.py | 2 ++ .../default/pipelines/cameras/default_camera_50m.py | 9 ++++++++- 8 files changed, 21 insertions(+), 9 deletions(-) diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 385ccd89e..9e1ca1cf4 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -4,7 +4,7 @@ from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder from .depth_lss import DepthLSSTransform, LSSTransform from .depth_lss_v2 import LSSTransformV2 -from .loading import BEVLoadMultiViewImageFromFiles +from .loading import BEVLoadMultiViewImageFromFiles, PointsToMultiViewImageDepths from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer from .transforms_3d import ( @@ -39,4 +39,5 @@ "HardSimpleVoxelSinCosEncoder", "BEVFusionVoxelFeatureNet", "LSSTransformV2", + "PointsToMultiViewImageDepths" ] diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index a047c08ca..5c387ddb4 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -397,6 +397,8 @@ def loss( if 
self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict: with torch.amp.autocast("cuda", enabled=False): gt_depths = batch_inputs_dict["gt_depths"] + print("gt_depths shape: ", gt_depths.shape) + print("pred_depths shape: ", pred_depths.shape) depth_loss = self.get_depth_loss(gt_depths, pred_depths) losses["loss_depth"] = depth_loss diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 66c58add8..def32ee5e 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -248,7 +248,6 @@ def __init__( ybound: Tuple[float, float, float], zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], - loss_depth_weight: float = 3.0, downsample: int = 1, ): super().__init__( @@ -263,7 +262,6 @@ def __init__( ) self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1) self.downsample = DownSampleNet(downsample, out_channels, out_channels) - self.loss_depth_weight = loss_depth_weight def get_cam_feats(self, x): B, N, C, fH, fW = x.shape @@ -279,5 +277,5 @@ def get_cam_feats(self, x): def forward(self, *args, **kwargs): x, depth_softmax = super().forward(*args, **kwargs) x = self.downsample(x) - return x + return x, depth_softmax \ No newline at end of file diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 5cdc499b9..8f9456d19 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -275,10 +275,10 @@ def transform(self, results: dict) -> Optional[dict]: cur_coords = results["points"][:,:3] # inverse aug cur_coords -= lidar_aug_matrix[:3, 3] - cur_coords = lidar_aug_matrix_inverse[:3, :3].matmul(cur_coords.transpose(1, 0)) + cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0) # lidar2image - cur_coords = lidar2image[:, :3, :3].matmul(cur_coords) + cur_coords = lidar2image[:, :3, :3] @ cur_coords cur_coords += lidar2image[:, :3, 
3].reshape(-1, 3, 1) # get 2d coords @@ -289,7 +289,7 @@ def transform(self, results: dict) -> Optional[dict]: cur_coords[:, :2, :] /= cur_coords[:, 2:3, :] # imgaug - cur_coords = img_aug_matrix[:, :3, :3].matmul(cur_coords) + cur_coords = img_aug_matrix[:, :3, :3] @ cur_coords cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1) cur_coords = cur_coords[:, :2, :].transpose(1, 2) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index 69df79b51..6695e397e 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -3,7 +3,7 @@ "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", ] -experiment_group_name = "bevfusion_camera/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_camera/j6gen2_depth_base/" + _base_.dataset_type experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index 4a0770971..c29925243 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -5,6 +5,8 @@ # Image network model = dict( + depth_gt_downsample=8, + loss_depth_weight=3.0, view_transform=dict( type="LSSTransformV2", xbound=[-54.0, 54.0, 0.3], diff --git 
a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py index fc7338699..78bc0167d 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -89,6 +89,8 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", + "gt_depths", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index a8098ed8e..05917e6bd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -29,12 +29,18 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.29, 0.35], + resize_lim=[0.29, 0.35], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=True, is_train=True, ), + dict( + type="PointsToMultiViewImageDepths", + img_shape=image_size, + num_cameras=len(camera_order), + visualize_dir="work_dirs/visualize_depths", + ), dict( type="BEVFusionGlobalRotScaleTrans", scale_ratio_range=[0.95, 1.05], @@ -85,6 +91,7 @@ "vehicle_type", "city", "traffic_cone_barrier_status", + "gt_depths", ], ), ] From 3903988fe91cfad4af6c9b78ee77033427cabc2a Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Sat, 30 May 2026 23:29:51 +0900 Subject: [PATCH 160/183] Updated --- projects/BEVFusion/bevfusion/loading.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 8f9456d19..79019e0bf 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -265,13 +265,16 @@ def transform(self, results: dict) -> 
Optional[dict]: Added keys: - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width). """ - lidar2image = results["lidar2img"] - img_aug_matrix = results.get("img_aug_matrix", np.eye(4)) - lidar_aug_matrix = results.get("lidar_aug_matrix", np.eye(4)) + lidar2image = np.array(results["lidar2img"]) + img_aug_matrix = np.array(results.get("img_aug_matrix", np.eye(4))) + lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4))) lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) - depth = np.zeros((self.num_cameras, self.img_shape[0], self.img_shape[1]), dtype=np.float32) - + depth = np.full( + (self.num_cameras, self.img_shape[0], self.img_shape[1]), + np.inf, + dtype=np.float32, + ) cur_coords = results["points"][:,:3] # inverse aug cur_coords -= lidar_aug_matrix[:3, 3] @@ -305,8 +308,13 @@ def transform(self, results: dict) -> Optional[dict]: for c in range(self.num_cameras): masked_coords = cur_coords[c, on_img[c]].astype(np.int64) masked_dist = dist[c, on_img[c]] - depth[c, masked_coords[:, 0], masked_coords[:, 1]] = masked_dist + np.fmin.at( + depth[c], + (masked_coords[:, 0], masked_coords[:, 1]), + masked_dist, + ) + depth[np.isinf(depth)] = 0 results["gt_depths"] = depth if self.visualize_dir is not None: From 6e6de1c88f54116595e42bb9f0e5ef497693d521 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:23:40 +0900 Subject: [PATCH 161/183] Added --- projects/BEVFusion/bevfusion/bevfusion.py | 20 +++-- projects/BEVFusion/bevfusion/loading.py | 73 ++++++++++++++----- .../pipelines/cameras/default_camera_50m.py | 2 +- 3 files changed, 70 insertions(+), 25 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 5c387ddb4..c305405cd 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -286,7 +286,7 @@ def predict( contains a tensor with shape (num_instances, 7). 
""" batch_input_metas = [item.metainfo for item in batch_data_samples] - feats = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features) + feats, _ = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features) if self.with_bbox_head: outputs = self.bbox_head.predict(feats, batch_input_metas) @@ -307,6 +307,7 @@ def extract_feat( features = [] is_onnx_inference = False + pred_depths = None if imgs is not None and "lidar2img" not in batch_inputs_dict: # NOTE(knzo25): normal training and testing imgs = imgs.contiguous() @@ -394,11 +395,16 @@ def loss( feats, pred_depths = self.extract_feat(batch_inputs_dict, batch_input_metas, using_image_features) losses = dict() - if self.loss_depth_weight > 0 and "gt_depths" in batch_inputs_dict: + if self.loss_depth_weight > 0 and pred_depths is not None: with torch.amp.autocast("cuda", enabled=False): - gt_depths = batch_inputs_dict["gt_depths"] - print("gt_depths shape: ", gt_depths.shape) - print("pred_depths shape: ", pred_depths.shape) + gt_depths = torch.stack( + [ + meta["gt_depths"] + if isinstance(meta["gt_depths"], torch.Tensor) + else torch.as_tensor(meta["gt_depths"]) + for meta in batch_input_metas + ] + ).to(device=pred_depths.device, dtype=torch.float32) depth_loss = self.get_depth_loss(gt_depths, pred_depths) losses["loss_depth"] = depth_loss @@ -439,8 +445,8 @@ def get_downsampled_gt_depth(self, gt_depths): def get_depth_loss(self, depth_labels, depth_preds): depth_labels = self.get_downsampled_gt_depth(depth_labels) - depth_preds = depth_preds.permute(0, 2, 3, - 1).contiguous().view(-1, self.D) + # (B, N, D, H, W) -> (B*N*H*W, D) + depth_preds = depth_preds.permute(0, 1, 3, 4, 2).contiguous().view(-1, self.view_transform.D) fg_mask = torch.max(depth_labels, dim=1).values > 0.0 depth_labels = depth_labels[fg_mask] depth_preds = depth_preds[fg_mask] diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 79019e0bf..fc1d9b985 100644 
--- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -294,7 +294,7 @@ def transform(self, results: dict) -> Optional[dict]: # imgaug cur_coords = img_aug_matrix[:, :3, :3] @ cur_coords cur_coords += img_aug_matrix[:, :3, 3].reshape(-1, 3, 1) - cur_coords = cur_coords[:, :2, :].transpose(1, 2) + cur_coords = cur_coords[:, :2, :].transpose(0, 2, 1) # normalize coords for grid sample cur_coords = cur_coords[..., [1, 0]] @@ -325,9 +325,10 @@ def transform(self, results: dict) -> Optional[dict]: def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: """Save `gt_depths` as a subplot with one panel per camera. - Each panel shows the camera image (if available) with the projected - LiDAR depth points overlaid, color-coded by distance. A standalone - depth-only figure is also saved alongside it. + The figure contains three row blocks per camera: + - image underlay (if available) + projected LiDAR depth points + - image pixels only + - depth-only heatmap (no image pixel values) Args: depth (np.ndarray): (num_cameras, H, W) ground-truth depth map. @@ -336,47 +337,85 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: """ imgs = results.get("img", None) - # Layout: keep it a single row up to 6 cameras, otherwise wrap to a - # roughly-square grid. + # Layout: + # - Top block: image underlay + projected depth points. + # - Middle block: image pixels only. + # - Bottom block: depth-only heatmap (no image pixel values). 
if self.num_cameras <= 6: - rows, cols = 1, self.num_cameras + base_rows, cols = 1, self.num_cameras else: cols = int(np.ceil(np.sqrt(self.num_cameras))) - rows = int(np.ceil(self.num_cameras / cols)) + base_rows = int(np.ceil(self.num_cameras / cols)) + rows = base_rows * 3 fig, axes = plt.subplots( rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False ) for c in range(self.num_cameras): - ax = axes[c // cols, c % cols] d = depth[c] ys, xs = np.nonzero(d) vals = d[ys, xs] + # Row block 1: image + depth scatter. + ax_overlay = axes[c // cols, c % cols] if imgs is not None and c < len(imgs): - ax.imshow(imgs[c].astype(np.uint8)) + ax_overlay.imshow(imgs[c].astype(np.uint8)) if vals.size > 0: - ax.scatter( + ax_overlay.scatter( xs, ys, c=vals, cmap="turbo", vmin=0, vmax=self.max_depth, s=1, ) else: - ax.imshow( + ax_overlay.imshow( d, cmap="turbo", vmin=0, vmax=self.max_depth, interpolation="nearest", ) + ax_overlay.set_title(f"cam {c} overlay ({vals.size} pts)") + ax_overlay.set_xticks([]) + ax_overlay.set_yticks([]) - ax.set_title(f"cam {c} ({vals.size} pts)") - ax.set_xticks([]) - ax.set_yticks([]) + # Row block 2: image-only visualization. + ax_img = axes[base_rows + (c // cols), c % cols] + if imgs is not None and c < len(imgs): + ax_img.imshow(imgs[c].astype(np.uint8)) + else: + ax_img.imshow( + d, cmap="gray", vmin=0, vmax=self.max_depth, + interpolation="nearest", + ) + ax_img.set_title(f"cam {c} image-only") + ax_img.set_xticks([]) + ax_img.set_yticks([]) + + # Row block 3: depth-only visualization. + ax_depth = axes[(base_rows * 2) + (c // cols), c % cols] + ax_depth.imshow( + d, cmap="turbo", vmin=0, vmax=self.max_depth, + interpolation="nearest", + ) + ax_depth.set_title(f"cam {c} depth-only") + ax_depth.set_xticks([]) + ax_depth.set_yticks([]) # Hide any unused subplots when n doesn't fill the grid. 
- for c in range(self.num_cameras, rows * cols): + for c in range(self.num_cameras, base_rows * cols): axes[c // cols, c % cols].axis("off") + axes[base_rows + (c // cols), c % cols].axis("off") + axes[(base_rows * 2) + (c // cols), c % cols].axis("off") + + # Shared depth colorbar with numeric values. + depth_mappable = plt.cm.ScalarMappable( + cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth) + ) + depth_mappable.set_array([]) + cbar = fig.colorbar( + depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02 + ) + cbar.set_label("Depth (m)") fig.suptitle(f"gt_depths — {self._depth_idx}") - fig.tight_layout() + fig.tight_layout(rect=[0, 0, 0.96, 0.97]) self._depth_idx += 1 out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png" diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 05917e6bd..556c9ef06 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -39,7 +39,7 @@ type="PointsToMultiViewImageDepths", img_shape=image_size, num_cameras=len(camera_order), - visualize_dir="work_dirs/visualize_depths", + # visualize_dir="work_dirs/visualize_depths", ), dict( type="BEVFusionGlobalRotScaleTrans", From b87c3a379e9d02831af18cd28ff7d8741b6ebc4a Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:47:36 +0900 Subject: [PATCH 162/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 556c9ef06..77ff07e29 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py 
+++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -35,6 +35,10 @@ rand_flip=True, is_train=True, ), + dict( + type="PointsRangeFilter", + point_cloud_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0] + ), dict( type="PointsToMultiViewImageDepths", img_shape=image_size, From 3b7966093baa211e2528c231d8ace1cabee1fb75 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:50:54 +0900 Subject: [PATCH 163/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_120m.py | 2 ++ .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py index 78bc0167d..f31a604b0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -53,6 +53,8 @@ ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict( type="ObjectNameFilter", classes=[ diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 77ff07e29..741524a34 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -53,6 +53,8 @@ ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], 
min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), # Remove LiDAR points from the data dict(type="BEVFusionRemoveLiDARPoints"), dict( From d5cbf2b9e5c84c85a33de9650be917883f241531 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:52:05 +0900 Subject: [PATCH 164/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_120m.py | 1 - .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 1 - 2 files changed, 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py index f31a604b0..9496cda0c 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -54,7 +54,6 @@ dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict( type="ObjectNameFilter", classes=[ diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 741524a34..15b63829b 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -54,7 +54,6 @@ dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), - dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), # Remove LiDAR points from the data 
dict(type="BEVFusionRemoveLiDARPoints"), dict( From 5df6adf593603ba234b6edc16b7768a180687838 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:52:25 +0900 Subject: [PATCH 165/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_120m.py | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py index 9496cda0c..f31a604b0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -54,6 +54,7 @@ dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict( type="ObjectNameFilter", classes=[ From 8da5b1699646b92195cd4f7faa3f7f45fa823d51 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 00:57:33 +0900 Subject: [PATCH 166/183] Added --- .../t4dataset/default/pipelines/cameras/default_camera_50m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 15b63829b..11b0869d3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -37,7 +37,7 @@ ), dict( type="PointsRangeFilter", - point_cloud_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0] + point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] ), dict( type="PointsToMultiViewImageDepths", From 216e5577e26c608b1c0f95e9a76bf0dda52a63f9 Mon Sep 
17 00:00:00 2001 From: KSeabgTan Date: Sun, 31 May 2026 01:09:18 +0900 Subject: [PATCH 167/183] Added --- projects/BEVFusion/bevfusion/bevfusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index c305405cd..1c1a159d7 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -437,8 +437,8 @@ def get_downsampled_gt_depth(self, gt_depths): W // self.depth_gt_downsample) gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2] - gt_depths = torch.where((gt_depths < D + 1) & (gt_depths >= 0.0), - gt_depths, torch.zeros_like(gt_depths)) + gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths)) + gt_depths = torch.clamp(gt_depths, max=float(D)) gt_depths = F.one_hot( gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:] return gt_depths.float() From d899a6209b7d0239e136b8a5d79dfa028f0e9b02 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Mon, 1 Jun 2026 17:10:09 +0900 Subject: [PATCH 168/183] Added --- .../configs/detection3d/default_runtime.py | 12 ++++++++++-- projects/BEVFusion/bevfusion/bevfusion.py | 12 ++++++++---- projects/BEVFusion/bevfusion/loading.py | 6 ++++-- ...esnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +- .../resnet50/camera_resnet50_fpn_lss_50m.py | 2 +- .../pipelines/cameras/default_camera_50m.py | 11 ++++++----- .../default/pipelines/default_lidar_50m.py | 16 ++++++++-------- .../default_30e_8xb16_adamw_cosine.py | 6 +++--- .../default_30e_8xb16_adamw_linear_cosine.py | 17 ++++++++++++----- 9 files changed, 53 insertions(+), 31 deletions(-) diff --git a/autoware_ml/configs/detection3d/default_runtime.py b/autoware_ml/configs/detection3d/default_runtime.py index cc2b896f7..6da761425 100644 --- a/autoware_ml/configs/detection3d/default_runtime.py +++ b/autoware_ml/configs/detection3d/default_runtime.py @@ -2,9 +2,17 @@ default_hooks = dict( 
timer=dict(type="IterTimerHook"), - logger=dict(type="LoggerHook", interval=50), + logger=dict( + type="LoggerHook", + interval=50, + backend_args=dict(backend="local"), + ), param_scheduler=dict(type="ParamSchedulerHook"), - checkpoint=dict(type="CheckpointHook", interval=-1), + checkpoint=dict( + type="CheckpointHook", + interval=-1, + backend_args=dict(backend="local"), + ), sampler_seed=dict(type="DistSamplerSeedHook"), visualization=dict(type="Det3DVisualizationHook"), ) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 1c1a159d7..17d924ff9 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -76,8 +76,7 @@ def __init__( self.pts_neck = MODELS.build(pts_neck) if pts_neck is not None else None self.bbox_head = MODELS.build(bbox_head) - - self.init_weights() + self._weights_initialized = False self.loss_depth_weight = loss_depth_weight self.depth_gt_downsample = depth_gt_downsample @@ -135,8 +134,11 @@ def parse_losses(self, losses: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, D return loss, log_vars # type: ignore def init_weights(self) -> None: + if self._weights_initialized: + return if self.img_backbone is not None: self.img_backbone.init_weights() + self._weights_initialized = True @property def with_bbox_head(self): @@ -437,8 +439,10 @@ def get_downsampled_gt_depth(self, gt_depths): W // self.depth_gt_downsample) gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2] - gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths)) - gt_depths = torch.clamp(gt_depths, max=float(D)) + # gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths)) + # gt_depths = torch.clamp(gt_depths, max=float(D)) + gt_depths = torch.where((gt_depths >= 0.0) & (gt_depths < D + 1), gt_depths, torch.zeros_like(gt_depths)) + # gt_depths = torch.clamp(gt_depths, max=float(D)) gt_depths = F.one_hot( gt_depths.long(), 
num_classes=D + 1).view(-1, D + 1)[:, 1:] return gt_depths.float() diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index fc1d9b985..40999f562 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple import matplotlib.pyplot as plt @@ -242,6 +242,7 @@ def __init__( self, img_shape, num_cameras: int, + depth_bounds: Tuple[float, float], visualize_dir: Optional[str] = None, max_depth: float = 80.0, ): @@ -249,6 +250,7 @@ def __init__( self.num_cameras = num_cameras self.visualize_dir = visualize_dir self.max_depth = max_depth + self.depth_bounds = depth_bounds self.visualize_dir = Path(visualize_dir) if visualize_dir is not None else None if self.visualize_dir is not None: self.visualize_dir.mkdir(parents=True, exist_ok=True) @@ -286,7 +288,7 @@ def transform(self, results: dict) -> Optional[dict]: # get 2d coords dist = cur_coords[:, 2, :] - valid_dist_mask = dist > 0 + valid_dist_mask = (dist >= self.depth_bounds[0]) & (dist < self.depth_bounds[1]) cur_coords[:, 2, :] = np.clip(cur_coords[:, 2, :], 1e-5, 1e5) cur_coords[:, :2, :] /= cur_coords[:, 2:3, :] diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index 6695e397e..10a8924d2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -3,7 +3,7 @@ "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", ] -experiment_group_name = 
"bevfusion_camera/j6gen2_depth_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index c29925243..d0920ccf1 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -6,7 +6,7 @@ # Image network model = dict( depth_gt_downsample=8, - loss_depth_weight=3.0, + loss_depth_weight=2.0, view_transform=dict( type="LSSTransformV2", xbound=[-54.0, 54.0, 0.3], diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 11b0869d3..0a2452de3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -35,15 +35,16 @@ rand_flip=True, is_train=True, ), - dict( - type="PointsRangeFilter", - point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] - ), + # dict( + # type="PointsRangeFilter", + # point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] + # ), dict( type="PointsToMultiViewImageDepths", img_shape=image_size, num_cameras=len(camera_order), - # visualize_dir="work_dirs/visualize_depths", + depth_bounds=[1.0, 60.0], + # visualize_dir="work_dirs/visualize_depths_2", ), dict( type="BEVFusionGlobalRotScaleTrans", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py 
b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index 68c3e3688..eb1737e27 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 16 +num_workers = 8 input_modality = dict(use_lidar=True, use_camera=False) # range setting @@ -8,13 +8,13 @@ voxel_size = [0.075, 0.075, 0.2] grid_size = [1440, 1440, 41] eval_class_range = { - "car": 54.0, - "truck": 54.0, - "bus": 54.0, - "bicycle": 54.0, - "pedestrian": 54.0, - "traffic_cone": 54.0, - "barrier": 54.0, + "car": 51.2, + "truck": 51.2, + "bus": 51.2, + "bicycle": 51.2, + "pedestrian": 51.2, + "traffic_cone": 51.2, + "barrier": 51.2, } # LiDAR parameters diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py index d28468f71..2893b2e74 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_cosine.py @@ -1,12 +1,12 @@ # learning rate lr = 2.0e-4 -t_max = 3 +t_max = 1 max_epochs = 30 val_interval = 5 -train_gpu_size = 8 +train_gpu_size = 2 test_batch_size = 4 -train_batch_size = 16 +train_batch_size = 32 param_scheduler = [ # learning rate scheduler diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index f4ec3e0db..5b018de19 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate -lr = 1e-4 -t_max = 
3 +lr = 2e-4 +t_max = 1 max_epochs = 30 val_interval = 1 @@ -10,7 +10,14 @@ param_scheduler = [ # learning rate scheduler - dict(type="LinearLR", start_factor=1.0 / 3, begin=0, end=t_max, by_epoch=True), + dict( + type="LinearLR", + start_factor=1.0 / 3, + begin=0, + end=t_max, + by_epoch=True, + convert_to_iter_based=True, + ), dict( type="CosineAnnealingLR", T_max=(max_epochs - t_max), @@ -51,8 +58,8 @@ optim_wrapper = dict( type="OptimWrapper", - optimizer=dict(type="AdamW", lr=lr, weight_decay=0.01), - clip_grad=dict(max_norm=0.1, norm_type=2), + optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-3), + clip_grad=dict(max_norm=5.0, norm_type=2), ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) From 8b6c1577e0726aaf20e5626c5745bae8c67e1ffe Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Tue, 2 Jun 2026 14:28:54 +0900 Subject: [PATCH 169/183] Added --- projects/BEVFusion/bevfusion/loading.py | 28 +++++++++---------- .../pipelines/cameras/default_camera_50m.py | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 40999f562..6bf3e1b38 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -272,11 +272,6 @@ def transform(self, results: dict) -> Optional[dict]: lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4))) lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) - depth = np.full( - (self.num_cameras, self.img_shape[0], self.img_shape[1]), - np.inf, - dtype=np.float32, - ) cur_coords = results["points"][:,:3] # inverse aug cur_coords -= lidar_aug_matrix[:3, 3] @@ -307,16 +302,19 @@ def transform(self, results: dict) -> Optional[dict]: & (cur_coords[..., 1] >= 0) & valid_dist_mask ) - for c in range(self.num_cameras): - masked_coords = cur_coords[c, on_img[c]].astype(np.int64) - masked_dist = dist[c, on_img[c]] - np.fmin.at( - depth[c], - 
(masked_coords[:, 0], masked_coords[:, 1]), - masked_dist, - ) - - depth[np.isinf(depth)] = 0 + + # Avoid loops since it's slow + indices = np.nonzero(on_img) + camera_indices = indices[0] + point_indices = indices[1] + masked_coords = cur_coords[camera_indices, point_indices].astype(np.int64) + masked_dist = dist[camera_indices, point_indices] + + # Possibly to have duplicates and the last one will be used, however, the chance is small + flatten_indices = camera_indices * self.img_shape[0] * self.img_shape[1] + masked_coords[:, 0] * self.img_shape[1] + masked_coords[:, 1] + depth_flat = np.zeros(self.num_cameras * self.img_shape[0] * self.img_shape[1], dtype=np.float32) + depth_flat[flatten_indices] = masked_dist + depth = depth_flat.reshape(self.num_cameras, self.img_shape[0], self.img_shape[1]) results["gt_depths"] = depth if self.visualize_dir is not None: diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 0a2452de3..b846f3624 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -44,7 +44,7 @@ img_shape=image_size, num_cameras=len(camera_order), depth_bounds=[1.0, 60.0], - # visualize_dir="work_dirs/visualize_depths_2", + # visualize_dir="work_dirs/visualize_depths_3", ), dict( type="BEVFusionGlobalRotScaleTrans", From 5002fb2a265042a0ee75016fe7ae1430d6dbaaec Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Tue, 2 Jun 2026 15:12:36 +0900 Subject: [PATCH 170/183] Added --- projects/BEVFusion/bevfusion/loading.py | 31 ++++++++++++------- .../pipelines/cameras/default_camera_50m.py | 10 +++--- .../default/pipelines/default_lidar_50m.py | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 
6bf3e1b38..2ef6e718c 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy from pathlib import Path +import time from typing import List, Optional, Tuple import matplotlib.pyplot as plt @@ -266,17 +267,22 @@ def transform(self, results: dict) -> Optional[dict]: dict: The result dict containing the multi-view image data. Added keys: - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width). - """ - lidar2image = np.array(results["lidar2img"]) - img_aug_matrix = np.array(results.get("img_aug_matrix", np.eye(4))) - lidar_aug_matrix = np.array(results.get("lidar_aug_matrix", np.eye(4))) + """ + start_time = time.perf_counter() - lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) - cur_coords = results["points"][:,:3] - # inverse aug - cur_coords -= lidar_aug_matrix[:3, 3] - cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0) - + lidar2image = np.asarray(results["lidar2img"]) + img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4) + cur_coords = results["points"].numpy()[:,:3] + + # inverse lidar aug + if "lidar_aug_matrix" in results: + lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"]) + lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) + cur_coords -= lidar_aug_matrix[:3, 3] + cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0) + else: + cur_coords = cur_coords.transpose(1, 0) + # lidar2image cur_coords = lidar2image[:, :3, :3] @ cur_coords cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1) @@ -302,7 +308,7 @@ def transform(self, results: dict) -> Optional[dict]: & (cur_coords[..., 1] >= 0) & valid_dist_mask ) - + # Avoid loops since it's slow indices = np.nonzero(on_img) camera_indices = indices[0] @@ -319,7 +325,8 @@ def transform(self, results: dict) -> Optional[dict]: if 
self.visualize_dir is not None: self._save_depth_subplot(depth, results) - + end_time = time.perf_counter() + print(f"Time taken: {end_time - start_time} seconds") return results def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index b846f3624..00e7ac896 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -35,16 +35,16 @@ rand_flip=True, is_train=True, ), - # dict( - # type="PointsRangeFilter", - # point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] - # ), + dict( + type="PointsRangeFilter", + point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] + ), dict( type="PointsToMultiViewImageDepths", img_shape=image_size, num_cameras=len(camera_order), depth_bounds=[1.0, 60.0], - # visualize_dir="work_dirs/visualize_depths_3", + # visualize_dir="work_dirs/visualize_depths_6", ), dict( type="BEVFusionGlobalRotScaleTrans", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index eb1737e27..f49e2dbb4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 8 +num_workers = 4 input_modality = dict(use_lidar=True, use_camera=False) # range setting From cbe6f3bcd0bd93efdc144fea48722e57610f4a0f Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Tue, 2 Jun 2026 15:12:57 +0900 Subject: [PATCH 171/183] Added --- projects/BEVFusion/bevfusion/loading.py | 5 ----- 1 file changed, 5 deletions(-) diff --git 
a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 2ef6e718c..5d5c8d13a 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy from pathlib import Path -import time from typing import List, Optional, Tuple import matplotlib.pyplot as plt @@ -268,8 +267,6 @@ def transform(self, results: dict) -> Optional[dict]: Added keys: - gt_depths (np.ndarray): Ground truth depths in (N, H, W) for (number of cameras, height, width). """ - start_time = time.perf_counter() - lidar2image = np.asarray(results["lidar2img"]) img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4) cur_coords = results["points"].numpy()[:,:3] @@ -325,8 +322,6 @@ def transform(self, results: dict) -> Optional[dict]: if self.visualize_dir is not None: self._save_depth_subplot(depth, results) - end_time = time.perf_counter() - print(f"Time taken: {end_time - start_time} seconds") return results def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: From 4547bb3011b80300d5201769ca3c9d8d446ec147 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Tue, 2 Jun 2026 15:13:43 +0900 Subject: [PATCH 172/183] Added --- .../configs/t4dataset/default/pipelines/default_lidar_50m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index f49e2dbb4..eb1737e27 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 4 +num_workers = 8 input_modality = dict(use_lidar=True, use_camera=False) # range setting From a33fc4c96a563acaffa6e9e8eef86a1e60fd10cc Mon Sep 
17 00:00:00 2001 From: KSeabgTan Date: Tue, 2 Jun 2026 15:21:23 +0900 Subject: [PATCH 173/183] Added --- .../configs/t4dataset/default/pipelines/default_lidar_50m.py | 2 +- .../default/schedulers/default_30e_8xb16_adamw_linear_cosine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index eb1737e27..51688a7aa 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 8 +num_workers = 16 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 5b018de19..2c00474d6 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -6,7 +6,7 @@ train_gpu_size = 2 test_batch_size = 2 -train_batch_size = 16 +train_batch_size = 32 param_scheduler = [ # learning rate scheduler From 44cd0896ac7f58da22b2798a0e5a0b1f8172bf89 Mon Sep 17 00:00:00 2001 From: KSeangTan Date: Wed, 3 Jun 2026 20:05:06 +0900 Subject: [PATCH 174/183] Added --- projects/BEVFusion/bevfusion/__init__.py | 5 +- projects/BEVFusion/bevfusion/bevfusion.py | 51 +++ projects/BEVFusion/bevfusion/depth_lss_v2.py | 290 +++++++++++++++++- ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 4 +- ...era_resnet50_fpn_lss_50e_8xb16_base_50m.py | 4 +- ...snet50_fpn_camera_depth_aware_lssv2_50m.py | 28 ++ 6 files changed, 365 insertions(+), 17 deletions(-) create mode 100644 
projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py diff --git a/projects/BEVFusion/bevfusion/__init__.py b/projects/BEVFusion/bevfusion/__init__.py index 9e1ca1cf4..c4afb6a86 100644 --- a/projects/BEVFusion/bevfusion/__init__.py +++ b/projects/BEVFusion/bevfusion/__init__.py @@ -3,7 +3,7 @@ from .bevfusion_necks import GeneralizedLSSFPN from .bevfusion_voxel_encoder import BEVFusionVoxelFeatureNet, HardSimpleVoxelSinCosEncoder from .depth_lss import DepthLSSTransform, LSSTransform -from .depth_lss_v2 import LSSTransformV2 +from .depth_lss_v2 import LSSTransformV2, LSSTransformV2DepthAware from .loading import BEVLoadMultiViewImageFromFiles, PointsToMultiViewImageDepths from .sparse_encoder import BEVFusionSparseEncoder from .transformer import TransformerDecoderLayer @@ -39,5 +39,6 @@ "HardSimpleVoxelSinCosEncoder", "BEVFusionVoxelFeatureNet", "LSSTransformV2", - "PointsToMultiViewImageDepths" + "PointsToMultiViewImageDepths", + "LSSTransformV2DepthAware", ] diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 17d924ff9..1cdff32ad 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -149,6 +149,47 @@ def with_bbox_head(self): def with_seg_head(self): """bool: Whether the detector has a segmentation head.""" return hasattr(self, "seg_head") and self.seg_head is not None + + def prepare_camera_depth_aware_parameters( + self, + camera_intrinsics: torch.Tensor, + img_aug_matrix: torch.Tensor, + lidar_aug_matrix: torch.Tensor, + camera2lidar: torch.Tensor, + ) -> torch.Tensor: + """ + Args: + camera_intrinsics: torch.Tensor, the camera intrinsics of shape (B, N, 3, 3). + img_aug_matrix: torch.Tensor, the image augmentation matrix of shape (B, N, 4, 4). + lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, N, 4, 4). 
+ camera2lidar: torch.Tensor, the camera to lidar matrix of shape (B, N, 4, 4). + Returns: + torch.Tensor, the camera depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS). + """ + # (B*N, 15) + mlp_input = torch.stack([ + camera_intrinsics[:, :, 0, 0], # fx + camera_intrinsics[:, :, 1, 1], # fy + camera_intrinsics[:, :, 0, 2], # cx + camera_intrinsics[:, :, 1, 2], # cy + img_aug_matrix[:, :, 0, 0], # r11 + img_aug_matrix[:, :, 0, 1], # r12 + img_aug_matrix[:, :, 0, 3], # t1 + img_aug_matrix[:, :, 1, 0], # r21 + img_aug_matrix[:, :, 1, 1], # r22 + img_aug_matrix[:, :, 1, 3], # t2 + lidar_aug_matrix[:, :, 0, 0], # r11 + lidar_aug_matrix[:, :, 0, 1], # r12 + lidar_aug_matrix[:, :, 1, 0], # r21 + lidar_aug_matrix[:, :, 1, 1], # r22 + lidar_aug_matrix[:, :, 2, 2], # r33 + ], dim=-1) + # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12) + camera2lidar_flatten = camera2lidar[:,:,:3,:].view(-1, 12) + + # (B*N, 15+12) + mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1) + return mlp_input def get_image_backbone_features(self, x: torch.Tensor) -> torch.Tensor: B, N, C, H, W = x.size() @@ -327,6 +368,12 @@ def extract_feat( camera2lidar = imgs.new_tensor(np.asarray(camera2lidar)) img_aug_matrix = imgs.new_tensor(np.asarray(img_aug_matrix)) lidar_aug_matrix = imgs.new_tensor(np.asarray(lidar_aug_matrix)) + camera_depth_aware_parameters = self.prepare_camera_depth_aware_parameters( + camera_intrinsics=camera_intrinsics, + img_aug_matrix=img_aug_matrix, + lidar_aug_matrix=lidar_aug_matrix, + camera2lidar=camera2lidar, + ) img_feature, pred_depths = self.extract_img_feat( imgs, deepcopy(points), @@ -337,6 +384,7 @@ def extract_feat( lidar_aug_matrix, batch_input_metas, using_image_features=using_image_features, + camera_depth_aware_parameters=camera_depth_aware_parameters, ) features.append(img_feature) elif imgs is not None: @@ -348,6 +396,8 @@ def extract_feat( img_aug_matrix = batch_inputs_dict["img_aug_matrix"] lidar_aug_matrix = 
batch_inputs_dict["lidar_aug_matrix"] geom_feats = batch_inputs_dict["geom_feats"] + # Retrieve the parameters from deployment code directly + camera_depth_aware_parameters = batch_inputs_dict["camera_depth_aware_parameters"] img_feature, pred_depths = self.extract_img_feat( imgs, @@ -360,6 +410,7 @@ def extract_feat( batch_input_metas, geom_feats=geom_feats, using_image_features=using_image_features, + camera_depth_aware_parameters=camera_depth_aware_parameters, ) features.append(img_feature) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index def32ee5e..251fc61fc 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -7,12 +7,215 @@ import torch from mmdet3d.registry import MODELS from mmengine.logging import print_log +from mmdet.models.backbones.resnet import BasicBlock from torch import nn +from torch.utils.checkpoint import checkpoint from .depth_lss import BaseViewTransform, DepthLSSNet, DownSampleNet, LidarDepthImageNet from .ops import bev_pool_v2 +class SELayer(nn.Module): + """ + Squeeze-and-Excitation (SE) layer. + This is used to modulate features with camera-depth aware parameters. + The code is taken from BEVDET (https://github.com/hustvl/BEVDET). + """ + + def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid): + super().__init__() + # Dont need global pooling because inputs are (B*N, C, 1, 1). + self.sequeeze_net = nn.Sequential( + [ + # Squeeze with 1x1 convolution + nn.Conv2d(channels, channels, 1, bias=True), + # Activation + act_layer(), + # Expand with 1x1 convolution + nn.Conv2d(channels, channels, 1, bias=True), + # Gate with sigmoid activation + gate_layer(), + ] + ) + + def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch.Tensor: + """ + Args: + x: Tuple[torch.Tensor, torch.Tensor], the input tuple containing the image features and camera-depth aware parameters. 
+ Returns: + torch.Tensor, the output tensor of shape (B, N, C). + """ + feature_attentions = self.sequeeze_net(depth_aware_features) + return x * feature_attentions + + +class CameraDepthLinearProjectionMLP(nn.Module): + """ + Linear projection module by MLP. This is used to project image (context) features and camera-depth + aware parameters (for example, intrinsics) to embedding space. + The code is taken from BEVDET (https://github.com/hustvl/BEVDET). + """ + + def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop_out: float = 0.0): + """ + Args: + in_channels: int, the number of input channels. + hidden_channels: int, the number of hidden channels. + out_channels: int, the number of output channels. + drop_out: float, the dropout rate. + """ + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.drop_out = drop_out + + self.sequential_mlp = nn.Sequential( + nn.Linear(in_channels, hidden_channels), + nn.ReLU(inplace=True), + nn.Dropout(drop_out), + nn.Linear(hidden_channels, out_channels), + nn.Dropout(drop_out), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + x: torch.Tensor, the input tensor of shape (B, N, C). + Returns: + torch.Tensor, the output tensor of shape (B, N, C). + """ + return self.sequential_mlp(x) + + +class CameraDepthAwareNet(nn.Module): + """ + Camera-depth aware depth net. This is used to predict the depth of the scene. + The code is taken from BEVDET (https://github.com/hustvl/BEVDET). + """ + + def __init__( + self, + in_channels: int, + hidden_channels: int, + out_channels: int, + mlp_drop_out: float, + downsample: int, + depth_channels: int, + with_cp: bool = False, + num_camera_depth_parameters: int = 27) -> None: + """ + Args: + in_channels: int, the number of input channels. + out_channels: int, the number of output channels. + mlp_drop_out: float, the dropout rate of the MLP. 
+ mlp_hidden_channels: int, the number of hidden channels of the MLP. + mlp_out_channels: int, the number of output channels of the MLP. + """ + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.mlp_drop_out = mlp_drop_out + self.num_camera_depth_parameters = num_camera_depth_parameters + self.downsample = downsample + self.depth_channels = depth_channels + self.with_cp = with_cp + + # Input convolution for context/image features + # Camera depth aware parameters branch + self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( + self.num_camera_depth_parameters + ) + + # Context/image feature branch + self.context_input_conv = nn.Sequential( + nn.Conv2d( + in_channels, hidden_channels, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(hidden_channels), + nn.ReLU(inplace=True), + ) + self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out + ) + self.context_se = SELayer(channels=hidden_channels) + self.context_conv = nn.Conv2d( + hidden_channels, + out_channels, + stride=1, padding=1) + + # Depth branch + self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out + ) + self.depth_se = SELayer(channels=hidden_channels) + self.depth_conv = nn.Sequantial( + BasicBlock(hidden_channels, hidden_channels, downsample=downsample), + BasicBlock(hidden_channels, hidden_channels, downsample=downsample), + BasicBlock(hidden_channels, hidden_channels, downsample=downsample), + nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0) + ) + + def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: + """ + 
Args: + x: torch.Tensor, the input tensor of shape (B*N, C, H, W). + camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS). + Returns: + torch.Tensor, the output tensor of shape (B*N, C, H, W). + """ + context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features) + # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) + context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + context_features = self.context_se(context_features, context_camera_depth_aware_features) + context_features = self.context_conv(context_features) + return context_features + + def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: + """ + Args: + depth_features: torch.Tensor, the input tensor of shape (B*N, C, H, W). + camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B, N, D). + Returns: + torch.Tensor, the output tensor of shape (B*N, C, H, W). + """ + depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features) + # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) + depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + # (B*N, C, H, W) + depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features) + if self.with_cp: + depth_features = checkpoint(self.depth_conv, depth_features) + else: + depth_features = self.depth_conv(depth_features) + return depth_features + + def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) -> torch.Tensor: + """ + Args: + x: torch.Tensor, the input tensor of shape (B, N, C, H, W). + camera_depth_aware_parameters: torch.Tensor, the camera-depth aware parameters of shape (B, N, N_CAMERA_DEPTH_PARAMETERS). 
+ Returns: + torch.Tensor, the output tensor of shape (B*N, C, H, W). + """ + # (B, N, N_CAMERA_DEPTH_PARAMETERS) -> (B*N, N_CAMERA_DEPTH_PARAMETERS) + camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters) + + # (B*N, N_CAMERA_DEPTH_PARAMETERS) + camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) + context_features = self.input_conv(x) + + context_features = self.context_forward(context_features, camera_depth_aware_features) + depth_features = self.depth_forward(context_features, camera_depth_aware_features) + return torch.cat([depth_features, context_features], dim=1) + + class BaseViewTransformV2(BaseViewTransform): def __init__( @@ -48,7 +251,7 @@ def __init__( self.collapse_z = collapse_z self.expand_batch_axis = expand_batch_axis - def get_cam_feats(self, x) -> Tuple[torch.Tensor, torch.Tensor]: + def get_cam_feats(self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError def forward( @@ -65,6 +268,7 @@ def forward( img_aug_matrix_inverse, lidar_aug_matrix_inverse, geom_feats_precomputed, + camera_depth_aware_parameters: Optional[torch.Tensor] = None, ): if geom_feats_precomputed is not None: ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed @@ -100,7 +304,7 @@ def forward( ( view_feats, depth_softmax, - ) = self.get_cam_feats(img) + ) = self.get_cam_feats(img, camera_depth_aware_parameters) x = self.bev_pool(view_feats, depth_softmax, geom) return x, depth_softmax @@ -232,9 +436,23 @@ def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth bev_feat = self.compute_bev_pool( view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths ) - return bev_feat - + return bev_feat + def get_depth_softmax(self, x: torch.Tensor, B, N, fH, fW) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + x: torch.Tensor, the input 
tensor of shape (B*N, D+C, H, W). + Returns: + Tuple[torch.Tensor, torch.Tensor], the tuple containing the view features and depth softmax. + view_feats: torch.Tensor, the view features of shape (B, N, C, H, W). + depth_softmax: torch.Tensor, the depth softmax of shape (B, N, D, H, W). + """ + depth_softmax = x[:, : self.D].softmax(dim=1) + depth_softmax = depth_softmax.view(B, N, self.D, fH, fW) + view_feats = x[:, self.D : (self.D + self.C)] + view_feats = view_feats.view(B, N, self.C, fH, fW) + return view_feats, depth_softmax + @MODELS.register_module() class LSSTransformV2(BaseViewTransformV2): @@ -263,19 +481,69 @@ def __init__( self.depthnet = nn.Conv2d(self.in_channels, self.D + self.C, 1) self.downsample = DownSampleNet(downsample, out_channels, out_channels) - def get_cam_feats(self, x): + def get_cam_feats( + self, + x: torch.Tensor, + camera_depth_aware_parameters: Optional[torch.Tensor] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: B, N, C, fH, fW = x.shape x = x.view(B * N, C, fH, fW) x = self.depthnet(x) + return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW) + + def forward(self, *args, **kwargs): + x, depth_softmax = super().forward(*args, **kwargs) + x = self.downsample(x) + return x, depth_softmax - depth_softmax = x[:, : self.D].softmax(dim=1) - depth_softmax = depth_softmax.view(B, N, self.D, fH, fW) - view_feats = x[:, self.D : (self.D + self.C)] - view_feats = view_feats.view(B, N, self.C, fH, fW) - return view_feats, depth_softmax + +@MODELS.register_module() +class LSSTransformV2DepthAware(BaseViewTransformV2): + + def __init__( + self, + in_channels: int, + out_channels: int, + image_size: Tuple[int, int], + feature_size: Tuple[int, int], + xbound: Tuple[float, float, float], + ybound: Tuple[float, float, float], + zbound: Tuple[float, float, float], + dbound: Tuple[float, float, float], + camera_depth_aware_configs: dict, + downsample: int = 1, + ): + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + 
image_size=image_size, + feature_size=feature_size, + xbound=xbound, + ybound=ybound, + zbound=zbound, + dbound=dbound, + ) + self.downsample = DownSampleNet(downsample, out_channels, out_channels) + self.camera_depth_aware_net = CameraDepthAwareNet( + in_channels=in_channels, + hidden_channels=in_channels, + mlp_drop_out=camera_depth_aware_configs["mlp_drop_out"], + downsample=camera_depth_aware_configs["downsample"], + depth_channels=self.D, + out_channels=self.C, + ) + + def get_cam_feats( + self, + x: torch.Tensor, + camera_depth_aware_parameters: Optional[torch.Tensor] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: + B, N, C, fH, fW = x.shape + x = x.view(B * N, C, fH, fW) + x = self.camera_depth_aware_net(x, camera_depth_aware_parameters) + return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW) def forward(self, *args, **kwargs): x, depth_softmax = super().forward(*args, **kwargs) x = self.downsample(x) return x, depth_softmax - \ No newline at end of file diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index 10a8924d2..5de72725c 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -1,10 +1,10 @@ _base_ = [ "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py", - "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", + "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py", ] experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_lss_v2_30e_8xb16_j6gen2_base_50m" +experiment_name = 
"bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py index 5215dc9f3..40f008b34 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m.py @@ -1,10 +1,10 @@ _base_ = [ "../default_bevfusion_camera_50e_8xb16_base_50m.py", - "../../default/models/resnet50/camera_resnet50_fpn_lss_50m.py", + "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py", ] experiment_group_name = "bevfusion_camera/base/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_lss_50e_8xb16_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_50e_8xb16_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py new file mode 100644 index 000000000..37b98f82d --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py @@ -0,0 +1,28 @@ +_base_ = [ + "./camera_resnet50_fpn_depthlss_120m.py", +] +num_proposals = 200 + +# Image network +model = dict( + depth_gt_downsample=8, + loss_depth_weight=2.0, + view_transform=dict( + type="LSSTransformV2DepthAware", + xbound=[-54.0, 54.0, 0.3], + ybound=[-54.0, 54.0, 0.3], + zbound=[-10.0, 10.0, 20.0], 
+ dbound=[1.0, 60, 0.5], + downsample=2, + camera_depth_aware_configs=dict( + mlp_drop_out=0.0, + downsample=8, + ), + ), + bbox_head=dict( + num_proposals=num_proposals, + bbox_coder=dict( + post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + ), + ), +) From b7eae63e5a90c85a74cd772895d9f7f5cad55df5 Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Thu, 4 Jun 2026 11:09:33 +0900 Subject: [PATCH 175/183] Added --- projects/BEVFusion/bevfusion/bevfusion.py | 13 ++- projects/BEVFusion/bevfusion/depth_lss_v2.py | 92 +++++++++---------- ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 4 +- ...snet50_fpn_camera_depth_aware_lssv2_50m.py | 28 ------ .../default_30e_8xb16_adamw_linear_cosine.py | 8 +- 5 files changed, 59 insertions(+), 86 deletions(-) delete mode 100644 projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 1cdff32ad..75fbf181f 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -161,11 +161,14 @@ def prepare_camera_depth_aware_parameters( Args: camera_intrinsics: torch.Tensor, the camera intrinsics of shape (B, N, 3, 3). img_aug_matrix: torch.Tensor, the image augmentation matrix of shape (B, N, 4, 4). - lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, N, 4, 4). + lidar_aug_matrix: torch.Tensor, the lidar augmentation matrix of shape (B, 4, 4). camera2lidar: torch.Tensor, the camera to lidar matrix of shape (B, N, 4, 4). Returns: torch.Tensor, the camera depth aware parameters of shape (B*N, N_CAMERA_DEPTH_PARAMETERS). 
""" + B, N, _, _ = camera_intrinsics.shape + lidar_aug_matrix = lidar_aug_matrix.view(B, 1, 4, 4).repeat(1, N, 1, 1) + # (B*N, 15) mlp_input = torch.stack([ camera_intrinsics[:, :, 0, 0], # fx @@ -185,9 +188,9 @@ def prepare_camera_depth_aware_parameters( lidar_aug_matrix[:, :, 2, 2], # r33 ], dim=-1) # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12) - camera2lidar_flatten = camera2lidar[:,:,:3,:].view(-1, 12) - - # (B*N, 15+12) + camera2lidar_flatten = camera2lidar[:,:,:3,:].view(B, N, -1) + + # (B, N, 15+12) mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1) return mlp_input @@ -221,6 +224,7 @@ def extract_img_feat( lidar_aug_matrix_inverse=None, geom_feats=None, using_image_features=False, + camera_depth_aware_parameters=None ) -> Tuple[torch.Tensor, torch.Tensor]: if not using_image_features: @@ -241,6 +245,7 @@ def extract_img_feat( img_aug_matrix_inverse, lidar_aug_matrix_inverse, geom_feats, + camera_depth_aware_parameters=camera_depth_aware_parameters ) return x, pred_depths diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 251fc61fc..4190cc582 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -26,16 +26,14 @@ def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid): super().__init__() # Dont need global pooling because inputs are (B*N, C, 1, 1). 
self.sequeeze_net = nn.Sequential( - [ - # Squeeze with 1x1 convolution - nn.Conv2d(channels, channels, 1, bias=True), - # Activation - act_layer(), - # Expand with 1x1 convolution - nn.Conv2d(channels, channels, 1, bias=True), - # Gate with sigmoid activation - gate_layer(), - ] + # Squeeze with 1x1 convolution + nn.Conv2d(channels, channels, 1, bias=True), + # Activation + act_layer(), + # Expand with 1x1 convolution + nn.Conv2d(channels, channels, 1, bias=True), + # Gate with sigmoid activation + gate_layer(), ) def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch.Tensor: @@ -100,7 +98,6 @@ def __init__( hidden_channels: int, out_channels: int, mlp_drop_out: float, - downsample: int, depth_channels: int, with_cp: bool = False, num_camera_depth_parameters: int = 27) -> None: @@ -118,15 +115,14 @@ def __init__( self.out_channels = out_channels self.mlp_drop_out = mlp_drop_out self.num_camera_depth_parameters = num_camera_depth_parameters - self.downsample = downsample self.depth_channels = depth_channels self.with_cp = with_cp # Input convolution for context/image features # Camera depth aware parameters branch - self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( - self.num_camera_depth_parameters - ) + # self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( + # self.num_camera_depth_parameters + # ) # Context/image feature branch self.context_input_conv = nn.Sequential( @@ -135,30 +131,31 @@ def __init__( nn.BatchNorm2d(hidden_channels), nn.ReLU(inplace=True), ) - self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - in_channels=self.num_camera_depth_parameters, - hidden_channels=hidden_channels, - out_channels=hidden_channels, - drop_out=self.mlp_drop_out - ) - self.context_se = SELayer(channels=hidden_channels) + # self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + # in_channels=self.num_camera_depth_parameters, + # hidden_channels=hidden_channels, + # out_channels=hidden_channels, + # 
drop_out=self.mlp_drop_out + # ) + # self.context_se = SELayer(channels=hidden_channels) self.context_conv = nn.Conv2d( hidden_channels, out_channels, - stride=1, padding=1) + kernel_size=1, + stride=1, padding=0) # Depth branch - self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - in_channels=self.num_camera_depth_parameters, - hidden_channels=hidden_channels, - out_channels=hidden_channels, - drop_out=self.mlp_drop_out - ) - self.depth_se = SELayer(channels=hidden_channels) - self.depth_conv = nn.Sequantial( - BasicBlock(hidden_channels, hidden_channels, downsample=downsample), - BasicBlock(hidden_channels, hidden_channels, downsample=downsample), - BasicBlock(hidden_channels, hidden_channels, downsample=downsample), + # self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + # in_channels=self.num_camera_depth_parameters, + # hidden_channels=hidden_channels, + # out_channels=hidden_channels, + # drop_out=self.mlp_drop_out + # ) + # self.depth_se = SELayer(channels=hidden_channels) + self.depth_conv = nn.Sequential( + BasicBlock(hidden_channels, hidden_channels, downsample=None), + BasicBlock(hidden_channels, hidden_channels), + BasicBlock(hidden_channels, hidden_channels), nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0) ) @@ -170,10 +167,10 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea Returns: torch.Tensor, the output tensor of shape (B*N, C, H, W). 
""" - context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features) - # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) - context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) - context_features = self.context_se(context_features, context_camera_depth_aware_features) + # context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features) + # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) + # context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + # context_features = self.context_se(context_features, context_camera_depth_aware_features) context_features = self.context_conv(context_features) return context_features @@ -185,11 +182,11 @@ def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_feature Returns: torch.Tensor, the output tensor of shape (B*N, C, H, W). 
""" - depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features) - # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) - depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) - # (B*N, C, H, W) - depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features) + # depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features) + # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) + # depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + # # (B*N, C, H, W) + # depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features) if self.with_cp: depth_features = checkpoint(self.depth_conv, depth_features) else: @@ -208,11 +205,11 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters) # (B*N, N_CAMERA_DEPTH_PARAMETERS) - camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) - context_features = self.input_conv(x) + # camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) + context_input_features = self.context_input_conv(x) - context_features = self.context_forward(context_features, camera_depth_aware_features) - depth_features = self.depth_forward(context_features, camera_depth_aware_features) + context_features = self.context_forward(context_input_features, None) + depth_features = self.depth_forward(context_input_features, None) return torch.cat([depth_features, context_features], dim=1) @@ -528,7 +525,6 @@ def __init__( in_channels=in_channels, hidden_channels=in_channels, mlp_drop_out=camera_depth_aware_configs["mlp_drop_out"], - downsample=camera_depth_aware_configs["downsample"], depth_channels=self.D, 
out_channels=self.C, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index 5de72725c..c9afb963b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -1,10 +1,10 @@ _base_ = [ "../default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py", - "../../default/models/resnet50/camera_resnet50_fpn_camera_aware_lssv2_50m.py", + "../../default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py", ] experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type -experiment_name = "bevfusion_camera_resnet50_fpn_camera_depth_aware_lssv2_30e_8xb16_j6gen2_base_50m" +experiment_name = "bevfusion_camera_resnet50_fpn_lss_depth_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py deleted file mode 100644 index 37b98f82d..000000000 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_camera_depth_aware_lssv2_50m.py +++ /dev/null @@ -1,28 +0,0 @@ -_base_ = [ - "./camera_resnet50_fpn_depthlss_120m.py", -] -num_proposals = 200 - -# Image network -model = dict( - depth_gt_downsample=8, - loss_depth_weight=2.0, - view_transform=dict( - type="LSSTransformV2DepthAware", - xbound=[-54.0, 54.0, 0.3], - ybound=[-54.0, 54.0, 0.3], - zbound=[-10.0, 10.0, 20.0], - dbound=[1.0, 60, 0.5], - downsample=2, - 
camera_depth_aware_configs=dict( - mlp_drop_out=0.0, - downsample=8, - ), - ), - bbox_head=dict( - num_proposals=num_proposals, - bbox_coder=dict( - post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], - ), - ), -) diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 2c00474d6..4965e981f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,6 +1,6 @@ # learning rate -lr = 2e-4 -t_max = 1 +lr = 1e-4 +t_max = 3 max_epochs = 30 val_interval = 1 @@ -58,8 +58,8 @@ optim_wrapper = dict( type="OptimWrapper", - optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-3), - clip_grad=dict(max_norm=5.0, norm_type=2), + optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-2), + clip_grad=dict(max_norm=0.1, norm_type=2), ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) From 6c88b6196b81a77d8e40ce5f5325fbe0a30176fd Mon Sep 17 00:00:00 2001 From: KSeabgTan Date: Thu, 4 Jun 2026 11:35:35 +0900 Subject: [PATCH 176/183] Added --- .../camera_resnet50_fpn_lss_depth_50m.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py new file mode 100644 index 000000000..50e8098a5 --- /dev/null +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py @@ -0,0 +1,29 @@ +_base_ = [ + "./camera_resnet50_fpn_depthlss_120m.py", +] +num_proposals = 200 + +# Image 
network +model = dict( + depth_gt_downsample=8, + loss_depth_weight=2.0, + view_transform=dict( + type="LSSTransformV2DepthAware", + xbound=[-54.0, 54.0, 0.3], + ybound=[-54.0, 54.0, 0.3], + zbound=[-10.0, 10.0, 20.0], + dbound=[1.0, 60, 0.5], + downsample=2, + camera_depth_aware_configs=dict( + mlp_drop_out=0.0, + downsample=8, + num_camera_depth_parameters=27 + ), + ), + bbox_head=dict( + num_proposals=num_proposals, + bbox_coder=dict( + post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + ), + ), +) From f05e742ceced32b4893312f937142b83e3a2e1d8 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 4 Jun 2026 20:05:18 +0900 Subject: [PATCH 177/183] Updated --- projects/BEVFusion/bevfusion/bevfusion.py | 92 +++++++++++- projects/BEVFusion/bevfusion/depth_lss_v2.py | 134 ++++++++++++++---- ...fusion_camera_30e_8xb16_j6gen2_base_50m.py | 2 +- ...net50_fpn_lss_30e_2xb16_j6gen2_base_50m.py | 2 +- .../camera_resnet50_fpn_lss_depth_50m.py | 3 +- .../default/pipelines/default_lidar_50m.py | 2 +- .../default_30e_8xb16_adamw_linear_cosine.py | 4 +- 7 files changed, 203 insertions(+), 36 deletions(-) diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 75fbf181f..242ffb658 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -1,7 +1,10 @@ +import math from collections import OrderedDict from copy import deepcopy +from pathlib import Path from typing import Dict, List, Optional, Tuple +import matplotlib.pyplot as plt import numpy as np import torch import torch.distributed as dist @@ -9,6 +12,7 @@ from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample from mmdet3d.utils import OptConfigType, OptMultiConfig, OptSampleList +from mmengine.logging import print_log from mmengine.utils import is_list_of from torch import Tensor from torch.nn import functional as F @@ -36,6 +40,7 @@ def __init__( seg_head: Optional[dict] = None, 
loss_depth_weight: float = 3.0, depth_gt_downsample: int = 1, + visualize_gt_depth_dir: Optional[str] = None, **kwargs, ) -> None: """Initialize BEVFusion model. @@ -79,6 +84,11 @@ def __init__( self._weights_initialized = False self.loss_depth_weight = loss_depth_weight self.depth_gt_downsample = depth_gt_downsample + self.visualize_gt_depth_dir = ( + Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None + ) + if self.visualize_gt_depth_dir is not None: + self.visualize_gt_depth_dir.mkdir(parents=True, exist_ok=True) def _forward( self, batch_inputs_dict: Tensor, batch_data_samples: OptSampleList = [], using_image_features=False, **kwargs @@ -471,7 +481,86 @@ def loss( losses.update(bbox_loss) return losses - + + def _visualize_one_hot_gt_depth( + self, + gt_depths_one_hot: Tensor, + batch_size: int, + num_cameras: int, + height: int, + width: int, + batch_idx: int = 0, + num_channels: int = 6, + ) -> None: + """Save one-hot depth GT maps for the first batch and first few depth channels. + + Args: + gt_depths_one_hot (Tensor): One-hot depth GT of shape [B*N*H*W, D]. + batch_size (int): Batch size B from the original input. + num_cameras (int): Number of camera views N from the original input. + height (int): Original input height H before downsampling. + width (int): Original input width W before downsampling. + batch_idx (int): Batch index to visualize. + num_channels (int): Number of depth-bin channels to visualize. 
+ """ + if self.visualize_gt_depth_dir is None: + return + + if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0: + return + + if batch_size <= batch_idx or num_cameras == 0: + return + + downsample = self.depth_gt_downsample + height_down = height // downsample + width_down = width // downsample + num_depth_bins = gt_depths_one_hot.shape[1] + + num_channels = min(num_channels, num_depth_bins) + if num_channels == 0 or height_down == 0 or width_down == 0: + return + + with torch.no_grad(): + one_hot = gt_depths_one_hot.view( + batch_size, num_cameras, height_down, width_down, num_depth_bins + ) + depth_channels = one_hot[batch_idx, 0, :, :, :num_channels].detach().float().cpu().numpy() + + ncols = min(3, num_channels) + nrows = math.ceil(num_channels / ncols) + fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows), squeeze=False) + + dbounds = self.view_transform.dbound + for ch_idx in range(num_channels): + ax = axes[ch_idx // ncols, ch_idx % ncols] + channel_map = depth_channels[:, :, ch_idx] + depth_m = dbounds[0] + (ch_idx + 0.5) * dbounds[2] + im = ax.imshow(channel_map, cmap="viridis", vmin=0, vmax=1, interpolation="nearest") + ax.set_title(f"batch {batch_idx}, depth bin {ch_idx} (~{depth_m:.1f}m)") + ax.set_xticks([]) + ax.set_yticks([]) + fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04) + + for ch_idx in range(num_channels, nrows * ncols): + axes[ch_idx // ncols, ch_idx % ncols].axis("off") + + fig.suptitle( + f"one-hot gt_depth (batch={batch_idx}, cam=0, bins=0-{num_channels - 1})" + ) + fig.tight_layout() + + if not hasattr(self, "_gt_depth_one_hot_vis_count"): + self._gt_depth_one_hot_vis_count = 0 + self._gt_depth_one_hot_vis_count += 1 + save_path = ( + self.visualize_gt_depth_dir + / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png" + ) + fig.savefig(save_path, dpi=150, bbox_inches="tight") + plt.close(fig) + print_log(f"Saved one-hot gt_depth visualization to {save_path.resolve()}") + def 
get_downsampled_gt_depth(self, gt_depths): """ Input: @@ -501,6 +590,7 @@ def get_downsampled_gt_depth(self, gt_depths): # gt_depths = torch.clamp(gt_depths, max=float(D)) gt_depths = F.one_hot( gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:] + self._visualize_one_hot_gt_depth(gt_depths, B, N, H, W) return gt_depths.float() def get_depth_loss(self, depth_labels, depth_preds): diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 4190cc582..23c74d82d 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -7,7 +7,10 @@ import torch from mmdet3d.registry import MODELS from mmengine.logging import print_log +from mmcv.runner import BaseModule +from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer from mmdet.models.backbones.resnet import BasicBlock + from torch import nn from torch.utils.checkpoint import checkpoint @@ -15,6 +18,56 @@ from .ops import bev_pool_v2 +class CustomDepthBasicBlock(BaseModule): + def __init__( + self, + in_channels: int, + out_channel: int, + kernel_size: int = 3, + stride: int = 1, + dilation: int = 1, + with_cp: bool = False, + downsample: Optional[nn.Module] = None, + init_cfg: OptMultiConfig = None): + super().__init__(init_cfg) + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2) + self.conv1 = build_conv_layer( + conv_cfg, + in_channels, + out_channel, + kernel_size, + stride=stride, + padding=dilation, + dilation=dilation, bias=False + ) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x: torch.Tensor) -> 
torch.Tensor: + identity = x + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + return out + class SELayer(nn.Module): """ Squeeze-and-Excitation (SE) layer. @@ -120,45 +173,67 @@ def __init__( # Input convolution for context/image features # Camera depth aware parameters branch - # self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( - # self.num_camera_depth_parameters - # ) + self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( + self.num_camera_depth_parameters + ) # Context/image feature branch + # self.context_input_conv = nn.Sequential( + # nn.Conv2d( + # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False), + # nn.BatchNorm2d(hidden_channels), + # nn.ReLU(inplace=True), + # ) self.context_input_conv = nn.Sequential( nn.Conv2d( - in_channels, hidden_channels, kernel_size=3, stride=1, padding=1), + in_channels, hidden_channels, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(hidden_channels), nn.ReLU(inplace=True), ) - # self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - # in_channels=self.num_camera_depth_parameters, - # hidden_channels=hidden_channels, - # out_channels=hidden_channels, - # drop_out=self.mlp_drop_out - # ) - # self.context_se = SELayer(channels=hidden_channels) + self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out + ) + self.context_se = SELayer(channels=hidden_channels) self.context_conv = nn.Conv2d( hidden_channels, - out_channels, + depth_channels + out_channels, kernel_size=1, - stride=1, padding=0) + stride=1, padding=0, bias=True) # Depth branch - # self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - # in_channels=self.num_camera_depth_parameters, 
- # hidden_channels=hidden_channels, - # out_channels=hidden_channels, - # drop_out=self.mlp_drop_out - # ) - # self.depth_se = SELayer(channels=hidden_channels) + self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out + ) + self.depth_se = SELayer(channels=hidden_channels) self.depth_conv = nn.Sequential( BasicBlock(hidden_channels, hidden_channels, downsample=None), BasicBlock(hidden_channels, hidden_channels), BasicBlock(hidden_channels, hidden_channels), - nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0) + nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) ) - + # self._init_weight() + + def _init_weight(self): + print_log("Initializing depth weights...", logger="current") + for m in self.modules(): + if isinstance(m, nn.Conv2d): + torch.nn.init.kaiming_normal_(m.weight) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm1d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: """ Args: @@ -167,10 +242,10 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea Returns: torch.Tensor, the output tensor of shape (B*N, C, H, W). 
""" - # context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features) + context_camera_depth_aware_features = self.context_camera_depth_aware_mlp(camera_depth_aware_features) # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) - # context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) - # context_features = self.context_se(context_features, context_camera_depth_aware_features) + context_camera_depth_aware_features = context_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + context_features = self.context_se(context_features, context_camera_depth_aware_features) context_features = self.context_conv(context_features) return context_features @@ -205,11 +280,12 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters) # (B*N, N_CAMERA_DEPTH_PARAMETERS) - # camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) + camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) context_input_features = self.context_input_conv(x) - - context_features = self.context_forward(context_input_features, None) - depth_features = self.depth_forward(context_input_features, None) + context_features = self.context_forward(context_input_features, camera_depth_aware_features) + # return context_features + # context_features = self.context_forward(context_input_features, None) + depth_features = self.depth_forward(context_input_features, camera_depth_aware_features) return torch.cat([depth_features, context_features], dim=1) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py index 
591399a4e..ffe9f1363 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/default_bevfusion_camera_30e_8xb16_j6gen2_base_50m.py @@ -11,7 +11,7 @@ custom_imports["imports"] += ["autoware_ml.detection3d.datasets.transforms"] # user setting -data_root = "data/t4datasets/" +data_root = "data/t4dataset/" info_directory_path = "info/kokseang_2_8/" # Dataset parameters diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py index c9afb963b..9074f14d2 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-C/resnet50/bevfusion_camera_resnet50_fpn_lss_30e_2xb16_j6gen2_base_50m.py @@ -3,7 +3,7 @@ "../../default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py", ] -experiment_group_name = "bevfusion_camera/j6gen2_depth_base_adjusted/" + _base_.dataset_type +experiment_group_name = "bevfusion_camera/j6gen2_base_depth_adjust_v2/" + _base_.dataset_type experiment_name = "bevfusion_camera_resnet50_fpn_lss_depth_30e_8xb16_j6gen2_base_50m" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py index 50e8098a5..997fa1e76 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py @@ -6,8 +6,9 @@ # Image network model = dict( depth_gt_downsample=8, - 
loss_depth_weight=2.0, + loss_depth_weight=1.0, view_transform=dict( + # type="LSSTransformV2", type="LSSTransformV2DepthAware", xbound=[-54.0, 54.0, 0.3], ybound=[-54.0, 54.0, 0.3], diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index 51688a7aa..f49e2dbb4 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -1,6 +1,6 @@ # Dataset parameters backend_args = None -num_workers = 16 +num_workers = 4 input_modality = dict(use_lidar=True, use_camera=False) # range setting diff --git a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py index 4965e981f..95f5f96bd 100644 --- a/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py +++ b/projects/BEVFusion/configs/t4dataset/default/schedulers/default_30e_8xb16_adamw_linear_cosine.py @@ -1,5 +1,5 @@ # learning rate -lr = 1e-4 +lr = 2e-4 t_max = 3 max_epochs = 30 val_interval = 1 @@ -59,7 +59,7 @@ optim_wrapper = dict( type="OptimWrapper", optimizer=dict(type="AdamW", lr=lr, weight_decay=1e-2), - clip_grad=dict(max_norm=0.1, norm_type=2), + clip_grad=dict(max_norm=5.0, norm_type=2), ) auto_scale_lr = dict(enable=False, base_batch_size=train_gpu_size * train_batch_size) From 69300aae2b77ab70b42309ba342f56899f0951ba Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Thu, 4 Jun 2026 21:33:00 +0900 Subject: [PATCH 178/183] Updated --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 48 ++++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 23c74d82d..c3cef18ae 100644 --- 
a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -7,9 +7,10 @@ import torch from mmdet3d.registry import MODELS from mmengine.logging import print_log -from mmcv.runner import BaseModule -from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer -from mmdet.models.backbones.resnet import BasicBlock +from mmengine.model import BaseModule +from mmcv.cnn import build_conv_layer, build_norm_layer +# from mmdet.models.backbones.resnet import BasicBlock +from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig from torch import nn from torch.utils.checkpoint import checkpoint @@ -23,29 +24,32 @@ def __init__( self, in_channels: int, out_channel: int, - kernel_size: int = 3, + padding: int = 0, + kernel_size: int = 1, stride: int = 1, dilation: int = 1, - with_cp: bool = False, + with_cp: bool = False, + norm_cfg=dict(type='BN'), + conv_cfg=None, downsample: Optional[nn.Module] = None, init_cfg: OptMultiConfig = None): super().__init__(init_cfg) - self.norm1_name, norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1) - self.norm2_name, norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2) + self.norm1_name, self.norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1) + self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2) self.conv1 = build_conv_layer( conv_cfg, in_channels, out_channel, kernel_size, stride=stride, - padding=dilation, + padding=padding, dilation=dilation, bias=False ) - self.add_module(self.norm1_name, norm1) + self.add_module(self.norm1_name, self.norm1) self.conv2 = build_conv_layer( - conv_cfg, planes, planes, 3, padding=1, bias=False) - self.add_module(self.norm2_name, norm2) + conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False) + self.add_module(self.norm2_name, self.norm2) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -199,7 +203,7 @@ def __init__( self.context_se = 
SELayer(channels=hidden_channels) self.context_conv = nn.Conv2d( hidden_channels, - depth_channels + out_channels, + out_channels, kernel_size=1, stride=1, padding=0, bias=True) @@ -211,12 +215,18 @@ def __init__( drop_out=self.mlp_drop_out ) self.depth_se = SELayer(channels=hidden_channels) + # self.depth_conv = nn.Sequential( + # BasicBlock(hidden_channels, hidden_channels, downsample=None), + # BasicBlock(hidden_channels, hidden_channels), + # BasicBlock(hidden_channels, hidden_channels), + # nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) + # ) self.depth_conv = nn.Sequential( - BasicBlock(hidden_channels, hidden_channels, downsample=None), - BasicBlock(hidden_channels, hidden_channels), - BasicBlock(hidden_channels, hidden_channels), + CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1), + CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), + CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) - ) + ) # self._init_weight() def _init_weight(self): @@ -257,11 +267,11 @@ def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_feature Returns: torch.Tensor, the output tensor of shape (B*N, C, H, W). 
""" - # depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features) + depth_camera_depth_aware_features = self.depth_camera_depth_aware_mlp(camera_depth_aware_features) # # (B*N, mlp_out_channels) -> (B*N, mlp_out_channels, 1, 1) - # depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) + depth_camera_depth_aware_features = depth_camera_depth_aware_features.view(-1, self.hidden_channels, 1, 1) # # (B*N, C, H, W) - # depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features) + depth_features = self.depth_se(depth_features, depth_camera_depth_aware_features) if self.with_cp: depth_features = checkpoint(self.depth_conv, depth_features) else: From e15de56e9adb9029e6886a3b65909bfee90fdf89 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 5 Jun 2026 10:04:22 +0900 Subject: [PATCH 179/183] Updated --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 23 +++----------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index c3cef18ae..3c25402e4 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -222,28 +222,13 @@ def __init__( # nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) # ) self.depth_conv = nn.Sequential( - CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1), - CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), - CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), + # CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1, padding=0), + # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), + # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), nn.Conv2d(hidden_channels, depth_channels, 
kernel_size=1, stride=1, padding=0, bias=True) ) # self._init_weight() - def _init_weight(self): - print_log("Initializing depth weights...", logger="current") - for m in self.modules(): - if isinstance(m, nn.Conv2d): - torch.nn.init.kaiming_normal_(m.weight) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm1d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: """ Args: @@ -293,8 +278,6 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) context_input_features = self.context_input_conv(x) context_features = self.context_forward(context_input_features, camera_depth_aware_features) - # return context_features - # context_features = self.context_forward(context_input_features, None) depth_features = self.depth_forward(context_input_features, camera_depth_aware_features) return torch.cat([depth_features, context_features], dim=1) From aa3818d467bcfb04aed529cf69949235f0c6c6aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 5 Jun 2026 03:13:05 +0000 Subject: [PATCH 180/183] ci(pre-commit): autofix --- autoware_ml/detection3d/datasets/t4dataset.py | 4 +- .../datasets/transforms/__init__.py | 2 +- .../datasets/transforms/local_3d_bbox.py | 19 +- projects/BEVFusion/bevfusion/bevfusion.py | 111 +++++----- .../BEVFusion/bevfusion/bevfusion_head.py | 55 ++--- .../bevfusion/bevfusion_voxel_encoder.py | 195 ++++++++---------- .../bevfusion/custom_sparse_conv_tensor.py | 13 +- projects/BEVFusion/bevfusion/depth_lss.py | 2 +- projects/BEVFusion/bevfusion/depth_lss_v2.py | 151 +++++++------- projects/BEVFusion/bevfusion/loading.py | 70 ++++--- 
.../bevfusion/ops/bev_pool_v2/__init__.py | 2 +- projects/BEVFusion/bevfusion/ops/topk/topk.py | 17 +- .../BEVFusion/bevfusion/sparse_encoder.py | 7 +- projects/BEVFusion/bevfusion/transforms_3d.py | 1 + projects/BEVFusion/bevfusion/utils.py | 2 +- ...y_lidar_only_intensity_tensorrt_dynamic.py | 6 +- ...econd_secfpn_30e_8xb16_j6gen2_base_120m.py | 16 +- ...cond_secfpn_30e_8xb16_jpntaxi_base_120m.py | 20 +- ...voxel_second_secfpn_50e_8xb16_base_120m.py | 2 +- .../default_lidar_second_secfpn_120m.py | 6 +- .../camera_resnet50_fpn_depthlss_120m.py | 2 +- .../resnet50/camera_resnet50_fpn_lss_50m.py | 4 +- .../camera_resnet50_fpn_lss_depth_50m.py | 10 +- .../pipelines/cameras/default_camera_120m.py | 2 +- .../pipelines/cameras/default_camera_50m.py | 15 +- .../default_camera_lidar_intensity_120m.py | 4 +- .../default/pipelines/default_lidar_120m.py | 2 +- .../default/pipelines/default_lidar_50m.py | 2 +- .../pipelines/default_lidar_intensity_120m.py | 2 +- projects/BEVFusion/deploy/containers.py | 14 +- projects/BEVFusion/deploy/exporter.py | 13 +- 31 files changed, 390 insertions(+), 381 deletions(-) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index 384cc13b4..38c5f69dd 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -2,10 +2,10 @@ from typing import List import numpy as np +import tqdm from mmdet3d.datasets import NuScenesDataset from mmengine.logging import print_log from mmengine.registry import DATASETS -import tqdm @DATASETS.register_module() @@ -63,7 +63,7 @@ def filter_data(self) -> List[dict]: if camera_order not in entry["images"]: filtered = True break - + if entry["images"][camera_order]["img_path"] is None: filtered = True break diff --git a/autoware_ml/detection3d/datasets/transforms/__init__.py b/autoware_ml/detection3d/datasets/transforms/__init__.py index b20961db6..a63ff1eea 100644 --- 
a/autoware_ml/detection3d/datasets/transforms/__init__.py +++ b/autoware_ml/detection3d/datasets/transforms/__init__.py @@ -1,4 +1,4 @@ -from .object_min_points_filter import ObjectMinPointsFilter from .local_3d_bbox import Local3DBBoxExpand +from .object_min_points_filter import ObjectMinPointsFilter __all__ = ["ObjectMinPointsFilter", "Local3DBBoxExpand"] diff --git a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py index 96772cf44..ae06d4005 100644 --- a/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py +++ b/autoware_ml/detection3d/datasets/transforms/local_3d_bbox.py @@ -1,7 +1,6 @@ from typing import List -import numpy as np - +import numpy as np from mmcv.transforms import BaseTransform from mmdet3d.structures.ops import box_np_ops from mmengine.registry import TRANSFORMS @@ -13,11 +12,11 @@ class Local3DBBoxExpand(BaseTransform): Args: expand_widths: (List[float]): Uniformly sampled expand width. - width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D + width_dim: (int): The dimension of the width. Default is 4, which is the width dimension of the 3D bounding box. Since 3D Bbox is in the format of [x, y, z, dx, dy, dz, heading], the width dimension is the 4th dimension. label_ids: (List[int]): The label IDs to expand. If None, all label IDs will be expanded. - """ + """ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: List[int] = None) -> None: assert isinstance(expand_widths, list) @@ -26,7 +25,7 @@ def __init__(self, expand_widths: List[float], width_dim: int = 4, label_ids: Li self.expand_widths = expand_widths self.width_dim = width_dim self.label_ids = label_ids - + def transform(self, input_dict: dict) -> dict: """Call function to locally augment the 3D bounding boxes by scaling the width. 
@@ -37,19 +36,19 @@ def transform(self, input_dict: dict) -> dict: dict: Results after locally augmenting the 3D bounding boxes by scaling the width, 'gt_bboxes_3d' \ key is updated in the result dict. """ - # Label mask + # Label mask if self.label_ids is not None: - label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] + label_masks = [True if label in self.label_ids else False for label in input_dict["gt_labels_3d"]] else: label_masks = np.ones(len(input_dict["gt_labels_3d"]), dtype=bool) for i in range(len(input_dict["gt_bboxes_3d"])): if not label_masks[i]: - continue - + continue + expand_width = np.random.uniform(self.expand_widths[0], self.expand_widths[1]) input_dict["gt_bboxes_3d"].tensor[i, self.width_dim] += expand_width - + return input_dict def __repr__(self) -> str: diff --git a/projects/BEVFusion/bevfusion/bevfusion.py b/projects/BEVFusion/bevfusion/bevfusion.py index 242ffb658..2eb3ef3d3 100644 --- a/projects/BEVFusion/bevfusion/bevfusion.py +++ b/projects/BEVFusion/bevfusion/bevfusion.py @@ -84,9 +84,7 @@ def __init__( self._weights_initialized = False self.loss_depth_weight = loss_depth_weight self.depth_gt_downsample = depth_gt_downsample - self.visualize_gt_depth_dir = ( - Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None - ) + self.visualize_gt_depth_dir = Path(visualize_gt_depth_dir) if visualize_gt_depth_dir is not None else None if self.visualize_gt_depth_dir is not None: self.visualize_gt_depth_dir.mkdir(parents=True, exist_ok=True) @@ -159,10 +157,10 @@ def with_bbox_head(self): def with_seg_head(self): """bool: Whether the detector has a segmentation head.""" return hasattr(self, "seg_head") and self.seg_head is not None - + def prepare_camera_depth_aware_parameters( - self, - camera_intrinsics: torch.Tensor, + self, + camera_intrinsics: torch.Tensor, img_aug_matrix: torch.Tensor, lidar_aug_matrix: torch.Tensor, camera2lidar: torch.Tensor, @@ -178,28 +176,31 @@ def 
prepare_camera_depth_aware_parameters( """ B, N, _, _ = camera_intrinsics.shape lidar_aug_matrix = lidar_aug_matrix.view(B, 1, 4, 4).repeat(1, N, 1, 1) - + # (B*N, 15) - mlp_input = torch.stack([ - camera_intrinsics[:, :, 0, 0], # fx - camera_intrinsics[:, :, 1, 1], # fy - camera_intrinsics[:, :, 0, 2], # cx - camera_intrinsics[:, :, 1, 2], # cy - img_aug_matrix[:, :, 0, 0], # r11 - img_aug_matrix[:, :, 0, 1], # r12 - img_aug_matrix[:, :, 0, 3], # t1 - img_aug_matrix[:, :, 1, 0], # r21 - img_aug_matrix[:, :, 1, 1], # r22 - img_aug_matrix[:, :, 1, 3], # t2 - lidar_aug_matrix[:, :, 0, 0], # r11 - lidar_aug_matrix[:, :, 0, 1], # r12 - lidar_aug_matrix[:, :, 1, 0], # r21 - lidar_aug_matrix[:, :, 1, 1], # r22 - lidar_aug_matrix[:, :, 2, 2], # r33 - ], dim=-1) + mlp_input = torch.stack( + [ + camera_intrinsics[:, :, 0, 0], # fx + camera_intrinsics[:, :, 1, 1], # fy + camera_intrinsics[:, :, 0, 2], # cx + camera_intrinsics[:, :, 1, 2], # cy + img_aug_matrix[:, :, 0, 0], # r11 + img_aug_matrix[:, :, 0, 1], # r12 + img_aug_matrix[:, :, 0, 3], # t1 + img_aug_matrix[:, :, 1, 0], # r21 + img_aug_matrix[:, :, 1, 1], # r22 + img_aug_matrix[:, :, 1, 3], # t2 + lidar_aug_matrix[:, :, 0, 0], # r11 + lidar_aug_matrix[:, :, 0, 1], # r12 + lidar_aug_matrix[:, :, 1, 0], # r21 + lidar_aug_matrix[:, :, 1, 1], # r22 + lidar_aug_matrix[:, :, 2, 2], # r33 + ], + dim=-1, + ) # (B, N, 4, 4) -> (B, N, 3, 4) -> (B*N, 12) - camera2lidar_flatten = camera2lidar[:,:,:3,:].view(B, N, -1) - + camera2lidar_flatten = camera2lidar[:, :, :3, :].view(B, N, -1) + # (B, N, 15+12) mlp_input = torch.cat([mlp_input, camera2lidar_flatten], dim=-1) return mlp_input @@ -234,13 +235,13 @@ def extract_img_feat( lidar_aug_matrix_inverse=None, geom_feats=None, using_image_features=False, - camera_depth_aware_parameters=None + camera_depth_aware_parameters=None, ) -> Tuple[torch.Tensor, torch.Tensor]: if not using_image_features: x = self.get_image_backbone_features(x) - with torch.amp.autocast("cuda",enabled=False): 
+ with torch.amp.autocast("cuda", enabled=False): # with torch.autocast(device_type='cuda', dtype=torch.float32): x, pred_depths = self.view_transform( x, @@ -255,7 +256,7 @@ def extract_img_feat( img_aug_matrix_inverse, lidar_aug_matrix_inverse, geom_feats, - camera_depth_aware_parameters=camera_depth_aware_parameters + camera_depth_aware_parameters=camera_depth_aware_parameters, ) return x, pred_depths @@ -305,11 +306,11 @@ def voxelize(self, points): coords = torch.cat(coords, dim=0) assert len(sizes) > 0, "No points in the voxel" sizes = torch.cat(sizes, dim=0) - + # if self.voxelize_reduce: # feats = feats.sum(dim=1, keepdim=False) / sizes.type_as(feats).view(-1, 1) # feats = feats.contiguous() - + return feats, coords, sizes def predict( @@ -467,15 +468,17 @@ def loss( with torch.amp.autocast("cuda", enabled=False): gt_depths = torch.stack( [ - meta["gt_depths"] - if isinstance(meta["gt_depths"], torch.Tensor) - else torch.as_tensor(meta["gt_depths"]) + ( + meta["gt_depths"] + if isinstance(meta["gt_depths"], torch.Tensor) + else torch.as_tensor(meta["gt_depths"]) + ) for meta in batch_input_metas ] ).to(device=pred_depths.device, dtype=torch.float32) depth_loss = self.get_depth_loss(gt_depths, pred_depths) losses["loss_depth"] = depth_loss - + if self.with_bbox_head: bbox_loss = self.bbox_head.loss(feats, batch_data_samples) losses.update(bbox_loss) @@ -522,9 +525,7 @@ def _visualize_one_hot_gt_depth( return with torch.no_grad(): - one_hot = gt_depths_one_hot.view( - batch_size, num_cameras, height_down, width_down, num_depth_bins - ) + one_hot = gt_depths_one_hot.view(batch_size, num_cameras, height_down, width_down, num_depth_bins) depth_channels = one_hot[batch_idx, 0, :, :, :num_channels].detach().float().cpu().numpy() ncols = min(3, num_channels) @@ -545,18 +546,13 @@ def _visualize_one_hot_gt_depth( for ch_idx in range(num_channels, nrows * ncols): axes[ch_idx // ncols, ch_idx % ncols].axis("off") - fig.suptitle( - f"one-hot gt_depth 
(batch={batch_idx}, cam=0, bins=0-{num_channels - 1})" - ) + fig.suptitle(f"one-hot gt_depth (batch={batch_idx}, cam=0, bins=0-{num_channels - 1})") fig.tight_layout() if not hasattr(self, "_gt_depth_one_hot_vis_count"): self._gt_depth_one_hot_vis_count = 0 self._gt_depth_one_hot_vis_count += 1 - save_path = ( - self.visualize_gt_depth_dir - / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png" - ) + save_path = self.visualize_gt_depth_dir / f"gt_depth_one_hot_{self._gt_depth_one_hot_vis_count:06d}.png" fig.savefig(save_path, dpi=150, bbox_inches="tight") plt.close(fig) print_log(f"Saved one-hot gt_depth visualization to {save_path.resolve()}") @@ -571,25 +567,26 @@ def get_downsampled_gt_depth(self, gt_depths): B, N, H, W = gt_depths.shape D = self.view_transform.D dbounds = self.view_transform.dbound - gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, - self.depth_gt_downsample, W // self.depth_gt_downsample, - self.depth_gt_downsample, 1) + gt_depths = gt_depths.view( + B * N, + H // self.depth_gt_downsample, + self.depth_gt_downsample, + W // self.depth_gt_downsample, + self.depth_gt_downsample, + 1, + ) gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous() gt_depths = gt_depths.view(-1, self.depth_gt_downsample * self.depth_gt_downsample) - gt_depths_tmp = torch.where(gt_depths == 0.0, - 1e5 * torch.ones_like(gt_depths), - gt_depths) + gt_depths_tmp = torch.where(gt_depths == 0.0, 1e5 * torch.ones_like(gt_depths), gt_depths) gt_depths = torch.min(gt_depths_tmp, dim=-1).values - gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, - W // self.depth_gt_downsample) + gt_depths = gt_depths.view(B * N, H // self.depth_gt_downsample, W // self.depth_gt_downsample) gt_depths = (gt_depths - (dbounds[0] - dbounds[2])) / dbounds[2] # gt_depths = torch.where(gt_depths >= 0.0, gt_depths, torch.zeros_like(gt_depths)) # gt_depths = torch.clamp(gt_depths, max=float(D)) gt_depths = torch.where((gt_depths >= 0.0) & (gt_depths < D + 
1), gt_depths, torch.zeros_like(gt_depths)) # gt_depths = torch.clamp(gt_depths, max=float(D)) - gt_depths = F.one_hot( - gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:] + gt_depths = F.one_hot(gt_depths.long(), num_classes=D + 1).view(-1, D + 1)[:, 1:] self._visualize_one_hot_gt_depth(gt_depths, B, N, H, W) return gt_depths.float() @@ -603,6 +600,6 @@ def get_depth_loss(self, depth_labels, depth_preds): depth_loss = F.binary_cross_entropy( depth_preds, depth_labels, - reduction='none', + reduction="none", ).sum() / max(1.0, fg_mask.sum()) - return self.loss_depth_weight * depth_loss \ No newline at end of file + return self.loss_depth_weight * depth_loss diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 3e464ebc4..8dc4bce45 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -169,9 +169,9 @@ def __init__( y_size = self.test_cfg["grid_size"][1] // self.test_cfg["out_size_factor"] self.spatial_dim = x_size * y_size bev_pos = self.create_2D_grid(x_size, y_size) - + # Register the bev_pos as a buffer so it moves to the GPU automatically. 
- self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) + self.register_buffer("bev_pos", bev_pos, persistent=False) # (1, H * W, 2) self.img_feat_pos = None self.img_feat_collapsed_pos = None @@ -189,22 +189,23 @@ def __init__( self.dense_heatmap_exclude_pooling_classes = sorted( list(set(self.class_name_to_indices.values()) - set(self.dense_heatmap_pooling_class_indices)) - ) + ) # Pre-compute the correct order of the classes for the final local_max - heatmap_concat_order = self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes - local_concat_class_remapping = [ - heatmap_concat_order.index(i) - for i in range(self.num_classes) - ] + heatmap_concat_order = ( + self.dense_heatmap_pooling_class_indices + self.dense_heatmap_exclude_pooling_classes + ) + local_concat_class_remapping = [heatmap_concat_order.index(i) for i in range(self.num_classes)] else: self.dense_heatmap_pooling_class_indices = None self.dense_heatmap_exclude_pooling_classes = None local_concat_class_remapping = [i for i in range(self.num_classes)] - + # Register the remapping as a buffer so it moves to the GPU automatically and gets saved in the state_dict. 
- self.register_buffer("local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False) + self.register_buffer( + "local_concat_class_remapping", torch.tensor(local_concat_class_remapping), persistent=False + ) self.local_heatmap_padding = self.nms_kernel_size // 2 - + # NMS clusters self.nms_clusters = self.test_cfg.get("nms_clusters", []) # Add class indices for nms @@ -290,7 +291,7 @@ def forward_single(self, inputs, metas): dense_heatmap = self.heatmap_head(fusion_feat.float()) heatmap = dense_heatmap.detach().sigmoid() if self.dense_heatmap_pooling_class_indices is not None: - # Pooling + # Pooling selected_heatmap = heatmap[:, self.dense_heatmap_pooling_class_indices, :, :] local_max_inner = F.max_pool2d( selected_heatmap, @@ -301,31 +302,35 @@ def forward_single(self, inputs, metas): # 2. Restore spatial size using F.pad instead of slice mutation local_max = F.pad( - local_max_inner, - (self.local_heatmap_padding, self.local_heatmap_padding, self.local_heatmap_padding, - self.local_heatmap_padding), - mode="constant", - value=0.0 + local_max_inner, + ( + self.local_heatmap_padding, + self.local_heatmap_padding, + self.local_heatmap_padding, + self.local_heatmap_padding, + ), + mode="constant", + value=0.0, ) - + # 3. 
Any non-pooling classes if self.dense_heatmap_exclude_pooling_classes: excluded_local_max = heatmap[:, self.dense_heatmap_exclude_pooling_classes, :, :] local_max = torch.cat([local_max, excluded_local_max], dim=1) local_max = local_max[:, self.local_concat_class_remapping, :, :] else: - local_max = heatmap + local_max = heatmap heatmap = heatmap * (heatmap == local_max) # (BS, num_classes, H*W) heatmap = heatmap.view(-1, self.num_classes, self.spatial_dim) # top num_proposals among all classes - flattened_heatmap = heatmap.view(-1, self.num_classes*self.spatial_dim) - + flattened_heatmap = heatmap.view(-1, self.num_classes * self.spatial_dim) + # Use topk instead of argsort to avoid sorting the entire flattened heatmap. top_proposals = topk(x=flattened_heatmap, k=self.num_proposals, dim=-1, sorted=False) - + # 2. Calculate class and spatial indices # Use shape[-1] dynamically to handle grid sizes safely. top_proposals_class = top_proposals // self.spatial_dim @@ -340,7 +345,7 @@ def forward_single(self, inputs, metas): one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1) query_cat_encoding = self.class_encoding(one_hot.float()) query_feat += query_cat_encoding - + # (B, N, 2) query_pos = self.bev_pos.squeeze(0)[top_proposals_index] ################################# @@ -350,7 +355,9 @@ def forward_single(self, inputs, metas): for i in range(self.num_decoder_layers): # Transformer Decoder Layer # :param query: B C Pq :param query_pos: B Pq 3/6 - query_feat = self.decoder[i](query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos) + query_feat = self.decoder[i]( + query_feat, key=fusion_feat_flatten, query_pos=query_pos, key_pos=self.bev_pos + ) # Prediction res_layer = self.prediction_heads[i](query_feat) diff --git a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py index 30afdc41d..d7801482a 100644 --- 
a/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py +++ b/projects/BEVFusion/bevfusion/bevfusion_voxel_encoder.py @@ -1,21 +1,19 @@ from typing import Optional, Tuple -import torch import numpy as np -from torch import Tensor, nn - +import torch +from mmdet3d.models.voxel_encoders.utils import PFNLayer, get_paddings_indicator from mmdet3d.registry import MODELS -from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator, PFNLayer +from torch import Tensor, nn @MODELS.register_module() class HardSimpleVoxelSinCosEncoder(nn.Module): - def __init__(self, - min_norm_values: Tuple[float], - max_norm_values: Tuple[float], - in_channels: Optional[int] = 4) -> None: + def __init__( + self, min_norm_values: Tuple[float], max_norm_values: Tuple[float], in_channels: Optional[int] = 4 + ) -> None: """ - Simple voxel encoder that only performs mean pooling on the normalize features, and then + Simple voxel encoder that only performs mean pooling on the normalize features, and then performs sin-cos (fourier encoding) on each voxel channels. The output shape of each voxel is (N, feature_channels*2). @@ -25,30 +23,29 @@ def __init__(self, in_channels (int): Number of input channels. 
""" super().__init__() - + # Create PillarFeatureNet layers self.in_channels = in_channels - - # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP + + # Convert the ((x - min) / (max - min)) * pi * exponents to x * scale + bias for folding them into one OP min_norm_values = torch.tensor(min_norm_values) max_norm_values = torch.tensor(max_norm_values) # Let alpha = pi * exponents, beta = max - min - # y = ((x - min) / beta) * alpha + # y = ((x - min) / beta) * alpha # y = alpha / beta * (x - min) - # y = (alpha / beta) * x - (alpha / beta) * min - # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta + # y = (alpha / beta) * x - (alpha / beta) * min + # Therefore, scale = alpha / beta, bias = - (alpha * min) / beta # y = scale * x + bias exponents = (2 ** torch.arange(0, self.in_channels)).float() - alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) - beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) + alpha = (torch.pi * exponents).unsqueeze(0) # (1, C) + beta = (max_norm_values - min_norm_values).unsqueeze(1) # (C, 1) scale = alpha / beta - bias = - (alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) - - self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) - self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C) + bias = -(alpha * min_norm_values.unsqueeze(1)) / beta # (C, C) - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: + self.register_buffer("exponent_scale", scale.unsqueeze(0), persistent=False) # (1, C, C) + self.register_buffer("exponent_bias", bias.unsqueeze(0), persistent=False) # (1, C, C) + + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. 
Args: @@ -63,26 +60,28 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, """ # Mean in the voxel # (N, M, C) -> (N, C) - voxel_mean_features = (features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)).contiguous() + voxel_mean_features = ( + features.sum(dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1) + ).contiguous() # x * scale + bias, (1, C, C) + (1, C, C) * (N, C, 1) -> (N, C, C) # FMA (fused multiply-add): y = bias + scale * voxel_mean_features y = torch.addcmul(self.exponent_bias, self.exponent_scale, voxel_mean_features.unsqueeze(-1)) # SinCos encoding # (N*C, C) -> (N, C*C) - y = y.reshape(-1, self.in_channels*self.in_channels) + y = y.reshape(-1, self.in_channels * self.in_channels) # (N, C*C) -> (N, C*C*2) voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) - + return voxel_fourier_features @MODELS.register_module() class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): """BEVFusion Voxel Encoder Feature Net. - - The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the - offset features, for example, distances. After that, it concatenates the fourier features and the PFN features + + The network is same as HardSimpleVoxelSinCosEncoder, but it performs PFNLayers on the + offset features, for example, distances. After that, it concatenates the fourier features and the PFN features along the channel dimension for each voxel. Args: @@ -93,25 +92,24 @@ class BEVFusionVoxelFeatureNet(HardSimpleVoxelSinCosEncoder): N PFNLayers. Defaults to (64, ). 
""" - def __init__(self, - min_norm_values: Optional[Tuple[float]] = None, - max_norm_values: Optional[Tuple[float]] = None, - in_channels: Optional[int] = 4, - feat_channels: Optional[tuple] = (64, ), - with_distance: Optional[bool] = False, - with_cluster_center: Optional[bool] = True, - with_voxel_center: Optional[bool] = True, - voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), - point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, - 40, 1), - norm_cfg: Optional[dict] = dict( - type='BN1d', eps=1e-3, momentum=0.01), - mode: Optional[str] = 'max', - legacy: Optional[bool] = True): - + def __init__( + self, + min_norm_values: Optional[Tuple[float]] = None, + max_norm_values: Optional[Tuple[float]] = None, + in_channels: Optional[int] = 4, + feat_channels: Optional[tuple] = (64,), + with_distance: Optional[bool] = False, + with_cluster_center: Optional[bool] = True, + with_voxel_center: Optional[bool] = True, + voxel_size: Optional[Tuple[float]] = (0.2, 0.2, 4), + point_cloud_range: Optional[Tuple[float]] = (0, -40, -3, 70.4, 40, 1), + norm_cfg: Optional[dict] = dict(type="BN1d", eps=1e-3, momentum=0.01), + mode: Optional[str] = "max", + legacy: Optional[bool] = True, + ): + super(BEVFusionVoxelFeatureNet, self).__init__( - min_norm_values=min_norm_values, - max_norm_values=max_norm_values, in_channels=in_channels + min_norm_values=min_norm_values, max_norm_values=max_norm_values, in_channels=in_channels ) assert len(feat_channels) > 0 self.legacy = legacy @@ -122,12 +120,12 @@ def __init__(self, pfn_in_channels += 3 if with_distance: pfn_in_channels += 1 - + assert pfn_in_channels > 0, "pfn_in_channels must be greater than 0" self._with_distance = with_distance self._with_cluster_center = with_cluster_center self._with_voxel_center = with_voxel_center - + # Create VoxelFeatureNet layers feat_channels = [pfn_in_channels] + list(feat_channels) pfn_layers = [] @@ -138,13 +136,7 @@ def __init__(self, last_layer = False else: last_layer = True - 
pfn_layers.append( - PFNLayer( - in_filters, - out_filters, - norm_cfg=norm_cfg, - last_layer=last_layer, - mode=mode)) + pfn_layers.append(PFNLayer(in_filters, out_filters, norm_cfg=norm_cfg, last_layer=last_layer, mode=mode)) self.pfn_layers = nn.ModuleList(pfn_layers) # Need pillar (voxel) size and x/y offset in order to calculate offset @@ -156,8 +148,7 @@ def __init__(self, self.z_offset = self.vz / 2 + point_cloud_range[2] self.point_cloud_range = point_cloud_range - def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, - *args, **kwargs) -> Tensor: + def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, *args, **kwargs) -> Tensor: """Forward function. Args: @@ -173,17 +164,17 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, voxel_fourier_features = super().forward(features, num_points, coors) # Normalize the features - norm_features = (features - self.min_norm_values.view(1, -1)) / ((self.max_norm_values - self.min_norm_values).view(1, -1)) - + norm_features = (features - self.min_norm_values.view(1, -1)) / ( + (self.max_norm_values - self.min_norm_values).view(1, -1) + ) + # Offset features - max_points_per_voxel = features.shape[1] - + max_points_per_voxel = features.shape[1] + features_ls = [norm_features] # Find distance of x, y, and z from cluster center, mapped to [-1, 1] if available if self._with_cluster_center: - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_points.type_as(features).view( - -1, 1, 1) + points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_points.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Map to [0, 1] if available # if self.min_norm_values is not None and self.max_norm_values is not None: @@ -196,27 +187,21 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, if self._with_voxel_center: if not self.legacy: f_center = torch.zeros_like(features[:, :, :3]) - f_center[:, :, 0] = features[:, 
:, 0] - ( - coors[:, 3].to(dtype).unsqueeze(1) * self.vx + - self.x_offset) - f_center[:, :, 1] = features[:, :, 1] - ( - coors[:, 2].to(dtype).unsqueeze(1) * self.vy + - self.y_offset) - f_center[:, :, 2] = features[:, :, 2] - ( - coors[:, 1].to(dtype).unsqueeze(1) * self.vz + - self.z_offset) + f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset) + f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset) + f_center[:, :, 2] = features[:, :, 2] - (coors[:, 1].to(dtype).unsqueeze(1) * self.vz + self.z_offset) else: f_center = features[:, :, :3] f_center[:, :, 0] = f_center[:, :, 0] - ( - coors[:, 3].type_as(features).unsqueeze(1) * self.vx + - self.x_offset) + coors[:, 3].type_as(features).unsqueeze(1) * self.vx + self.x_offset + ) f_center[:, :, 1] = f_center[:, :, 1] - ( - coors[:, 2].type_as(features).unsqueeze(1) * self.vy + - self.y_offset) + coors[:, 2].type_as(features).unsqueeze(1) * self.vy + self.y_offset + ) f_center[:, :, 2] = f_center[:, :, 2] - ( - coors[:, 1].type_as(features).unsqueeze(1) * self.vz + - self.z_offset) - + coors[:, 1].type_as(features).unsqueeze(1) * self.vz + self.z_offset + ) + # if self.min_norm_values is not None and self.max_norm_values is not None: # f_center = f_center / (voxel_size * 0.5) features_ls.append(f_center) @@ -234,12 +219,12 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) voxel_feature_offsets *= mask - + # PFN for pfn in self.pfn_layers: voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - - # Concat + + # Concat features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) return features @@ -247,7 +232,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # @MODELS.register_module() # class 
BEVFusionVoxelSinCosEncoder(nn.Module): -# def __init__(self, +# def __init__(self, # min_norm_values: Tuple[float], # max_norm_values: Tuple[float], # time_lag_channel_index: int = 3, @@ -279,7 +264,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # self.y_offset = self.vy / 2 + point_cloud_range[1] # self.z_offset = self.vz / 2 + point_cloud_range[2] # self.point_cloud_range = point_cloud_range - + # self.xyz_channels = 3 # feat_offset_channels = in_channels - self.xyz_channels # if with_cluster_center: @@ -310,7 +295,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # self.time_lag_channel_index = time_lag_channel_index # self.time_exp_factor = time_exp_factor - + # self.register_buffer("min_norm_values", torch.tensor(min_norm_values)) # self.register_buffer("max_norm_values", torch.tensor(max_norm_values)) # self.register_buffer("voxel_size", torch.tensor([self.vx, self.vy, self.vz])) @@ -328,9 +313,9 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # Returns: # torch.Tensor: Features of pillars in shape (M, C). 
-# """ +# """ # num_voxels, max_points_per_voxel = features.shape[0], features.shape[1] - + # # Mean in the voxel # # (N, M, 3) -> (N, 3) # voxel_features = (features[:, :, :self.xyz_channels].sum(dim=1, keepdim=False) / num_points.type_as(features).view( @@ -339,7 +324,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # # min-max normalization, (N, 3) -> (N, 3) # voxel_features_norm = (voxel_features - \ # self.min_norm_values[:self.xyz_channels].view(1, -1)) / ((self.max_norm_values[:self.xyz_channels] - self.min_norm_values[:self.xyz_channels]).view(1, -1)) - + # # SinCos encoding # # (N, 3) -> (N*3, 1) * (1, ) * (1, 3) -> (N*3, 3) # y = voxel_features_norm.reshape(-1, 1) * np.pi * self.exponents.reshape(1, -1) @@ -348,22 +333,22 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # # (N, 3*3) -> (N, 3*3*2) # voxel_fourier_features = torch.cat([torch.cos(y), torch.sin(y)], dim=1) -# # PFN -# # Other features, for example, intensity or time_lag +# # PFN +# # Other features, for example, intensity or time_lag # other_features = features[:, :, self.xyz_channels:] - -# # Normalization -# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) + +# # Normalization +# other_features_norm = (other_features - self.min_norm_values[self.xyz_channels:]) / (self.max_norm_values[self.xyz_channels:] - self.min_norm_values[self.xyz_channels:]) # time_lag_feature_index = self.time_lag_channel_index - self.xyz_channels -# # exponentiate time_lag features, it's higher when the normlized time lag is lower +# # exponentiate time_lag features, it's higher when the normlized time lag is lower # # (1.0 when time_lag_features is 0.0) # if self.time_exp_factor is not None: # other_features_norm[:, :, time_lag_feature_index] = torch.exp(- other_features_norm[:, :, time_lag_feature_index] * self.time_exp_factor) # else: -# # 
Inverse the time_lag feature +# # Inverse the time_lag feature # other_features_norm[:, :, time_lag_feature_index] = 1.0 - other_features_norm[:, :, time_lag_feature_index] - + # # Offsets # voxel_feature_offsets = [other_features_norm] # # Find distance of x, y, and z from cluster center @@ -371,7 +356,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # points_mean = features[:, :, :3].sum( # dim=1, keepdim=True) / num_points.type_as(features).view( # -1, 1, 1) - + # # f_cluster = (features[:, :, :3] - points_mean) # f_cluster = features[:, :, :3] - points_mean # voxel_feature_offsets.append(f_cluster) @@ -389,7 +374,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # f_center[:, :, 2] = features[:, :, 2] - ( # coors[:, 1].to(dtype).unsqueeze(1) * self.vz + # self.z_offset) - + # # Map to [-1, 1] # # f_center = f_center / (self.voxel_size * 0.5) # voxel_feature_offsets.append(f_center) @@ -397,7 +382,7 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # if self._with_distance: # points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) # voxel_feature_offsets.append(points_dist) - + # voxel_feature_offsets = torch.cat(voxel_feature_offsets, dim=-1) # # The feature decorations were calculated without regard to whether # # pillar was empty. 
Need to ensure that @@ -405,13 +390,11 @@ def forward(self, features: Tensor, num_points: Tensor, coors: Tensor, # mask = get_paddings_indicator(num_points, max_points_per_voxel, axis=0) # mask = torch.unsqueeze(mask, -1).type_as(voxel_feature_offsets) # voxel_feature_offsets *= mask - + # # PFN # for pfn in self.pfn_layers: # voxel_feature_offsets = pfn(voxel_feature_offsets, num_points) - -# # Concat + +# # Concat # features = torch.cat([voxel_fourier_features, voxel_feature_offsets.squeeze(1)], dim=-1) # return features - - diff --git a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py index 175c08bed..888d2b1c0 100644 --- a/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py +++ b/projects/BEVFusion/bevfusion/custom_sparse_conv_tensor.py @@ -2,8 +2,8 @@ Custom SparseConvTensor for BEVFusion. This customiztion is used to support cleaner ONNX export of sparse convolutions. """ -import torch +import torch from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE if IS_SPCONV2_AVAILABLE: @@ -23,18 +23,15 @@ def sparse_to_dense(sparse_tensor: SparseConvTensor, batch_size: int, spatial_sh # b * (H * W * D) + h*(W*D) + w*D + d # Factor out the common terms D and W # (b*H*W + h*W + w) * D + d -> (b*H + h) * W + w) * D + d - linear_idx = ((b * H + h) * W + w) * D + d # [N] - + linear_idx = ((b * H + h) * W + w) * D + d # [N] + out = torch.zeros( - [ - num_cells, - sparse_tensor.features.shape[1] - ], + [num_cells, sparse_tensor.features.shape[1]], device=sparse_tensor.features.device, dtype=sparse_tensor.features.dtype, ) # out = out.index_copy(0, linear_idx, sparse_tensor.features) # out = out.scatter(0, linear_idx, sparse_tensor.features) - scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] + scatter_idx = linear_idx.unsqueeze(1).expand(-1, out_channels) # [N, C] out = out.scatter(0, scatter_idx, sparse_tensor.features) return out.view(batch_size, H, W, D, out_channels) 
diff --git a/projects/BEVFusion/bevfusion/depth_lss.py b/projects/BEVFusion/bevfusion/depth_lss.py index 2e68a3772..d0a547258 100644 --- a/projects/BEVFusion/bevfusion/depth_lss.py +++ b/projects/BEVFusion/bevfusion/depth_lss.py @@ -328,7 +328,7 @@ def bev_pool_precomputed(self, x, geom_feats, kept, ranks, indices): final = torch.cat(x.unbind(dim=2), 1) if self.visualize_bev_feat: self.plot_bev_feat(final) - + return final def plot_bev_feat(self, bev_feat): diff --git a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 3c25402e4..46b74b969 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -5,13 +5,13 @@ import matplotlib.pyplot as plt import numpy as np import torch +from mmcv.cnn import build_conv_layer, build_norm_layer from mmdet3d.registry import MODELS -from mmengine.logging import print_log -from mmengine.model import BaseModule -from mmcv.cnn import build_conv_layer, build_norm_layer + # from mmdet.models.backbones.resnet import BasicBlock from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig - +from mmengine.logging import print_log +from mmengine.model import BaseModule from torch import nn from torch.utils.checkpoint import checkpoint @@ -21,34 +21,35 @@ class CustomDepthBasicBlock(BaseModule): def __init__( - self, - in_channels: int, - out_channel: int, - padding: int = 0, - kernel_size: int = 1, - stride: int = 1, - dilation: int = 1, - with_cp: bool = False, - norm_cfg=dict(type='BN'), - conv_cfg=None, - downsample: Optional[nn.Module] = None, - init_cfg: OptMultiConfig = None): + self, + in_channels: int, + out_channel: int, + padding: int = 0, + kernel_size: int = 1, + stride: int = 1, + dilation: int = 1, + with_cp: bool = False, + norm_cfg=dict(type="BN"), + conv_cfg=None, + downsample: Optional[nn.Module] = None, + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg) self.norm1_name, self.norm1 = 
build_norm_layer(norm_cfg, out_channel, postfix=1) self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2) self.conv1 = build_conv_layer( - conv_cfg, - in_channels, - out_channel, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, bias=False + conv_cfg, + in_channels, + out_channel, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=False, ) self.add_module(self.norm1_name, self.norm1) - self.conv2 = build_conv_layer( - conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False) + self.conv2 = build_conv_layer(conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False) self.add_module(self.norm2_name, self.norm2) self.relu = nn.ReLU(inplace=True) @@ -72,9 +73,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: out += identity return out + class SELayer(nn.Module): """ - Squeeze-and-Excitation (SE) layer. + Squeeze-and-Excitation (SE) layer. This is used to modulate features with camera-depth aware parameters. The code is taken from BEVDET (https://github.com/hustvl/BEVDET). """ @@ -84,7 +86,7 @@ def __init__(self, channels, act_layer=nn.ReLU, gate_layer=nn.Sigmoid): # Dont need global pooling because inputs are (B*N, C, 1, 1). self.sequeeze_net = nn.Sequential( # Squeeze with 1x1 convolution - nn.Conv2d(channels, channels, 1, bias=True), + nn.Conv2d(channels, channels, 1, bias=True), # Activation act_layer(), # Expand with 1x1 convolution @@ -106,12 +108,12 @@ def forward(self, x: torch.Tensor, depth_aware_features: torch.Tensor) -> torch. class CameraDepthLinearProjectionMLP(nn.Module): """ - Linear projection module by MLP. This is used to project image (context) features and camera-depth + Linear projection module by MLP. This is used to project image (context) features and camera-depth aware parameters (for example, intrinsics) to embedding space. The code is taken from BEVDET (https://github.com/hustvl/BEVDET). 
""" - def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop_out: float = 0.0): + def __init__(self, in_channels: int, hidden_channels: int, out_channels: int, drop_out: float = 0.0): """ Args: in_channels: int, the number of input channels. @@ -132,7 +134,7 @@ def __init__(self, in_channels: int, hidden_channels:int, out_channels:int, drop nn.Linear(hidden_channels, out_channels), nn.Dropout(drop_out), ) - + def forward(self, x: torch.Tensor) -> torch.Tensor: """ Args: @@ -150,14 +152,15 @@ class CameraDepthAwareNet(nn.Module): """ def __init__( - self, - in_channels: int, + self, + in_channels: int, hidden_channels: int, out_channels: int, - mlp_drop_out: float, + mlp_drop_out: float, depth_channels: int, with_cp: bool = False, - num_camera_depth_parameters: int = 27) -> None: + num_camera_depth_parameters: int = 27, + ) -> None: """ Args: in_channels: int, the number of input channels. @@ -177,42 +180,35 @@ def __init__( # Input convolution for context/image features # Camera depth aware parameters branch - self.camera_depth_aware_parameters_bn = nn.BatchNorm1d( - self.num_camera_depth_parameters - ) - + self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(self.num_camera_depth_parameters) + # Context/image feature branch # self.context_input_conv = nn.Sequential( - # nn.Conv2d( - # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False), + # nn.Conv2d( + # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False), # nn.BatchNorm2d(hidden_channels), # nn.ReLU(inplace=True), # ) self.context_input_conv = nn.Sequential( - nn.Conv2d( - in_channels, hidden_channels, kernel_size=1, stride=1, bias=False), + nn.Conv2d(in_channels, hidden_channels, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(hidden_channels), nn.ReLU(inplace=True), ) self.context_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - in_channels=self.num_camera_depth_parameters, - hidden_channels=hidden_channels, - 
out_channels=hidden_channels, - drop_out=self.mlp_drop_out + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out, ) self.context_se = SELayer(channels=hidden_channels) - self.context_conv = nn.Conv2d( - hidden_channels, - out_channels, - kernel_size=1, - stride=1, padding=0, bias=True) + self.context_conv = nn.Conv2d(hidden_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True) - # Depth branch + # Depth branch self.depth_camera_depth_aware_mlp = CameraDepthLinearProjectionMLP( - in_channels=self.num_camera_depth_parameters, - hidden_channels=hidden_channels, - out_channels=hidden_channels, - drop_out=self.mlp_drop_out + in_channels=self.num_camera_depth_parameters, + hidden_channels=hidden_channels, + out_channels=hidden_channels, + drop_out=self.mlp_drop_out, ) self.depth_se = SELayer(channels=hidden_channels) # self.depth_conv = nn.Sequential( @@ -229,7 +225,9 @@ def __init__( ) # self._init_weight() - def context_forward(self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: + def context_forward( + self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor + ) -> torch.Tensor: """ Args: x: torch.Tensor, the input tensor of shape (B*N, C, H, W). 
@@ -243,7 +241,7 @@ def context_forward(self, context_features: torch.Tensor, camera_depth_aware_fea context_features = self.context_se(context_features, context_camera_depth_aware_features) context_features = self.context_conv(context_features) return context_features - + def depth_forward(self, depth_features: torch.Tensor, camera_depth_aware_features: torch.Tensor) -> torch.Tensor: """ Args: @@ -273,10 +271,10 @@ def forward(self, x: torch.Tensor, camera_depth_aware_parameters: torch.Tensor) """ # (B, N, N_CAMERA_DEPTH_PARAMETERS) -> (B*N, N_CAMERA_DEPTH_PARAMETERS) camera_depth_aware_parameters = camera_depth_aware_parameters.view(-1, self.num_camera_depth_parameters) - + # (B*N, N_CAMERA_DEPTH_PARAMETERS) camera_depth_aware_features = self.camera_depth_aware_parameters_bn(camera_depth_aware_parameters) - context_input_features = self.context_input_conv(x) + context_input_features = self.context_input_conv(x) context_features = self.context_forward(context_input_features, camera_depth_aware_features) depth_features = self.depth_forward(context_input_features, camera_depth_aware_features) return torch.cat([depth_features, context_features], dim=1) @@ -317,7 +315,9 @@ def __init__( self.collapse_z = collapse_z self.expand_batch_axis = expand_batch_axis - def get_cam_feats(self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]: + def get_cam_feats( + self, x, camera_depth_aware_parameters: Optional[torch.Tensor] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError def forward( @@ -340,10 +340,10 @@ def forward( ranks_bev, ranks_depth, ranks_feat = geom_feats_precomputed x, depth_softmax = self.get_cam_feats(img) x = self.bev_pool_precomputed(x, depth_softmax, ranks_bev, ranks_depth, ranks_feat) - + # No return depth predictions when precomputed geometry features are used depth_softmax = None - + else: intrins = camera_intrinsics[..., :3, :3] post_rots = img_aug_matrix[..., :3, :3] @@ -372,7 
+372,7 @@ def forward( depth_softmax, ) = self.get_cam_feats(img, camera_depth_aware_parameters) x = self.bev_pool(view_feats, depth_softmax, geom) - + return x, depth_softmax def bev_pool_aux(self, geom_feats): @@ -408,7 +408,7 @@ def bev_pool_aux(self, geom_feats): return None, None, None geom_feats, ranks_depth, ranks_feat = geom_feats[kept], ranks_depth[kept], ranks_feat[kept] - + # Switch x and y to match the order of the BEV grid ranks_bev = ( geom_feats[:, 3] * (self.nx[2] * self.nx[1] * self.nx[0]) @@ -496,7 +496,7 @@ def compute_bev_pool( self.plot_bev_feat(bev_feat) return bev_feat - + def bev_pool_precomputed(self, view_feats, depth_softmax, ranks_bev, ranks_depth, ranks_feat): interval_starts, interval_lengths = self.compute_intervals(ranks_bev) bev_feat = self.compute_bev_pool( @@ -518,7 +518,8 @@ def get_depth_softmax(self, x: torch.Tensor, B, N, fH, fW) -> Tuple[torch.Tensor view_feats = x[:, self.D : (self.D + self.C)] view_feats = view_feats.view(B, N, self.C, fH, fW) return view_feats, depth_softmax - + + @MODELS.register_module() class LSSTransformV2(BaseViewTransformV2): @@ -548,15 +549,13 @@ def __init__( self.downsample = DownSampleNet(downsample, out_channels, out_channels) def get_cam_feats( - self, - x: torch.Tensor, - camera_depth_aware_parameters: Optional[torch.Tensor] = None + self, x: torch.Tensor, camera_depth_aware_parameters: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor]: B, N, C, fH, fW = x.shape x = x.view(B * N, C, fH, fW) x = self.depthnet(x) return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW) - + def forward(self, *args, **kwargs): x, depth_softmax = super().forward(*args, **kwargs) x = self.downsample(x) @@ -576,7 +575,7 @@ def __init__( ybound: Tuple[float, float, float], zbound: Tuple[float, float, float], dbound: Tuple[float, float, float], - camera_depth_aware_configs: dict, + camera_depth_aware_configs: dict, downsample: int = 1, ): super().__init__( @@ -597,17 +596,15 @@ def __init__( 
depth_channels=self.D, out_channels=self.C, ) - + def get_cam_feats( - self, - x: torch.Tensor, - camera_depth_aware_parameters: Optional[torch.Tensor] = None + self, x: torch.Tensor, camera_depth_aware_parameters: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor]: B, N, C, fH, fW = x.shape x = x.view(B * N, C, fH, fW) x = self.camera_depth_aware_net(x, camera_depth_aware_parameters) return self.get_depth_softmax(x, B=B, N=N, fH=fH, fW=fW) - + def forward(self, *args, **kwargs): x, depth_softmax = super().forward(*args, **kwargs) x = self.downsample(x) diff --git a/projects/BEVFusion/bevfusion/loading.py b/projects/BEVFusion/bevfusion/loading.py index 5d5c8d13a..7f3322fda 100644 --- a/projects/BEVFusion/bevfusion/loading.py +++ b/projects/BEVFusion/bevfusion/loading.py @@ -4,15 +4,13 @@ from typing import List, Optional, Tuple import matplotlib.pyplot as plt - import mmcv import numpy as np +from mmcv.transforms import BaseTransform from mmdet3d.datasets.transforms import LoadMultiViewImageFromFiles from mmdet3d.registry import TRANSFORMS from mmengine.fileio import get from mmengine.logging import print_log -from mmcv.transforms import BaseTransform - @TRANSFORMS.register_module() @@ -238,6 +236,7 @@ class PointsToMultiViewImageDepths(BaseTransform): max_depth (float): Upper clip for the depth color scale (m). Defaults to 80. """ + def __init__( self, img_shape, @@ -255,7 +254,7 @@ def __init__( if self.visualize_dir is not None: self.visualize_dir.mkdir(parents=True, exist_ok=True) self._depth_idx = 0 - + def transform(self, results: dict) -> Optional[dict]: """Call function to load multi-view image from files. 
@@ -269,17 +268,17 @@ def transform(self, results: dict) -> Optional[dict]: """ lidar2image = np.asarray(results["lidar2img"]) img_aug_matrix = np.asarray(results["img_aug_matrix"]) if "img_aug_matrix" in results else np.eye(4) - cur_coords = results["points"].numpy()[:,:3] + cur_coords = results["points"].numpy()[:, :3] # inverse lidar aug if "lidar_aug_matrix" in results: - lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"]) - lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) - cur_coords -= lidar_aug_matrix[:3, 3] - cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0) + lidar_aug_matrix = np.asarray(results["lidar_aug_matrix"]) + lidar_aug_matrix_inverse = np.linalg.inv(lidar_aug_matrix) + cur_coords -= lidar_aug_matrix[:3, 3] + cur_coords = lidar_aug_matrix_inverse[:3, :3] @ cur_coords.transpose(1, 0) else: - cur_coords = cur_coords.transpose(1, 0) - + cur_coords = cur_coords.transpose(1, 0) + # lidar2image cur_coords = lidar2image[:, :3, :3] @ cur_coords cur_coords += lidar2image[:, :3, 3].reshape(-1, 3, 1) @@ -306,15 +305,19 @@ def transform(self, results: dict) -> Optional[dict]: & valid_dist_mask ) - # Avoid loops since it's slow + # Avoid loops since it's slow indices = np.nonzero(on_img) camera_indices = indices[0] point_indices = indices[1] masked_coords = cur_coords[camera_indices, point_indices].astype(np.int64) masked_dist = dist[camera_indices, point_indices] - # Possibly to have duplicates and the last one will be used, however, the chance is small - flatten_indices = camera_indices * self.img_shape[0] * self.img_shape[1] + masked_coords[:, 0] * self.img_shape[1] + masked_coords[:, 1] + # Possibly to have duplicates and the last one will be used, however, the chance is small + flatten_indices = ( + camera_indices * self.img_shape[0] * self.img_shape[1] + + masked_coords[:, 0] * self.img_shape[1] + + masked_coords[:, 1] + ) depth_flat = np.zeros(self.num_cameras * self.img_shape[0] * self.img_shape[1], 
dtype=np.float32) depth_flat[flatten_indices] = masked_dist depth = depth_flat.reshape(self.num_cameras, self.img_shape[0], self.img_shape[1]) @@ -350,9 +353,7 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: base_rows = int(np.ceil(self.num_cameras / cols)) rows = base_rows * 3 - fig, axes = plt.subplots( - rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False - ) + fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False) for c in range(self.num_cameras): d = depth[c] @@ -365,12 +366,20 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: ax_overlay.imshow(imgs[c].astype(np.uint8)) if vals.size > 0: ax_overlay.scatter( - xs, ys, c=vals, cmap="turbo", - vmin=0, vmax=self.max_depth, s=1, + xs, + ys, + c=vals, + cmap="turbo", + vmin=0, + vmax=self.max_depth, + s=1, ) else: ax_overlay.imshow( - d, cmap="turbo", vmin=0, vmax=self.max_depth, + d, + cmap="turbo", + vmin=0, + vmax=self.max_depth, interpolation="nearest", ) ax_overlay.set_title(f"cam {c} overlay ({vals.size} pts)") @@ -383,7 +392,10 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: ax_img.imshow(imgs[c].astype(np.uint8)) else: ax_img.imshow( - d, cmap="gray", vmin=0, vmax=self.max_depth, + d, + cmap="gray", + vmin=0, + vmax=self.max_depth, interpolation="nearest", ) ax_img.set_title(f"cam {c} image-only") @@ -393,7 +405,10 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: # Row block 3: depth-only visualization. ax_depth = axes[(base_rows * 2) + (c // cols), c % cols] ax_depth.imshow( - d, cmap="turbo", vmin=0, vmax=self.max_depth, + d, + cmap="turbo", + vmin=0, + vmax=self.max_depth, interpolation="nearest", ) ax_depth.set_title(f"cam {c} depth-only") @@ -407,21 +422,16 @@ def _save_depth_subplot(self, depth: np.ndarray, results: dict) -> None: axes[(base_rows * 2) + (c // cols), c % cols].axis("off") # Shared depth colorbar with numeric values. 
- depth_mappable = plt.cm.ScalarMappable( - cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth) - ) + depth_mappable = plt.cm.ScalarMappable(cmap="turbo", norm=plt.Normalize(vmin=0, vmax=self.max_depth)) depth_mappable.set_array([]) - cbar = fig.colorbar( - depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02 - ) + cbar = fig.colorbar(depth_mappable, ax=axes, location="right", fraction=0.02, pad=0.02) cbar.set_label("Depth (m)") fig.suptitle(f"gt_depths — {self._depth_idx}") fig.tight_layout(rect=[0, 0, 0.96, 0.97]) - + self._depth_idx += 1 out_path = self.visualize_dir / f"{self._depth_idx:06d}_gt_depths.png" fig.savefig(out_path, dpi=120, bbox_inches="tight") plt.close(fig) print(f"Saved gt_depths visualization to {out_path}") - \ No newline at end of file diff --git a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py index 549a97e81..ff2fdfff7 100644 --- a/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py +++ b/projects/BEVFusion/bevfusion/ops/bev_pool_v2/__init__.py @@ -1,3 +1,3 @@ from .bev_pool_v2 import bev_pool_v2 -__all__ = ["bev_pool_v2"] \ No newline at end of file +__all__ = ["bev_pool_v2"] diff --git a/projects/BEVFusion/bevfusion/ops/topk/topk.py b/projects/BEVFusion/bevfusion/ops/topk/topk.py index a767bb720..f0f9a8779 100644 --- a/projects/BEVFusion/bevfusion/ops/topk/topk.py +++ b/projects/BEVFusion/bevfusion/ops/topk/topk.py @@ -13,9 +13,9 @@ class TopK(Function): def symbolic( g, x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, + k: int, + dim: int, + sorted: bool = False, ): output = g.op( @@ -27,19 +27,20 @@ def symbolic( if x_shape is not None and hasattr(output.type(), "with_sizes"): output_type = x.type().with_sizes(x_shape) output.setType(output_type) - # Argsort from Autoware is in ascending order, so we need to return the last k elements. + # Argsort from Autoware is in ascending order, so we need to return the last k elements. 
return output[-k:] @staticmethod def forward( ctx, x: torch.Tensor, - k: int, - dim: int, - sorted: bool = False, + k: int, + dim: int, + sorted: bool = False, ): _, indices = torch.topk(x, k=k, dim=dim, largest=True, sorted=sorted) return indices + def topk(x: torch.Tensor, k: int, dim: int, sorted: bool = False): - return TopK.apply(x, k, dim, sorted) \ No newline at end of file + return TopK.apply(x, k, dim, sorted) diff --git a/projects/BEVFusion/bevfusion/sparse_encoder.py b/projects/BEVFusion/bevfusion/sparse_encoder.py index 0fc20cd19..6bf0592b6 100644 --- a/projects/BEVFusion/bevfusion/sparse_encoder.py +++ b/projects/BEVFusion/bevfusion/sparse_encoder.py @@ -5,7 +5,6 @@ import numpy as np import torch - from mmdet3d.models.layers import make_sparse_convmodule from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE from mmdet3d.models.middle_encoders import SparseEncoder @@ -119,7 +118,7 @@ def __init__( indice_key="spconv_down2", conv_type="SparseConv3d", ) - + def forward(self, voxel_features, coors, batch_size): """Forward of SparseEncoder. 
@@ -147,11 +146,11 @@ def forward(self, voxel_features, coors, batch_size): for encoder_layer in self.encoder_layers: x = encoder_layer(x) encode_features.append(x) - + # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(encode_features[-1]) - + spatial_features = sparse_to_dense(out, batch_size, self.dense_output_shapes, self.output_channels) # spatial_features = out.dense(channels_first=False) spatial_features = spatial_features.permute(0, 4, 3, 1, 2).contiguous() diff --git a/projects/BEVFusion/bevfusion/transforms_3d.py b/projects/BEVFusion/bevfusion/transforms_3d.py index c311f9254..31d0cc417 100644 --- a/projects/BEVFusion/bevfusion/transforms_3d.py +++ b/projects/BEVFusion/bevfusion/transforms_3d.py @@ -191,6 +191,7 @@ def transform(self, input_dict: dict) -> dict: @TRANSFORMS.register_module() class BEVFusionRemoveLiDARPoints(BaseTransform): """Remove LiDAR points from the data.""" + def __init__(self): super().__init__() diff --git a/projects/BEVFusion/bevfusion/utils.py b/projects/BEVFusion/bevfusion/utils.py index 84797cc51..39c6a0ded 100644 --- a/projects/BEVFusion/bevfusion/utils.py +++ b/projects/BEVFusion/bevfusion/utils.py @@ -93,7 +93,7 @@ def decode(self, heatmap, rot, dim, center, height, vel, filter=False): predictions_dict = {"bboxes": boxes3d, "scores": scores, "labels": labels} predictions_dicts.append(predictions_dict) return predictions_dicts - + # use score threshold if self.score_threshold is not None: if isinstance(self.score_threshold, float): diff --git a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py index 0863889bb..2652b3965 100644 --- a/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py +++ b/projects/BEVFusion/configs/deploy/bevfusion_main_body_lidar_only_intensity_tensorrt_dynamic.py @@ -1,11 +1,7 @@ codebase_config = 
dict(type="mmdet3d", task="VoxelDetection", model_type="end2end") custom_imports = dict( - imports=[ - "projects.BEVFusion.deploy", - "projects.BEVFusion.bevfusion", - "projects.SparseConvolution" - ], + imports=["projects.BEVFusion.deploy", "projects.BEVFusion.bevfusion", "projects.SparseConvolution"], allow_failed_imports=False, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py index 7a8afad3e..bcf9870c6 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_j6gen2_base_120m.py @@ -29,8 +29,20 @@ pts_voxel_encoder=dict( in_channels=len(_base_.lidar_sweep_dims), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + min_norm_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + max_norm_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], ), pts_middle_encoder=dict( in_channels=50, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py index d3c5154c6..1f2acd6ab 100644 --- 
a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb16_jpntaxi_base_120m.py @@ -29,8 +29,20 @@ pts_voxel_encoder=dict( in_channels=len(_base_.lidar_sweep_dims), # min-max normalization for x, y, z, intensity, time_lag, where the max of time lag technically is two seeps (200 ms) here - min_norm_values=[_base_.point_cloud_range[0], _base_.point_cloud_range[1], _base_.point_cloud_range[2], 0.0, 0.0], - max_norm_values=[_base_.point_cloud_range[3], _base_.point_cloud_range[4], _base_.point_cloud_range[5], 255.0, 0.2], + min_norm_values=[ + _base_.point_cloud_range[0], + _base_.point_cloud_range[1], + _base_.point_cloud_range[2], + 0.0, + 0.0, + ], + max_norm_values=[ + _base_.point_cloud_range[3], + _base_.point_cloud_range[4], + _base_.point_cloud_range[5], + 255.0, + 0.2, + ], ), pts_middle_encoder=dict( in_channels=50, @@ -153,4 +165,6 @@ ) log_processor = dict(window_size=50) -load_from = "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +load_from = ( + "work_dirs/bevfusion_lidar_2_8_0/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb16_base_120m/best_epoch_47.pth" +) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py index 171c3076e..19394ab16 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb16_base_120m.py @@ -149,4 +149,4 @@ ) log_processor = dict(window_size=50) -resume = True \ No newline at end of file +resume = True diff --git 
a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index aa275f558..f11431814 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -14,7 +14,7 @@ pad_size_divisor=32, ), pts_voxel_encoder=dict( - type="HardSimpleVoxelSinCosEncoder", + type="HardSimpleVoxelSinCosEncoder", in_channels=4, ), pts_middle_encoder=dict( @@ -89,7 +89,9 @@ # Set NMS for different clusters nms_clusters=[ # Sqrt(0.25) = 0.50 - dict(class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300), # It's radius if using circle_nms + dict( + class_names=["car", "truck", "bus"], class_indices=[0, 1, 2], nms_threshold=0.25, post_max_size=300 + ), # It's radius if using circle_nms dict(class_names=["bicycle"], class_indices=[3], nms_threshold=0.0, post_max_size=50), dict(class_names=["pedestrian"], class_indices=[4], nms_threshold=0.0, post_max_size=100), dict(class_names=["traffic_cone"], class_indices=[5], nms_threshold=0.0, post_max_size=100), diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py index 1059ce9e8..c807668a3 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_depthlss_120m.py @@ -56,5 +56,5 @@ ), bbox_head=dict( in_channels=80, - ) + ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py index d0920ccf1..1457207c8 100644 --- 
a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_50m.py @@ -1,11 +1,11 @@ _base_ = [ "./camera_resnet50_fpn_depthlss_120m.py", ] -num_proposals = 200 +num_proposals = 200 # Image network model = dict( - depth_gt_downsample=8, + depth_gt_downsample=8, loss_depth_weight=2.0, view_transform=dict( type="LSSTransformV2", diff --git a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py index 997fa1e76..dd90ccb6e 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/resnet50/camera_resnet50_fpn_lss_depth_50m.py @@ -1,11 +1,11 @@ _base_ = [ "./camera_resnet50_fpn_depthlss_120m.py", ] -num_proposals = 200 +num_proposals = 200 # Image network model = dict( - depth_gt_downsample=8, + depth_gt_downsample=8, loss_depth_weight=1.0, view_transform=dict( # type="LSSTransformV2", @@ -15,11 +15,7 @@ zbound=[-10.0, 10.0, 20.0], dbound=[1.0, 60, 0.5], downsample=2, - camera_depth_aware_configs=dict( - mlp_drop_out=0.0, - downsample=8, - num_camera_depth_parameters=27 - ), + camera_depth_aware_configs=dict(mlp_drop_out=0.0, downsample=8, num_camera_depth_parameters=27), ), bbox_head=dict( num_proposals=num_proposals, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py index f31a604b0..3a7a428b7 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_120m.py @@ -53,7 +53,7 @@ ), dict(type="BEVFusionRandomFlip3D"), 
dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict( type="ObjectNameFilter", diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py index 00e7ac896..011e460f0 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/cameras/default_camera_50m.py @@ -29,20 +29,17 @@ dict( type="ImageAug3D", final_dim=image_size, - resize_lim=[0.29, 0.35], + resize_lim=[0.29, 0.35], bot_pct_lim=[0.0, 0.0], rot_lim=[0.0, 0.0], rand_flip=True, is_train=True, ), + dict(type="PointsRangeFilter", point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0]), dict( - type="PointsRangeFilter", - point_cloud_range=[-80.0, -80.0, -10.0, 80.0, 80.0, 10.0] - ), - dict( - type="PointsToMultiViewImageDepths", - img_shape=image_size, - num_cameras=len(camera_order), + type="PointsToMultiViewImageDepths", + img_shape=image_size, + num_cameras=len(camera_order), depth_bounds=[1.0, 60.0], # visualize_dir="work_dirs/visualize_depths_6", ), @@ -54,7 +51,7 @@ ), dict(type="BEVFusionRandomFlip3D"), dict(type="ObjectRangeFilter", point_cloud_range=_base_.point_cloud_range), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), # Remove LiDAR points from the data dict(type="BEVFusionRemoveLiDARPoints"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py 
b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py index e13597aec..11e297c09 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_camera_lidar_intensity_120m.py @@ -84,7 +84,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( @@ -124,7 +124,7 @@ backend_args=backend_args, camera_order=camera_order, ), - dict( + dict( type="LoadPointsFromFile", coord_type="LIDAR", load_dim=point_load_dim, diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index ed0e35fbf..d797779de 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -66,7 +66,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=5), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=3), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py index f49e2dbb4..ca75d799f 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_50m.py @@ -63,7 +63,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + 
dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 23dd78687..d5e426f58 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -66,7 +66,7 @@ "barrier", ], ), - dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), + dict(type="ObjectRangeMinPointsFilter", range_radius=[0, 60], min_num_points=2), dict(type="ObjectRangeMinPointsFilter", range_radius=[60, 130], min_num_points=1), dict(type="PointShuffle"), dict( diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 018b5db7e..55586e0f5 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -1,8 +1,8 @@ +# Wrapper Classes for onnx conversion +import numpy as np import torch import torch.nn.functional as F -# Wrapper Classes for onnx conversion -import numpy as np class TrtBevFusionImageBackboneContainer(torch.nn.Module): def __init__(self, mod, mean, std) -> None: @@ -49,13 +49,17 @@ def forward( batch_inputs_dict = { "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, - } - + voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") - print("voxels.shape, coors.shape, num_points_per_voxel.shape:", voxels.shape, coors.shape, num_points_per_voxel.shape) + print( + 
"voxels.shape, coors.shape, num_points_per_voxel.shape:", + voxels.shape, + coors.shape, + num_points_per_voxel.shape, + ) if points is not None: batch_inputs_dict["points"] = [points] diff --git a/projects/BEVFusion/deploy/exporter.py b/projects/BEVFusion/deploy/exporter.py index 4c5e72ac7..c7cd9e6b0 100644 --- a/projects/BEVFusion/deploy/exporter.py +++ b/projects/BEVFusion/deploy/exporter.py @@ -2,7 +2,7 @@ import logging import os.path as osp -from typing import Optional, Any +from typing import Any, Optional import numpy as np import onnx @@ -11,7 +11,7 @@ from builder import ExportBuilder from containers import TrtBevFusionCameraOnlyContainer, TrtBevFusionImageBackboneContainer, TrtBevFusionMainContainer from data_classes import ModelData, SetupConfigs -from mmdeploy.core import RewriterContext, SYMBOLIC_REWRITER +from mmdeploy.core import SYMBOLIC_REWRITER, RewriterContext from mmdeploy.utils import ( get_root_logger, ) @@ -32,8 +32,7 @@ def purge_mmdeploy_symbolics(op_names: list[str]) -> dict: continue # Bookkeeping key: full Python path of an implementer function. # Match by "...symbolics.." or "...symbolics.__" - if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key - for op in op_names): + if any(f".symbolics.{op}." in key or f".symbolics.{op}__" in key for op in op_names): removed[key] = records.pop(key) return removed @@ -83,12 +82,10 @@ def _export_model( patched_model (torch.nn.Module): Patched Pytorch model. ir_configs (dict): Configs for intermediate representations in ONNX. 
""" - # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported + # Purge the mmdeploy symbolic records for the layer_norm op, remove this if LayerNorm OP is not supported # in the tensorrt version removed = purge_mmdeploy_symbolics(["layer_norm"]) - self.logger.info( - f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}" - ) + self.logger.info(f"Purged {len(removed)} mmdeploy symbolic records: {list(removed.keys())}") with RewriterContext(**context_info), torch.no_grad(): image_feats = None if "img_backbone" in self.setup_configs.model_cfg.model: From a4bd66e2a4749b15f9295e99e8f14017c5cea79f Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 5 Jun 2026 12:29:57 +0900 Subject: [PATCH 181/183] Clean configs --- projects/BEVFusion/deploy/containers.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/projects/BEVFusion/deploy/containers.py b/projects/BEVFusion/deploy/containers.py index 55586e0f5..ad9243412 100644 --- a/projects/BEVFusion/deploy/containers.py +++ b/projects/BEVFusion/deploy/containers.py @@ -51,15 +51,6 @@ def forward( "voxels": {"voxels": voxels, "coors": coors, "num_points_per_voxel": num_points_per_voxel}, } - voxels.cpu().numpy().astype(np.float32).tofile("work_dirs/dummy_inputs/voxels.bin") - coors[:, :3].cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/coors.bin") - num_points_per_voxel.cpu().numpy().astype(np.int32).tofile("work_dirs/dummy_inputs/num_points_per_voxel.bin") - print( - "voxels.shape, coors.shape, num_points_per_voxel.shape:", - voxels.shape, - coors.shape, - num_points_per_voxel.shape, - ) if points is not None: batch_inputs_dict["points"] = [points] From 1d15699a6aa6a10e50c20e9d3c6c99f214a68fa5 Mon Sep 17 00:00:00 2001 From: Kok Seang Tan Date: Fri, 5 Jun 2026 14:03:25 +0900 Subject: [PATCH 182/183] Updated --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 76 -------------------- 1 file changed, 76 deletions(-) diff --git 
a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index 46b74b969..f843d1d94 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -2,16 +2,11 @@ from pathlib import Path from typing import Optional, Tuple -import matplotlib.pyplot as plt import numpy as np import torch -from mmcv.cnn import build_conv_layer, build_norm_layer from mmdet3d.registry import MODELS -# from mmdet.models.backbones.resnet import BasicBlock -from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig from mmengine.logging import print_log -from mmengine.model import BaseModule from torch import nn from torch.utils.checkpoint import checkpoint @@ -19,61 +14,6 @@ from .ops import bev_pool_v2 -class CustomDepthBasicBlock(BaseModule): - def __init__( - self, - in_channels: int, - out_channel: int, - padding: int = 0, - kernel_size: int = 1, - stride: int = 1, - dilation: int = 1, - with_cp: bool = False, - norm_cfg=dict(type="BN"), - conv_cfg=None, - downsample: Optional[nn.Module] = None, - init_cfg: OptMultiConfig = None, - ): - super().__init__(init_cfg) - - self.norm1_name, self.norm1 = build_norm_layer(norm_cfg, out_channel, postfix=1) - self.norm2_name, self.norm2 = build_norm_layer(norm_cfg, out_channel, postfix=2) - self.conv1 = build_conv_layer( - conv_cfg, - in_channels, - out_channel, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - bias=False, - ) - self.add_module(self.norm1_name, self.norm1) - self.conv2 = build_conv_layer(conv_cfg, out_channel, out_channel, kernel_size, padding=padding, bias=False) - self.add_module(self.norm2_name, self.norm2) - - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - self.with_cp = with_cp - - def forward(self, x: torch.Tensor) -> torch.Tensor: - identity = x - out = self.conv1(x) - out = self.norm1(out) - out = self.relu(out) - - out = 
self.conv2(out) - out = self.norm2(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - return out - - class SELayer(nn.Module): """ Squeeze-and-Excitation (SE) layer. @@ -183,12 +123,6 @@ def __init__( self.camera_depth_aware_parameters_bn = nn.BatchNorm1d(self.num_camera_depth_parameters) # Context/image feature branch - # self.context_input_conv = nn.Sequential( - # nn.Conv2d( - # in_channels, hidden_channels, kernel_size=3, stride=1, padding=1, bias=False), - # nn.BatchNorm2d(hidden_channels), - # nn.ReLU(inplace=True), - # ) self.context_input_conv = nn.Sequential( nn.Conv2d(in_channels, hidden_channels, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(hidden_channels), @@ -211,19 +145,9 @@ def __init__( drop_out=self.mlp_drop_out, ) self.depth_se = SELayer(channels=hidden_channels) - # self.depth_conv = nn.Sequential( - # BasicBlock(hidden_channels, hidden_channels, downsample=None), - # BasicBlock(hidden_channels, hidden_channels), - # BasicBlock(hidden_channels, hidden_channels), - # nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) - # ) self.depth_conv = nn.Sequential( - # CustomDepthBasicBlock(hidden_channels, hidden_channels, downsample=None, kernel_size=1, padding=0), - # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), - # CustomDepthBasicBlock(hidden_channels, hidden_channels, kernel_size=1), nn.Conv2d(hidden_channels, depth_channels, kernel_size=1, stride=1, padding=0, bias=True) ) - # self._init_weight() def context_forward( self, context_features: torch.Tensor, camera_depth_aware_features: torch.Tensor From 4828cf29fe4f3f2e9ffa8227311fc23418535474 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 5 Jun 2026 05:03:52 +0000 Subject: [PATCH 183/183] ci(pre-commit): autofix --- projects/BEVFusion/bevfusion/depth_lss_v2.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/projects/BEVFusion/bevfusion/depth_lss_v2.py b/projects/BEVFusion/bevfusion/depth_lss_v2.py index f843d1d94..a95a88e4e 100644 --- a/projects/BEVFusion/bevfusion/depth_lss_v2.py +++ b/projects/BEVFusion/bevfusion/depth_lss_v2.py @@ -5,7 +5,6 @@ import numpy as np import torch from mmdet3d.registry import MODELS - from mmengine.logging import print_log from torch import nn from torch.utils.checkpoint import checkpoint