diff --git a/Dockerfile b/Dockerfile index 3e9caecb9..2fbcaa620 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,13 +61,15 @@ RUN python3 -m pip --no-cache-dir install \ RUN python3 -m pip install git+https://github.com/tier4/t4-devkit@v0.5.1 # Install autoware-perception-evaluation -RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@9d8c9773d35177bb0b7f2606f429f58a5fb708ca +RUN python3 -m pip install git+https://github.com/tier4/autoware_perception_evaluation@3c9577dc23fd76a049559b42656ca46c1c32fa66 # Need to dowgrade setuptools to 60.2.0 to fix setup RUN python3 -m pip --no-cache-dir install \ setuptools==60.2.0 \ transformers==4.51.3 \ - polars==1.37.1 + polars==1.37.1 \ + onnx_graphsurgeon==0.5.8 \ + spconv-cu126==2.3.8 # NOTE(knzo25): this patch is needed to use numpy versions over 1.23.5 (version used in mmdet3d 1.4.0) # It can be safely deleted when mmdet3d updates the numpy version diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py index d0744a131..3be587072 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/base.py @@ -91,8 +91,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -113,7 +113,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -143,15 +143,12 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } -class_names = [ - "car", - "truck", - "bus", - "bicycle", - "pedestrian", -] +class_names = ["car", "truck", "bus", "bicycle", "pedestrian", "traffic_cone", "barrier"] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py index 3c8675c13..0324e7207 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2.py @@ -72,8 +72,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -94,7 +94,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -124,6 +124,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -132,6 +135,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py index cc3a86d3e..b9ec03f27 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_base.py @@ -78,8 +78,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -100,7 +100,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -130,14 +130,20 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } + class_names = [ "car", "truck", "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py new file mode 100644 index 000000000..e4375d576 --- /dev/null +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/j6gen2_v2.py @@ -0,0 +1,194 @@ +custom_imports = dict( + imports=[ + "autoware_ml.detection3d.datasets.t4dataset", + "autoware_ml.detection3d.evaluation.t4metric.t4metric", + "autoware_ml.detection3d.evaluation.t4metric.t4metric_v2", + ] +) + +# dataset type setting +dataset_type = "T4Dataset" +info_train_file_name = "t4dataset_j6gen2_v2_infos_train.pkl" +info_val_file_name = "t4dataset_j6gen2_v2_infos_val.pkl" +info_test_file_name = "t4dataset_j6gen2_v2_infos_test.pkl" + +info_train_statistics_file_name = "t4dataset_j6gen2_v2_statistics_train.parquet" +info_val_statistics_file_name = "t4dataset_j6gen2_v2_statistics_val.parquet" +info_test_statistics_file_name = "t4dataset_j6gen2_v2_statistics_test.parquet" + +# dataset scene setting +dataset_version_list = [ + "db_j6gen2_v2", +] + +dataset_test_groups = { + "j6gen2_v2": ("t4dataset_j6gen2_v2_infos_test.pkl", True), +} + +# dataset format setting +data_prefix = dict( + pts="", + CAM_FRONT="", + CAM_FRONT_LEFT="", + CAM_FRONT_RIGHT="", + CAM_BACK="", + CAM_BACK_RIGHT="", + CAM_BACK_LEFT="", + sweeps="", +) +camera_types = { + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", +} + +# class setting +name_mapping = { + # DBv1.0 + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.emergency (ambulance & police)": "car", + "vehicle.motorcycle": "bicycle", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + "vehicle.bicycle": "bicycle", + "vehicle.bus (bendy & rigid)": "bus", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "movable_object.barrier": "barrier", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.traffic_cone": "traffic_cone", + "animal": "animal", + "static_object.bicycle_rack": "bicycle_rack", + # DBv1.1 and UCv2.0 + "car": "car", + "truck": "truck", + "bus": "bus", + "trailer": "trailer", + "motorcycle": "bicycle", + "bicycle": "bicycle", + "police_car": "car", + "pedestrian": "pedestrian", + "police_officer": "pedestrian", + "forklift": "car", + "construction_worker": "pedestrian", + "stroller": "pedestrian", + # DBv2.0 and DBv3.0 + "animal": "animal", + "movable_object.barrier": "barrier", + "movable_object.pushable_pullable": "barrier", + "movable_object.traffic_cone": "traffic_cone", + "pedestrian.adult": "pedestrian", + "pedestrian.child": "pedestrian", + "pedestrian.construction_worker": "pedestrian", + "pedestrian.personal_mobility": "pedestrian", + "pedestrian.police_officer": "pedestrian", + "pedestrian.stroller": "pedestrian", + "pedestrian.wheelchair": "pedestrian", + "static_object.bicycle rack": "bicycle rack", + "static_object.bollard": "bollard", + "vehicle.ambulance": "car", # Define vehicle.ambulance as car since vehicle.emergency (ambulance & police) is defined as car + "vehicle.bicycle": "bicycle", + "vehicle.bus": "bus", + "vehicle.car": "car", + "vehicle.construction": "truck", + "vehicle.fire": "truck", + "vehicle.motorcycle": "bicycle", + "vehicle.police": "car", + "vehicle.trailer": "trailer", + "vehicle.truck": "truck", + # DBv1.3 + "ambulance": "car", + "kart": "car", + "wheelchair": "pedestrian", + "personal_mobility": "pedestrian", + "fire_truck": "truck", + "semi_trailer": "trailer", + "tractor_unit": "truck", + "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", +} + +class_names = [ + "car", + "truck", + "bus", + "bicycle", + "pedestrian", + "traffic_cone", + "barrier", +] +num_class = len(class_names) +metainfo = dict(classes=class_names) + +merge_objects = [ + ("truck", ["truck", "trailer"]), +] +merge_type = "extend_longer" # One of ["extend_longer","union", None] + +# visualization +class_colors = { + "car": (30, 144, 255), + "truck": (140, 0, 255), + "construction_vehicle": (255, 255, 0), + "bus": (111, 255, 111), + "trailer": (0, 255, 255), + "barrier": (0, 0, 0), + "motorcycle": (100, 0, 30), + "bicycle": (255, 0, 30), + "pedestrian": (255, 200, 200), + "traffic_cone": (120, 120, 120), +} +camera_panels = [ + "data/CAM_FRONT_LEFT", + "data/CAM_FRONT", + "data/CAM_FRONT_RIGHT", + "data/CAM_BACK_LEFT", + "data/CAM_BACK", + "data/CAM_BACK_RIGHT", +] + +filter_attributes = [ + ("vehicle.bicycle", "vehicle_state.parked"), + ("vehicle.bicycle", "cycle_state.without_rider"), + ("vehicle.bicycle", "motorcycle_state.without_rider"), + ("vehicle.motorcycle", "vehicle_state.parked"), + ("vehicle.motorcycle", "cycle_state.without_rider"), + ("vehicle.motorcycle", "motorcycle_state.without_rider"), + ("bicycle", "vehicle_state.parked"), + ("bicycle", "cycle_state.without_rider"), + ("bicycle", "motorcycle_state.without_rider"), + ("motorcycle", "vehicle_state.parked"), + ("motorcycle", "cycle_state.without_rider"), + ("motorcycle", "motorcycle_state.without_rider"), +] + +evaluator_metric_configs = dict( + evaluation_task="detection", + target_labels=class_names, + center_distance_bev_thresholds=[0.5, 1.0, 2.0, 4.0], + # plane_distance_thresholds is required for the pass fail evaluation + plane_distance_thresholds=[2.0, 4.0], + iou_2d_thresholds=None, + iou_3d_thresholds=None, + label_prefix="autoware", + # bev minimum distance ranges for each range bucket, must be the same length as max_distance, + # they will form bev distance ranges in [(min_distance[0], max_distance[0]), (min_distance[1], max_distance[1]), ...] when filtering + min_distance=[0.0, 50.0, 90.0, 0.0], + # bev maximum distance ranges for each range bucket, must be the same length as min_distance + max_distance=[50.0, 90.0, 121.0, 121.0], + min_point_numbers=0, + matching_class_agnostic_fps=False, +) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py index b7ddb799a..c08decfa1 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_base.py @@ -68,8 +68,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -90,7 +90,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -120,6 +120,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -128,7 +131,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py index f91bbc22f..dbd6e2813 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/jpntaxi_gen2.py @@ -65,8 +65,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -87,7 +87,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -117,6 +117,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -125,7 +128,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py index b117c3798..2212b8e56 100644 --- a/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py +++ b/autoware_ml/configs/detection3d/dataset/t4dataset/largebus.py @@ -67,8 +67,8 @@ "pedestrian.stroller": "pedestrian", "pedestrian.wheelchair": "pedestrian", "movable_object.barrier": "barrier", - "movable_object.debris": "debris", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.debris": "barrier", + "movable_object.pushable_pullable": "barrier", "movable_object.trafficcone": "traffic_cone", "movable_object.traffic_cone": "traffic_cone", "animal": "animal", @@ -89,7 +89,7 @@ # DBv2.0 and DBv3.0 "animal": "animal", "movable_object.barrier": "barrier", - "movable_object.pushable_pullable": "pushable_pullable", + "movable_object.pushable_pullable": "barrier", "movable_object.traffic_cone": "traffic_cone", "pedestrian.adult": "pedestrian", "pedestrian.child": "pedestrian", @@ -119,6 +119,9 @@ "semi_trailer": "trailer", "tractor_unit": "truck", "construction_vehicle": "truck", + "traffic_cone": "traffic_cone", + "trafficcone": "traffic_cone", + "barrier": "barrier", } class_names = [ @@ -127,7 +130,10 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ] + num_class = len(class_names) metainfo = dict(classes=class_names) diff --git a/autoware_ml/detection3d/datasets/t4dataset.py b/autoware_ml/detection3d/datasets/t4dataset.py index ce1c78f31..d7fed6256 100644 --- a/autoware_ml/detection3d/datasets/t4dataset.py +++ b/autoware_ml/detection3d/datasets/t4dataset.py @@ -192,4 +192,7 @@ def parse_data_info(self, info: dict) -> dict: else: info["lidar2img"] = info["cam2img"] @ info["lidar2cam"] + # Default difficulty to 0 if not present + if "difficulty" not in info: + info["difficulty"] = 0 return info diff --git a/pipelines/webauto/download_t4dataset/download_t4dataset.py b/pipelines/webauto/download_t4dataset/download_t4dataset.py index f06f6979d..d06b85717 100644 --- a/pipelines/webauto/download_t4dataset/download_t4dataset.py +++ b/pipelines/webauto/download_t4dataset/download_t4dataset.py @@ -68,8 +68,8 @@ def get_t4dataset_ids(config_path: str) -> list[str]: for key in required_keys: for t4dataset_ids in data_splits[key]: t4dataset_ids = t4dataset_ids.split("/") - if len(t4dataset_ids) == 4: - t4dataset_id, t4dataset_version_id, city, vehicle_type = t4dataset_ids + if len(t4dataset_ids) == 5: + t4dataset_id, t4dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = t4dataset_ids elif len(t4dataset_ids) == 2: t4dataset_id, t4dataset_version_id = t4dataset_ids elif len(t4dataset_ids) == 1: diff --git a/projects/BEVFusion/bevfusion/bevfusion_head.py b/projects/BEVFusion/bevfusion/bevfusion_head.py index 853523c4f..c37c5a538 100644 --- a/projects/BEVFusion/bevfusion/bevfusion_head.py +++ b/projects/BEVFusion/bevfusion/bevfusion_head.py @@ -13,6 +13,7 @@ from mmdet3d.structures import xywhr2xyxyr from mmdet.models.task_modules import AssignResult, PseudoSampler, build_assigner, build_bbox_coder, build_sampler from mmdet.models.utils import multi_apply +from mmengine.logging import print_log from mmengine.structures import InstanceData from torch import nn @@ -69,6 +70,7 @@ def __init__( train_cfg=None, test_cfg=None, bbox_coder=None, + partial_ignore_labels=None, ): super().__init__() self.class_names = class_names @@ -82,7 +84,6 @@ def __init__( self.nms_kernel_size = nms_kernel_size self.train_cfg = train_cfg self.test_cfg = test_cfg - self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if not self.use_sigmoid_cls: self.num_classes += 1 @@ -186,6 +187,20 @@ def __init__( [self.class_name_to_indices[class_name] for class_name in cluster["class_names"]] ) + # If true, only compute loss for traffic cone and barrier when it's available in the frame + if partial_ignore_labels is not None: + assert ( + loss_heatmap["reduction"] == "none" + ), "Loss reduction must be 'none' for partial traffic cone and barrier" + self.partial_ignore_labels = [ + self.class_name_to_indices[class_name] for class_name in partial_ignore_labels + ] + else: + self.partial_ignore_labels = None + + print_log(f"BEVFusionHead Partial ignore labels: {self.partial_ignore_labels}, dense heatmap pooling classes: \ + {self.dense_heatmap_pooling_classes}, class_names: {self.class_names}", logger="current") + def create_2D_grid(self, x_size, y_size): meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]] # NOTE: modified @@ -456,7 +471,9 @@ def predict_by_feat(self, preds_dicts, metas, img=None, rescale=False, for_roi=F return rets[0] - def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict]): + def get_targets( + self, batch_gt_instances_3d: List[InstanceData], preds_dict: List[dict], batch_metadata: List[dict] + ): """Generate training targets. Args: batch_gt_instances_3d (List[InstanceData]): @@ -500,6 +517,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis batch_gt_instances_3d, list_of_pred_dict, np.arange(len(batch_gt_instances_3d)), + batch_metadata, ) labels = torch.cat(res_tuple[0], dim=0) label_weights = torch.cat(res_tuple[1], dim=0) @@ -509,6 +527,7 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos = np.sum(res_tuple[5]) matched_ious = np.mean(res_tuple[6]) heatmap = torch.cat(res_tuple[7], dim=0) + heatmap_weights = torch.cat(res_tuple[8], dim=0) return ( labels, label_weights, @@ -518,9 +537,10 @@ def get_targets(self, batch_gt_instances_3d: List[InstanceData], preds_dict: Lis num_pos, matched_ious, heatmap, + heatmap_weights, ) - def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): + def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx, metadata): """Generate training targets for a single sample. Args: gt_instances_3d (:obj:`InstanceData`): ground truth of instances. @@ -616,7 +636,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): ious = assign_result_ensemble.max_overlaps ious = torch.clamp(ious, min=0.0, max=1.0) labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) - label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long) + label_weights = bboxes_tensor.new_zeros([num_proposals, self.num_classes], dtype=torch.long) if gt_labels_3d is not None: # default label is -1 labels += self.num_classes @@ -671,6 +691,17 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int[[1, 0]], radius) mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1) + heatmap_weights = torch.ones_like(heatmap) + + # Ignore labels for traffic cone and barrier + traffic_cone_barrier_status = metadata.get("traffic_cone_barrier_status", True) + if self.partial_ignore_labels is not None and not traffic_cone_barrier_status: + heatmap_weights[self.partial_ignore_labels] = 0.0 # Set to 0 to ignore these grids + if len(neg_inds) > 0: + # neg_inds [N] and column indices [K] must broadcast (not pair); + _cols = torch.as_tensor(self.partial_ignore_labels, device=label_weights.device, dtype=torch.long) + label_weights[neg_inds.unsqueeze(1), _cols.unsqueeze(0)] = 0.0 + return ( labels[None], label_weights[None], @@ -680,6 +711,7 @@ def get_targets_single(self, gt_instances_3d, preds_dict, batch_idx): int(pos_inds.shape[0]), float(mean_iou), heatmap[None], + heatmap_weights[None], ) def loss(self, batch_feats, batch_data_samples): @@ -698,11 +730,13 @@ def loss(self, batch_feats, batch_data_samples): batch_input_metas.append(data_sample.metainfo) batch_gt_instances_3d.append(data_sample.gt_instances_3d) preds_dicts = self(batch_feats, batch_input_metas) - loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d) + loss = self.loss_by_feat(preds_dicts, batch_gt_instances_3d, batch_input_metas) return loss - def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, **kwargs): + def loss_by_feat( + self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], batch_input_metas + ): ( labels, label_weights, @@ -712,7 +746,8 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li num_pos, matched_ious, heatmap, - ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0]) + heatmap_weights, + ) = self.get_targets(batch_gt_instances_3d, preds_dicts[0], batch_input_metas) if hasattr(self, "on_the_image_mask"): label_weights = label_weights * self.on_the_image_mask bbox_weights = bbox_weights * self.on_the_image_mask[:, :, None] @@ -721,12 +756,32 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li loss_dict = dict() # compute heatmap loss - loss_heatmap = self.loss_heatmap( - clip_sigmoid(preds_dict["dense_heatmap"]).float(), - heatmap.float(), - avg_factor=max(heatmap.eq(1).float().sum().item(), 1), - ) - loss_dict["loss_heatmap"] = loss_heatmap + preds_dense_heatmap = clip_sigmoid(preds_dict["dense_heatmap"].float()) + num_pos_dense_heatmap = max(heatmap.eq(1).float().sum().item(), 1) + if self.partial_ignore_labels is None: + loss_heatmap = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + avg_factor=num_pos_dense_heatmap, + ) + loss_dict["loss_heatmap"] = loss_heatmap + else: + # When ignore labels is found, we compute the loss for each class + # heatmap focal loss + loss_heatmap_cls: torch.Tensor = self.loss_heatmap( + preds_dense_heatmap, + heatmap.float(), + ) + + # (Batch, num_classes, height, width) * (Batch, num_classes, height, width) + loss_heatmap_cls = loss_heatmap_cls * heatmap_weights.float() + loss_heatmap_cls = loss_heatmap_cls.sum((0, 2, 3)) / num_pos_dense_heatmap + # (Batch, num_classes) + for cls_i, class_name in enumerate(self.class_names): + loss_dict[f"loss_heatmap_{class_name}"] = loss_heatmap_cls[cls_i] + + # Prevent loss item to avoid computing gradients twice. This is for logging. + loss_dict["total_dense_heatmap"] = loss_heatmap_cls.sum() # compute loss for each layer for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1): @@ -742,7 +797,9 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: Li layer_label_weights = label_weights[ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, - ].reshape(-1) + ] + # (Batch*num_proposals, num_classes) + layer_label_weights = layer_label_weights.reshape(-1, self.num_classes) layer_score = preds_dict["heatmap"][ ..., idx_layer * self.num_proposals : (idx_layer + 1) * self.num_proposals, diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py index 9da67036e..380a4ba81 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m.py @@ -13,7 +13,7 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m" @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py index 39462b1f6..e3f7d5146 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/j6gen2_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/j6gen2_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_j6gen2_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py index c884c0aef..eec87a585 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m.py @@ -13,10 +13,10 @@ # user setting data_root = "data/t4dataset/" -info_directory_path = "info/user_name/" +info_directory_path = "info/kokseang_2_8/" -experiment_group_name = "bevfusion_lidar/jpntaxi_base/" + _base_.dataset_type -experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m" +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type +experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_ignore" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name # model parameter @@ -64,6 +64,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) @@ -160,4 +164,4 @@ ) log_processor = dict(window_size=50) -load_from = None +load_from = "work_dirs/bevfusion_lidar_traffic_cone/base/T4Dataset/lidar_voxel_second_secfpn_50e_8xb8_base_120m_ignore/epoch_48.pth" diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py index b50b093f7..213f0041b 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2.py @@ -3,7 +3,7 @@ ] # user setting -experiment_group_name = "bevfusion_lidar_intensity/jpntaxi_base/" + _base_.dataset_type +experiment_group_name = "bevfusion_lidar_intensity_traffic_cone/jpntaxi_base/" + _base_.dataset_type experiment_name = "lidar_voxel_second_secfpn_30e_8xb8_jpntaxi_base_120m_t4metric_v2" work_dir = "work_dirs/" + experiment_group_name + "/" + experiment_name @@ -18,7 +18,7 @@ frame_pass_fail_config = dict( target_labels=_base_.class_names, # Matching thresholds per class (must align with `plane_distance_thresholds` used in evaluation) - matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0], + matching_threshold_list=[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0], confidence_threshold_list=None, ) diff --git a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py index 79337d976..e8068332a 100644 --- a/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py +++ b/projects/BEVFusion/configs/t4dataset/BEVFusion-L/bevfusion_lidar_voxel_second_secfpn_50e_8xb8_base_120m.py @@ -62,6 +62,10 @@ pc_range=_base_.point_cloud_range[0:2], voxel_size=_base_.voxel_size[0:2], ), + partial_ignore_labels=["traffic_cone", "barrier"], + loss_heatmap=dict( + reduction="none", + ), ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py index b5d9a8fdc..809179b20 100644 --- a/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/models/default_lidar_second_secfpn_120m.py @@ -91,9 +91,11 @@ dict(class_names=["car", "truck", "bus"], nms_threshold=0.5), # It's radius if using circle_nms dict(class_names=["bicycle"], nms_threshold=0.5), dict(class_names=["pedestrian"], nms_threshold=0.175), + dict(class_names=["barrier"], nms_threshold=0.5), + dict(class_names=["traffic_cone"], nms_threshold=0.175), ], ), - dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle"], # Use class indices for pooling + dense_heatmap_pooling_classes=["car", "truck", "bus", "bicycle", "barrier"], # Use class indices for pooling common_heads=dict(center=[2, 2], height=[1, 2], dim=[3, 2], rot=[2, 2], vel=[2, 2]), bbox_coder=dict( type="TransFusionBBoxCoder", @@ -112,5 +114,6 @@ ), loss_heatmap=dict(type="mmdet.GaussianFocalLoss", reduction="mean", loss_weight=1.0), loss_bbox=dict(type="mmdet.L1Loss", reduction="mean", loss_weight=0.25), + partial_ignore_labels=None, ), ) diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py index 06d95be16..09b9f7b26 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -57,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), @@ -84,6 +88,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +132,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py index 4e74d3616..e2de195e9 100644 --- a/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py +++ b/projects/BEVFusion/configs/t4dataset/default/pipelines/default_lidar_intensity_120m.py @@ -13,6 +13,8 @@ "bus": 120, "bicycle": 120, "pedestrian": 120, + "traffic_cone": 120, + "barrier": 120, } # LiDAR parameters @@ -57,6 +59,8 @@ "bus", "bicycle", "pedestrian", + "traffic_cone", + "barrier", ], ), dict(type="PointShuffle"), @@ -84,6 +88,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] @@ -127,6 +132,7 @@ "timestamp", "vehicle_type", "city", + "traffic_cone_barrier_status", ], ), ] diff --git a/tools/detection3d/create_data_t4dataset.py b/tools/detection3d/create_data_t4dataset.py index 1e61af9d8..3b02017e0 100644 --- a/tools/detection3d/create_data_t4dataset.py +++ b/tools/detection3d/create_data_t4dataset.py @@ -102,6 +102,7 @@ def get_info( sample: Sample, i: int, max_sweeps: int, + traffic_cone_barrier_status: str, city: Optional[str] = None, vehicle_type: Optional[str] = None, ) -> Dict[str, Any]: @@ -129,6 +130,10 @@ def get_info( sd_record: SampleData = t4.get("sample_data", lidar_token) info = get_empty_standard_data_info(cfg.camera_types) + if traffic_cone_barrier_status == "true": + traffic_cone_barrier_status = True + else: + traffic_cone_barrier_status = False basic_info = dict( sample_idx=i, @@ -139,6 +144,7 @@ def get_info( scene_name=scene_record.name, city=city, vehicle_type=vehicle_type, + traffic_cone_barrier_status=traffic_cone_barrier_status, ) for new_info in [ @@ -268,6 +274,7 @@ def main(): if cfg.filter_attributes is None: print_log("No attribute filtering is applied!") + remove_non_traffic_cone_barrier = cfg.get("remove_non_traffic_cone_barrier", False) # Get every pair of min-max distance filtering thresholds bev_distance_ranges = [] if hasattr(cfg, "evaluator_metric_configs"): @@ -302,8 +309,16 @@ def main(): f"Creating data info for scene: {scene_id}, steps: {sample_steps}, sweeps: {args.max_sweeps}" ) dataset_scene_info = scene_id.split("/") - if len(dataset_scene_info) == 4: - t4_dataset_id, t4_dataset_version_id, city, vehicle_type = dataset_scene_info + if len(dataset_scene_info) == 5: + t4_dataset_id, t4_dataset_version_id, city, vehicle_type, traffic_cone_barrier_status = ( + dataset_scene_info + ) + if remove_non_traffic_cone_barrier and traffic_cone_barrier_status == "false": + print_log( + f"Skipping scene: {scene_id} because it does not have traffic cone or barrier", + logger="current", + ) + continue elif len(dataset_scene_info) == 2: t4_dataset_id, t4_dataset_version_id = dataset_scene_info city = vehicle_type = None @@ -326,7 +341,9 @@ def main(): infos = [] for i in range(0, len(t4.sample), sample_steps): sample = t4.sample[i] - info = get_info(cfg, t4, sample, i, args.max_sweeps, city, vehicle_type) + info = get_info( + cfg, t4, sample, i, args.max_sweeps, traffic_cone_barrier_status, city, vehicle_type + ) if info is None: continue # info["version"] = dataset_version # used for visualizations during debugging. diff --git a/tools/detection3d/t4dataset_converters/t4converter.py b/tools/detection3d/t4dataset_converters/t4converter.py index 842b0f458..ccc88b2d1 100644 --- a/tools/detection3d/t4dataset_converters/t4converter.py +++ b/tools/detection3d/t4dataset_converters/t4converter.py @@ -627,6 +627,10 @@ def get_lidarseg_annotations( if not hasattr(t4, "lidarseg") or not t4.lidarseg: return dict() + if sd_record.info_filename is None: + print(f"sample {lidar_token} doesn't have lidar info_filename") + return dict() + assert i < len(t4.lidarseg), "Index exceeds number of lidarseg records!" assert t4.lidarseg[i].sample_data_token == lidar_token, "Sample data token mismatch!" return dict(