diff --git a/1.csv b/1.csv new file mode 100644 index 0000000..5f08b8a --- /dev/null +++ b/1.csv @@ -0,0 +1,220 @@ +- - '4cac9f6cd85a5b47' + - '2382542e05f253df' + - - - '5b733b329372ed8c8' + - 'cc716ed59dd6f0250' + - - '10933ac8239bd6fbb' + - 'dedf2ffe66804a16a' + - - 'ab74a4ee392eb4ffc' + - '543679cd5973594a2' + - - '0cd9ea86052a0b8e1' + - '0d70fefa06711301c' + - - '5f775bac151622950' + - 'c8a0832c8f737ce92' + - - '2a46d49b6aa55f1ca' + - 'fc8bd325b3da03b85' + - - '4a421aa61a3bbad1d' + - 'a0dee764f3cd352e8' + - - '4a5ea5d6a2735531c' + - '49d2336208d2cac37' + - - '8e653e8d9b170a7dd' + - '00caf7c273c7d5065' + - - 'ecefe6eb9d594a685' + - 'eb16cfaf356e80f1f' +- - 'c054f473288d5515' + - '68097b10029d5fe1' + - - - '4d115c250e5dc02b1' + - '45d88e7f92a4a9e74' + - - '7b5998d0c96e5433a' + - 'b77f2183b554e6641' + - - 'cd9aed75f054bccb5' + - '6808c5efa6976fe37' + - - '07d57ac58a266170f' + - 'd9bbca7a0907304f1' + - - '50442ffec20a73824' + - 'e0e4ca847f84568a3' + - - '142f665559387e2c2' + - 'f0ec163fc911ff8bb' + - - 'ba9155b84dc322eac' + - '765fab253a51a77e4' + - - '51bb4445380dac75e' + - '9d5c879308ff1f5b5' + - - '645751bf568658675' + - 'fad1ad3bce41df4d9' + - - 'bef503ff5929be3a8' + - '91e2e5e740db004ce' + - - '44a5937b14cec415b' + - 'fa8d78f84cd8b7f8c' + - - '51d3d62e78d712283' + - '62631b017ceec5a18' + - - '7caf8d7d76cd0a120' + - '2bb929aa7298012be' + - - '7c6eb3ebf2ec0a507' + - 'a31ca4af2bc4ff90a' + - - '5fcf415bcf1a37722' + - '6cca874fda7a3507a' +- - 'f14e7ed78b5d55b4' + - 'ae472d675a965aca' + - - - '78e346fdbeb014770' + - '82e6ed49ff7af43fe' + - - 'c481b1ded8232fa61' + - '2251eed913b2fbe46' + - - '43eb9502a0d70fcea' + - 'bc9a77699f862350d' + - - 'd5cbf126369d32fdf' + - '57e7f219c88ab8fe0' + - - '7516958eab299d94d' + - '754d33f5c0419048c' + - - '6c5bcf6f99698fac4' + - 'ac7a491ea6609fc90' + - - '18087593f1147e61f' + - '32b8d38d96342e28b' + - - '6c04df27e5264b20b' + - '232ee9d7342aed6a0' + - - '2a8ffeaa7391e73c7' + - '646aa5cbfd5f84a9e' + - - 'c9c198442a1efdc10' + - 'ceeb361872a012bd7' + - - '6c0405cd5c40c15ab' + - '3d0fbd6a6fbde5f86' +- - '13310ca9a7715154' + - 'd01fec804cd45644' + - - - 'bc686838d510dad87' + - 'bc6888f8da8ee3d48' + - - '90dfc4788a349e60b' + - '5fb2ee221c72e08b2' + - - '8d59e8ecb60e11b5b' + - '1effe02b2af9680a9' + - - 'fc3f449099625878e' + - 'fedfbdfcf20092620' + - - 'eaa70df977f0ed637' + - '32a33652619708c65' + - - 'dc92f82e6715ac493' + - 'fd0fe3e97024a208f' + - - '6d8d91ad290f124b2' + - '362376a65fe8ca576' + - - 'c58481114df532967' + - '863415a0e3cc1897d' + - - 'af1a4d903d6c645ce' + - 'a946e96acbe5186d9' + - - '4855fc07a04961bfd' + - '73bd1fde4cf4bbd5a' + - - 'e147b31afd8a5c390' + - '32888a8d5ffa4cfa6' + - - 'beee26fb5719cdfcb' + - '27d64cbd86e07452b' +- - 'c1839a3333695317' + - 'f3ca2707d87a586b' + - - - '774a05df3c1512cac' + - '431df46346088a1de' + - - 'e9f1d3b368a8f7603' + - '026597b466e481724' + - - '956f2e4043c1b8f4d' + - 'b5d11a125c8146f58' + - - '8c19ef349d4fc37cb' + - '80fbecd9f16630510' + - - 'dd01f78238cd88b26' + - '8413ea29ea72fdfde' + - - 'c807c2486a7897e81' + - '5688641dd10efb02d' + - - '6d342222e4d9d2b2a' + - '8aea1806b6e406159' + - - 'ea96a137518d58ac9' + - '6f8794f5f301202d0' + - - 'e774d1b87b07638c4' + - '8d71beefa48cb91fb' + - - 'ba3f20b19ddb93cb1' + - '8c7844d1d621c907b' + - - '5913dfbc435fb100c' + - '6333f19e87ad08122' + - - 'eedb6f9d589e745c8' + - '9f903173a7777cf35' + - - '188fef8a871d3bb74' + - '7098c40dee5929017' + - - '5c8283705c1e44a16' + - '78fdd992fa8acb334' + - - '0225e9a16d781eb96' + - '8f3bf4b1cb4be05f8' + - - '4c03648ccf3301fe7' + - 
'241e6bedc6fcf2de4' + - - 'ef08e59a9c882aea6' + - '801d1b42c0409656b' + - - 'c6f215cb7b0d88628' + - '95bfecce656f72d69' + - - 'de1d1d3fbf4860a8c' + - '474779897c967667c' +- - 'f6b9867069845fd2' + - '19e90f2757b25f38' + - - - 'c91d6633fc0516a81' + - '3042cb349e2487756' + - - '042bf7d6a0f98a104' + - 'f67fdc6f83d2e8656' + - - '38d50c91468fbe6e5' + - 'e03e2999009dbd343' + - - '07a36d3f2100bc352' + - 'ffe0db4c4b5ed141a' + - - 'a52a1bf87700c06b5' + - '32f2437326c9e81b2' + - - 'beae653519f0232e7' + - 'e64bbff1f3e0c30be' + - - '9e0949ca46f2a4a49' + - '2e822d476fd14739c' + - - '913e360242a39e667' + - 'b545597d491f4a96e' + - - '23b5ec16bb6ad0b92' + - '34e7a39f33ba4f108' + - - 'f2b84517dc455c03f' + - 'ebe3f434bfee258dc' + - - '875f6c89666fde397' + - 'e8a9486b49b69adbf' +- - '3c542be991515ccd' + - '5f53001e7e2d5347' + - - - '5a6f69ea54c00b6d2' + - '9cb5545908502a26c' + - - 'c47263e93d93b6838' + - 'e92394471671ed533' + - - 'dd091b9e715ca6370' + - 'c64e0d6194293f51e' + - - '01898e4c2520a9400' + - 'bc730296c22859127' + - - 'be4d95422b6b6e212' + - 'bffe666f9175e93c1' + - - 'c909e005442d28a9c' + - '9600f9f6992618229' + - - '2048a65fec13c23d5' + - '71afcb9b1ac436e14' + - - '1a6ba40b74f9b51e5' + - '13f7d51df20a8dc66' + - - '321422cad3f85a6a3' + - '808e6b707f608e2a9' + - - 'e7ae2cd68d33a847e' + - '7a999e76f404c4956' + - - 'f9533c6195790b299' + - 'aaaeb97c15db0e2f6' + - - 'b3c7672d677c644c7' + - '960a0cd8fd6e4be22' + - - '5922fad17f2f5e59f' + - 'e1f0dcd38a9ec2523' + - - '15755cd0249f8223a' + - '190768b2e92e01b36' + - - 'b795e9c4c8fde41bb' + - '3c8afefbaf08142c1' +- - '0dc54a8c8203567b' + - 'ed3db88f99a05a6c' + - - - '4876fb179a00309b5' + - '35fc09471c163fd84' + - - 'e593c645d1fd3fdb9' + - '5065468cd60f1bc00' + - - 'ae4f3518384257a07' + - 'ad537f45665843f16' + - - '12499ddd90200ea9f' + - 'e6aaf829acef158f6' + - - 'cea842f2d5b0ad72d' + - '02644631a56fa1750' + - - 'c2e48998f58fcf229' + - '5ae5fc96ae87f7041' + - - '583a61a4880467bf7' + - '5f69ad192f42e1e35' + - - 'ad027cf65e5484130' + - '2df81c34dac0b3f5c' + - - 'f5d454a22e5731c5a' + - '4533792bbf9b2478b' \ No newline at end of file diff --git a/1.py b/1.py new file mode 100644 index 0000000..1808bef --- /dev/null +++ b/1.py @@ -0,0 +1,8 @@ +# /data/hdd01/dingzx/dataset/private_test_hard_two_stage/openscene_meta_datas/0a79e71ca7aa8e249.pkl +# 1d70d69255aef8709 +import pickle + +with open('/data/hdd01/dingzx/dataset/private_test_hard_two_stage/openscene_meta_datas/0a79e71ca7aa8e249.pkl', 'rb') as f: + data = pickle.load(f) + +print(data) # 打印文件内容 \ No newline at end of file diff --git a/1/1.py b/1/1.py new file mode 100644 index 0000000..49817e0 --- /dev/null +++ b/1/1.py @@ -0,0 +1,4 @@ +print("<<") +print("<<") +print("<<") +print("<<") \ No newline at end of file diff --git a/navsim/agents/abstract_agent.py b/navsim/agents/abstract_agent.py index 5c2ce9b..10b50c1 100644 --- a/navsim/agents/abstract_agent.py +++ b/navsim/agents/abstract_agent.py @@ -68,18 +68,23 @@ def compute_trajectory(self, agent_input: AgentInput) -> Trajectory: """ self.eval() features: Dict[str, torch.Tensor] = {} + # targets: Dict[str, torch.Tensor] = {} # build features for builder in self.get_feature_builders(): features.update(builder.compute_features(agent_input)) + # build targets + # for builder in self.get_target_builders(): + # targets.update(builder.compute_targets(agent_input)) + # add batch dimension features = {k: v.unsqueeze(0) for k, v in features.items()} - + # targets = {k: v.unsqueeze(0) for k, v in targets.items()} # forward pass with torch.no_grad(): predictions = 
self.forward(features) poses = predictions["trajectory"].squeeze(0).numpy() - + # print("huatu") # extract trajectory return Trajectory(poses, self._trajectory_sampling) diff --git a/navsim/agents/abstract_agent_diffusiondrive.py b/navsim/agents/abstract_agent_diffusiondrive.py new file mode 100644 index 0000000..b56fef3 --- /dev/null +++ b/navsim/agents/abstract_agent_diffusiondrive.py @@ -0,0 +1,127 @@ +from abc import abstractmethod, ABC +from typing import Dict, Union, List +import torch +import pytorch_lightning as pl +from typing import Tuple +import numpy as np +from navsim.common.dataclasses import AgentInput, Trajectory, SensorConfig, Scene +from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder + + +class AbstractAgent(torch.nn.Module, ABC): + """Interface for an agent in NAVSIM.""" + + def __init__( + self, + requires_scene: bool = False, + ): + super().__init__() + self.requires_scene = requires_scene + + @abstractmethod + def name(self) -> str: + """ + :return: string describing name of this agent. + """ + pass + + @abstractmethod + def get_sensor_config(self) -> SensorConfig: + """ + :return: Dataclass defining the sensor configuration for lidar and cameras. + """ + pass + + @abstractmethod + def initialize(self) -> None: + """ + Initialize the agent. + """ + pass + + def forward(self, features: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + Forward pass of the agent. + :param features: Dictionary of features. + :return: Dictionary of predictions. + """ + raise NotImplementedError + + def get_feature_builders(self) -> List[AbstractFeatureBuilder]: + """ + :return: List of feature builders. + """ + raise NotImplementedError("No feature builders. Agent does not support training.") + + def get_target_builders(self) -> List[AbstractTargetBuilder]: + """ + :return: List of target builders. + """ + raise NotImplementedError("No target builders. Agent does not support training.") + + def compute_trajectory(self, agent_input: AgentInput) -> Tuple[np.ndarray,np.ndarray]: + """ + Computes the ego vehicle trajectory. + :param agent_input: Dataclass with agent inputs.
+ :return: Trajectory representing the predicted ego's position in future + """ + self.eval() + features: Dict[str, torch.Tensor] = {} + # targets: Dict[str, torch.Tensor] = {} + # build features + for builder in self.get_feature_builders(): + features.update(builder.compute_features(agent_input)) + + # for builder in self.get_target_builders(): + # targets.update(builder.compute_targets(scene)) + + + # "trajectory": trajectory, + # "agent_states": agent_states, + # "agent_labels": agent_labels, + # "bev_semantic_map": bev_semantic_map,` + + # add batch dimension + features = {k: v.unsqueeze(0) for k, v in features.items()} + # targets = {k: v.unsqueeze(0) for k, v in targets.items()} + # print(targets) + # print('<<<<<<<<<<<<<<<') + # poses1 = targets["trajectory"].squeeze(0).numpy() + # forward pass + with torch.no_grad(): + predictions = self.forward(features) + poses = predictions['trajectory'].squeeze(0).numpy()  # 20 8 3 20 64 8 3 + anchor_poses = predictions['anchor_trajectories'].squeeze(0).numpy()  # ensure conversion to numpy + # extract trajectory + # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + # print(anchor_trajectories.shape) + return poses,anchor_poses + # return Trajectory(poses) + + def compute_loss( + self, + features: Dict[str, torch.Tensor], + targets: Dict[str, torch.Tensor], + predictions: Dict[str, torch.Tensor], + ) -> torch.Tensor: + """ + Computes the loss used for backpropagation based on the features, targets and model predictions. + """ + raise NotImplementedError("No loss. Agent does not support training.") + + def get_optimizers( + self, + ) -> Union[torch.optim.Optimizer, Dict[str, Union[torch.optim.Optimizer, torch.optim.lr_scheduler.LRScheduler]]]: + """ + Returns the optimizers that are used by the pytorch-lightning trainer. + Has to be either a single optimizer or a dict of optimizer and lr scheduler. + """ + raise NotImplementedError("No optimizers. Agent does not support training.") + + def get_training_callbacks(self) -> List[pl.Callback]: + """ + Returns a list of pytorch-lightning callbacks that are used during training. + See navsim.planning.training.callbacks for examples.
+ """ + return [] \ No newline at end of file diff --git a/navsim/agents/diffusiondrive/modules/blocks.py b/navsim/agents/diffusiondrive/modules/blocks.py new file mode 100644 index 0000000..aa88372 --- /dev/null +++ b/navsim/agents/diffusiondrive/modules/blocks.py @@ -0,0 +1,189 @@ +from typing import List, Optional, Tuple +import math +import numpy as np +import torch +import torch.nn as nn +from torch.cuda.amp.autocast_mode import autocast + +def linear_relu_ln(embed_dims, in_loops, out_loops, input_dims=None): + if input_dims is None: + input_dims = embed_dims + layers = [] + for _ in range(out_loops): + for _ in range(in_loops): + layers.append(nn.Linear(input_dims, embed_dims)) + layers.append(nn.ReLU(inplace=True)) + input_dims = embed_dims + layers.append(nn.LayerNorm(embed_dims)) + return layers + +def gen_sineembed_for_position(pos_tensor, hidden_dim=256): + """Mostly copy-paste from https://github.com/IDEA-opensource/DAB-DETR/ + """ + half_hidden_dim = hidden_dim // 2 + scale = 2 * math.pi + dim_t = torch.arange(half_hidden_dim, dtype=torch.float32, device=pos_tensor.device) + dim_t = 10000 ** (2 * (dim_t // 2) / half_hidden_dim) + x_embed = pos_tensor[..., 0] * scale + y_embed = pos_tensor[..., 1] * scale + pos_x = x_embed[..., None] / dim_t + pos_y = y_embed[..., None] / dim_t + pos_x = torch.stack((pos_x[..., 0::2].sin(), pos_x[..., 1::2].cos()), dim=-1).flatten(-2) + pos_y = torch.stack((pos_y[..., 0::2].sin(), pos_y[..., 1::2].cos()), dim=-1).flatten(-2) + pos = torch.cat((pos_y, pos_x), dim=-1) + return pos + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to giving probablity.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +class GridSampleCrossBEVAttention(nn.Module): + def __init__(self, embed_dims, num_heads, num_levels=1, in_bev_dims=64, num_points=8, config=None): + super(GridSampleCrossBEVAttention, self).__init__() + self.embed_dims = embed_dims + self.num_heads = num_heads + self.num_levels = num_levels + self.num_points = num_points + self.config = config + self.attention_weights = nn.Linear(embed_dims,num_points) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.dropout = nn.Dropout(0.1) + + + self.value_proj = nn.Sequential( + nn.Conv2d(in_bev_dims, 256, kernel_size=(3, 3), stride=(1, 1), padding=1,bias=True), + nn.ReLU(inplace=True), + ) + + self.init_weight() + + def init_weight(self): + + nn.init.constant_(self.attention_weights.weight, 0) + nn.init.constant_(self.attention_weights.bias, 0) + + nn.init.xavier_uniform_(self.output_proj.weight) + nn.init.constant_(self.output_proj.bias, 0) + + + def forward(self, queries, traj_points, bev_feature, spatial_shape): + """ + Args: + queries: input features with shape of (bs, num_queries, embed_dims) + traj_points: trajectory points with shape of (bs, num_queries, num_points, 2) + bev_feature: bev features with shape of (bs, embed_dims, height, width) + spatial_shapes: (height, width) + + """ + + bs, num_queries, num_points, _ = traj_points.shape + + # Normalize trajectory points to [-1, 1] range for grid_sample + normalized_trajectory = traj_points.clone() + normalized_trajectory[..., 0] = normalized_trajectory[..., 0] / self.config.lidar_max_y + normalized_trajectory[..., 1] = normalized_trajectory[..., 1] / self.config.lidar_max_x + + normalized_trajectory = normalized_trajectory[..., [1, 0]] # Swap x and y + + attention_weights = self.attention_weights(queries) + attention_weights = attention_weights.view(bs, num_queries, 
num_points).softmax(-1) + + value = self.value_proj(bev_feature) + grid = normalized_trajectory.view(bs, num_queries, num_points, 2) + # Sample features + sampled_features = torch.nn.functional.grid_sample( + value, + grid, + mode='bilinear', + padding_mode='zeros', + align_corners=False + ) # bs, C, num_queries, num_points + + attention_weights = attention_weights.unsqueeze(1) + out = (attention_weights * sampled_features).sum(dim=-1) + out = out.permute(0, 2, 1).contiguous() # bs, num_queries, C + out = self.output_proj(out) + + return self.dropout(out) + queries + + +# navi +class GridSampleCrossBEVAttention_navi(nn.Module): + def __init__(self, embed_dims, num_heads, num_levels=1, in_bev_dims=64, num_points=1, config=None): + super(GridSampleCrossBEVAttention_navi, self).__init__() + self.embed_dims = embed_dims + self.num_heads = num_heads + self.num_levels = num_levels + self.num_points = num_points + self.config = config + self.attention_weights = nn.Linear(embed_dims,num_points) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.dropout = nn.Dropout(0.1) + + + self.value_proj = nn.Sequential( + nn.Conv2d(in_bev_dims, 256, kernel_size=(3, 3), stride=(1, 1), padding=1,bias=True), + nn.ReLU(inplace=True), + ) + + self.init_weight() + + def init_weight(self): + + nn.init.constant_(self.attention_weights.weight, 0) + nn.init.constant_(self.attention_weights.bias, 0) + + nn.init.xavier_uniform_(self.output_proj.weight) + nn.init.constant_(self.output_proj.bias, 0) + + + def forward(self, queries, gt_points, bev_feature, spatial_shape): + """ + Args: + queries: input features with shape of (bs, num_queries, embed_dims) + traj_points: trajectory points with shape of (bs, num_queries, num_points, 2) + bev_feature: bev features with shape of (bs, embed_dims, height, width) + spatial_shapes: (height, width) + + """ + gt_points=gt_points.to(torch.float32) + gt_points=gt_points.unsqueeze(1) + gt_points=gt_points.unsqueeze(1) + gt_points=gt_points.expand(-1,20,-1,-1) + # 64 20 1 2 + bs, num_queries, num_points, _ = gt_points.shape + + # Normalize trajectory points to [-1, 1] range for grid_sample + normalized_trajectory = gt_points.clone() + normalized_trajectory[..., 0] = normalized_trajectory[..., 0] / self.config.lidar_max_y + normalized_trajectory[..., 1] = normalized_trajectory[..., 1] / self.config.lidar_max_x + + normalized_trajectory = normalized_trajectory[..., [1, 0]] # Swap x and y + + # 64 20 1 + attention_weights = self.attention_weights(queries) + attention_weights = attention_weights.view(bs, num_queries, num_points).softmax(-1) + + value = self.value_proj(bev_feature) + #64 20 1 2 + grid = normalized_trajectory.view(bs, num_queries, num_points, 2) + grid=grid.to(torch.float32) + # Sample features + sampled_features = torch.nn.functional.grid_sample( + value, + grid, + mode='bilinear', + padding_mode='zeros', + align_corners=False + ) # bs, C, num_queries, num_points + + + # 64 1 1 1 + attention_weights = attention_weights.unsqueeze(1) + out = (attention_weights * sampled_features).sum(dim=-1) + out = out.permute(0, 2, 1).contiguous() # bs, num_queries, C + out = self.output_proj(out) + + return self.dropout(out) + queries diff --git a/navsim/agents/diffusiondrive/modules/conditional_unet1d.py b/navsim/agents/diffusiondrive/modules/conditional_unet1d.py new file mode 100644 index 0000000..c117792 --- /dev/null +++ b/navsim/agents/diffusiondrive/modules/conditional_unet1d.py @@ -0,0 +1,281 @@ +from typing import Union +import logging +import torch +import torch.nn as nn 
+import einops +from einops.layers.torch import Rearrange +import math + +class Conv1dBlock(nn.Module): + ''' + Conv1d --> GroupNorm --> Mish + ''' + + def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): + super().__init__() + + self.block = nn.Sequential( + nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), + # Rearrange('batch channels horizon -> batch channels 1 horizon'), + nn.GroupNorm(n_groups, out_channels), + # Rearrange('batch channels 1 horizon -> batch channels horizon'), + nn.Mish(), + ) + + def forward(self, x): + return self.block(x) + +class Downsample1d(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.Conv1d(dim, dim, 3, 2, 1) + + def forward(self, x): + return self.conv(x) + +class Upsample1d(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) + + def forward(self, x): + return self.conv(x) + +class SinusoidalPosEmb(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, x): + device = x.device + half_dim = self.dim // 2 + emb = math.log(10000) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, device=device) * -emb) + emb = x[:, None] * emb[None, :] + emb = torch.cat((emb.sin(), emb.cos()), dim=-1) + return emb + +class ConditionalResidualBlock1D(nn.Module): + def __init__(self, + in_channels, + out_channels, + cond_dim, + kernel_size=3, + n_groups=8, + cond_predict_scale=False): + super().__init__() + + self.blocks = nn.ModuleList([ + Conv1dBlock(in_channels, out_channels, kernel_size, n_groups=n_groups), + Conv1dBlock(out_channels, out_channels, kernel_size, n_groups=n_groups), + ]) + + # FiLM modulation https://arxiv.org/abs/1709.07871 + # predicts per-channel scale and bias + cond_channels = out_channels + if cond_predict_scale: + cond_channels = out_channels * 2 + self.cond_predict_scale = cond_predict_scale + self.out_channels = out_channels + self.cond_encoder = nn.Sequential( + nn.Mish(), + nn.Linear(cond_dim, cond_channels), + Rearrange('batch t -> batch t 1'), + ) + + # make sure dimensions compatible + self.residual_conv = nn.Conv1d(in_channels, out_channels, 1) \ + if in_channels != out_channels else nn.Identity() + + def forward(self, x, cond): + ''' + x : [ batch_size x in_channels x horizon ] + cond : [ batch_size x cond_dim] + + returns: + out : [ batch_size x out_channels x horizon ] + ''' + out = self.blocks[0](x) + embed = self.cond_encoder(cond) + if self.cond_predict_scale: + embed = embed.reshape( + embed.shape[0], 2, self.out_channels, 1) + scale = embed[:,0,...] + bias = embed[:,1,...] 
+ out = scale * out + bias + else: + out = out + embed + out = self.blocks[1](out) + out = out + self.residual_conv(x) + return out + + +class ConditionalUnet1D(nn.Module): + def __init__(self, + input_dim, + local_cond_dim=None, + global_cond_dim=None, + diffusion_step_embed_dim=256, + down_dims=[256,512,1024], + kernel_size=3, + n_groups=8, + cond_predict_scale=False + ): + super().__init__() + all_dims = [input_dim] + list(down_dims) + start_dim = down_dims[0] + + dsed = diffusion_step_embed_dim + diffusion_step_encoder = nn.Sequential( + SinusoidalPosEmb(dsed), + nn.Linear(dsed, dsed * 4), + nn.Mish(), + nn.Linear(dsed * 4, dsed), + ) + cond_dim = dsed + if global_cond_dim is not None: + cond_dim += global_cond_dim + + in_out = list(zip(all_dims[:-1], all_dims[1:])) + + local_cond_encoder = None + if local_cond_dim is not None: + _, dim_out = in_out[0] + dim_in = local_cond_dim + local_cond_encoder = nn.ModuleList([ + # down encoder + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + # up encoder + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale) + ]) + + mid_dim = all_dims[-1] + self.mid_modules = nn.ModuleList([ + ConditionalResidualBlock1D( + mid_dim, mid_dim, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ), + ConditionalResidualBlock1D( + mid_dim, mid_dim, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ), + ]) + + down_modules = nn.ModuleList([]) + for ind, (dim_in, dim_out) in enumerate(in_out): + is_last = ind >= (len(in_out) - 1) + down_modules.append(nn.ModuleList([ + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + ConditionalResidualBlock1D( + dim_out, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + Downsample1d(dim_out) if not is_last else nn.Identity() + ])) + + up_modules = nn.ModuleList([]) + for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])): + is_last = ind >= (len(in_out) - 1) + up_modules.append(nn.ModuleList([ + ConditionalResidualBlock1D( + dim_out*2, dim_in, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + ConditionalResidualBlock1D( + dim_in, dim_in, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + Upsample1d(dim_in) if not is_last else nn.Identity() + ])) + + final_conv = nn.Sequential( + Conv1dBlock(start_dim, start_dim, kernel_size=kernel_size), + nn.Conv1d(start_dim, input_dim, 1), + ) + + self.diffusion_step_encoder = diffusion_step_encoder + self.local_cond_encoder = local_cond_encoder + self.up_modules = up_modules + self.down_modules = down_modules + self.final_conv = final_conv + + def forward(self, + sample: torch.Tensor, + timestep: Union[torch.Tensor, float, int], + local_cond=None, global_cond=None, **kwargs): + """ + x: (B,T,input_dim) + timestep: (B,) or int, diffusion step + local_cond: (B,T,local_cond_dim) + global_cond: (B,global_cond_dim) + output: (B,T,input_dim) + """ + sample = einops.rearrange(sample, 'b h t -> b t h') + + # 1. 
time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + global_feature = self.diffusion_step_encoder(timesteps) + + if global_cond is not None: + global_feature = torch.cat([ + global_feature, global_cond + ], axis=-1) + + # encode local features + h_local = list() + if local_cond is not None: + local_cond = einops.rearrange(local_cond, 'b h t -> b t h') + resnet, resnet2 = self.local_cond_encoder + x = resnet(local_cond, global_feature) + h_local.append(x) + x = resnet2(local_cond, global_feature) + h_local.append(x) + + x = sample + h = [] + for idx, (resnet, resnet2, downsample) in enumerate(self.down_modules): + x = resnet(x, global_feature) + if idx == 0 and len(h_local) > 0: + x = x + h_local[0] + x = resnet2(x, global_feature) + h.append(x) + x = downsample(x) + + for mid_module in self.mid_modules: + x = mid_module(x, global_feature) + + for idx, (resnet, resnet2, upsample) in enumerate(self.up_modules): + x = torch.cat((x, h.pop()), dim=1) + x = resnet(x, global_feature) + # The correct condition should be: + # if idx == (len(self.up_modules)-1) and len(h_local) > 0: + # However this change will break compatibility with published checkpoints. + # Therefore it is left as a comment. + if idx == len(self.up_modules) and len(h_local) > 0: + x = x + h_local[1] + x = resnet2(x, global_feature) + x = upsample(x) + + x = self.final_conv(x) + + x = einops.rearrange(x, 'b t h -> b h t') + return x \ No newline at end of file diff --git a/navsim/agents/diffusiondrive/modules/multimodal_loss.py b/navsim/agents/diffusiondrive/modules/multimodal_loss.py new file mode 100644 index 0000000..f13d0f7 --- /dev/null +++ b/navsim/agents/diffusiondrive/modules/multimodal_loss.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import functools +from typing import Callable, Optional +from torch import Tensor +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig +# from mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss +# from mmdet.models.losses import FocalLoss + +def reduce_loss(loss: Tensor, reduction: str) -> Tensor: + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + +def weight_reduce_loss(loss: Tensor, + weight: Optional[Tensor] = None, + reduction: str = 'mean', + avg_factor: Optional[float] = None) -> Tensor: + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Optional[Tensor], optional): Element-wise weights. + Defaults to None. + reduction (str, optional): Same as built-in losses of PyTorch. + Defaults to 'mean'. + avg_factor (Optional[float], optional): Average factor when + computing the mean of losses. Defaults to None. + + Returns: + Tensor: Processed loss values. 
+ """ + # if weight is specified, apply element-wise weight + if weight is not None: + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + # Avoid causing ZeroDivisionError when avg_factor is 0.0, + # i.e., all labels of an image belong to ignore index. + eps = torch.finfo(torch.float32).eps + loss = loss.sum() / (avg_factor + eps) + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + +def py_sigmoid_focal_loss(pred, + target, + weight=None, + gamma=2.0, + alpha=0.25, + reduction='mean', + avg_factor=None): + """PyTorch version of `Focal Loss `_. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the + number of classes + target (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 0.25. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + """ + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + # Actually, pt here denotes (1 - pt) in the Focal Loss paper + pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + # Thus it's pt.pow(gamma) rather than (1 - pt).pow(gamma) + focal_weight = (alpha * target + (1 - alpha) * + (1 - target)) * pt.pow(gamma) + loss = F.binary_cross_entropy_with_logits( + pred, target, reduction='none') * focal_weight + if weight is not None: + if weight.shape != loss.shape: + if weight.size(0) == loss.size(0): + # For most cases, weight is of shape (num_priors, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + else: + # Sometimes, weight per anchor per class is also needed. e.g. + # in FSAF. But it may be flattened of shape + # (num_priors x num_class, ), while loss is still of shape + # (num_priors, num_class). 
+ assert weight.numel() == loss.numel() + weight = weight.view(loss.size(0), -1) + assert weight.ndim == loss.ndim + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + +class LossComputer(nn.Module): + def __init__(self,config: TransfuserConfig): + self._config = config + super(LossComputer, self).__init__() + # self.focal_loss = FocalLoss(use_sigmoid=True, gamma=2.0, alpha=0.25, reduction='mean', loss_weight=1.0, activated=False) + self.cls_loss_weight = config.trajectory_cls_weight + self.reg_loss_weight = config.trajectory_reg_weight + def forward(self, poses_reg, poses_cls, targets, plan_anchor): + """ + pred_traj: (bs, 20, 8, 3) + pred_cls: (bs, 20) + plan_anchor: (bs,20, 8, 2) + targets['trajectory']: (bs, 8, 3) + """ + bs, num_mode, ts, d = poses_reg.shape + # print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') + # print(targets) + # import pdb;pdb.set_trace() + target_traj = targets["trajectory"] + dist = torch.linalg.norm(target_traj.unsqueeze(1)[...,:2] - plan_anchor, dim=-1) + dist = dist.mean(dim=-1) + mode_idx = torch.argmin(dist, dim=-1) + cls_target = mode_idx + mode_idx = mode_idx[...,None,None,None].repeat(1,1,ts,d) + best_reg = torch.gather(poses_reg, 1, mode_idx).squeeze(1) + # import ipdb; ipdb.set_trace() + # Calculate cls loss using focal loss + target_classes_onehot = torch.zeros([bs, num_mode], + dtype=poses_cls.dtype, + layout=poses_cls.layout, + device=poses_cls.device) + target_classes_onehot.scatter_(1, cls_target.unsqueeze(1), 1) + + # Use py_sigmoid_focal_loss function for focal loss calculation + loss_cls = self.cls_loss_weight * py_sigmoid_focal_loss( + poses_cls, + target_classes_onehot, + weight=None, + gamma=2.0, + alpha=0.25, + reduction='mean', + avg_factor=None + ) + + # Calculate regression loss + reg_loss = self.reg_loss_weight * F.l1_loss(best_reg, target_traj) + # import ipdb; ipdb.set_trace() + # Combine classification and regression losses + ret_loss = loss_cls + reg_loss + return ret_loss diff --git a/navsim/agents/diffusiondrive/modules/scheduler.py b/navsim/agents/diffusiondrive/modules/scheduler.py new file mode 100644 index 0000000..9f6482e --- /dev/null +++ b/navsim/agents/diffusiondrive/modules/scheduler.py @@ -0,0 +1,54 @@ +import math + +from torch.optim.lr_scheduler import _LRScheduler + + +class WarmupCosLR(_LRScheduler): + def __init__( + self, optimizer, min_lr, lr, warmup_epochs, epochs, last_epoch=-1, verbose=False + ) -> None: + self.min_lr = min_lr + self.lr = lr + self.epochs = epochs + self.warmup_epochs = warmup_epochs + super(WarmupCosLR, self).__init__(optimizer, last_epoch, verbose) + + def state_dict(self): + """Returns the state of the scheduler as a :class:`dict`. + + It contains an entry for every variable in self.__dict__ which + is not the optimizer. + """ + return { + key: value for key, value in self.__dict__.items() if key != "optimizer" + } + + def load_state_dict(self, state_dict): + """Loads the schedulers state. + + Args: + state_dict (dict): scheduler state. Should be an object returned + from a call to :meth:`state_dict`. 
+ """ + self.__dict__.update(state_dict) + + def get_init_lr(self): + lr = self.lr / self.warmup_epochs + return lr + + def get_lr(self): + if self.last_epoch < self.warmup_epochs: + lr = self.lr * (self.last_epoch + 1) / self.warmup_epochs + else: + lr = self.min_lr + 0.5 * (self.lr - self.min_lr) * ( + 1 + + math.cos( + math.pi + * (self.last_epoch - self.warmup_epochs) + / (self.epochs - self.warmup_epochs) + ) + ) + if "lr_scale" in self.optimizer.param_groups[0]: + return [lr * group["lr_scale"] for group in self.optimizer.param_groups] + + return [lr for _ in self.optimizer.param_groups] diff --git a/navsim/agents/diffusiondrive/transfuser_agent.py b/navsim/agents/diffusiondrive/transfuser_agent.py new file mode 100644 index 0000000..9874202 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_agent.py @@ -0,0 +1,224 @@ +from typing import Any, List, Dict, Optional, Union + +import torch +import torch.nn as nn +from torch.optim import Optimizer +from torch.optim.lr_scheduler import LRScheduler +import pytorch_lightning as pl + +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig + +from navsim.agents.diffusiondrive.transfuser_model_v2 import V2TransfuserModel as TransfuserModel + +from navsim.agents.diffusiondrive.transfuser_callback import TransfuserCallback +from navsim.agents.diffusiondrive.transfuser_loss import transfuser_loss +from navsim.agents.diffusiondrive.transfuser_features import TransfuserFeatureBuilder, TransfuserTargetBuilder +from navsim.common.dataclasses import SensorConfig +from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder +from navsim.agents.diffusiondrive.modules.scheduler import WarmupCosLR +from omegaconf import DictConfig, OmegaConf, open_dict +import torch.optim as optim +from navsim.common.dataclasses import AgentInput, Trajectory, SensorConfig +def build_from_configs(obj, cfg: DictConfig, **kwargs): + if cfg is None: + return None + cfg = cfg.copy() + if isinstance(cfg, DictConfig): + OmegaConf.set_struct(cfg, False) + type = cfg.pop('type') + return getattr(obj, type)(**cfg, **kwargs) + +class TransfuserAgent(AbstractAgent): + """Agent interface for TransFuser baseline.""" + + def __init__( + self, + config: TransfuserConfig, + lr: float, + checkpoint_path: Optional[str] = None, + ): + """ + Initializes TransFuser agent. + :param config: global config of TransFuser agent + :param lr: learning rate during training + :param checkpoint_path: optional path string to checkpoint, defaults to None + """ + super().__init__() + + self._config = config + self._lr = lr + # print(lr) + # import pdb;pdb.set_trace() + self._checkpoint_path = checkpoint_path + self._transfuser_model = TransfuserModel(config) + self.init_from_pretrained() + + def init_from_pretrained(self): + # import ipdb; ipdb.set_trace() + if self._checkpoint_path: + if torch.cuda.is_available(): + checkpoint = torch.load(self._checkpoint_path) + else: + checkpoint = torch.load(self._checkpoint_path, map_location=torch.device('cpu')) + + state_dict = checkpoint['state_dict'] + + # Remove 'agent.' 
prefix from keys if present + state_dict = {k.replace('agent.', ''): v for k, v in state_dict.items()} + + # Load state dict and get info about missing and unexpected keys + missing_keys, unexpected_keys = self.load_state_dict(state_dict, strict=False) + + if missing_keys: + print(f"Missing keys when loading pretrained weights: {missing_keys}") + if unexpected_keys: + print(f"Unexpected keys when loading pretrained weights: {unexpected_keys}") + else: + print("No checkpoint path provided. Initializing from scratch.") + def name(self) -> str: + """Inherited, see superclass.""" + return self.__class__.__name__ + + def initialize(self) -> None: + """Inherited, see superclass.""" + if torch.cuda.is_available(): + state_dict: Dict[str, Any] = torch.load(self._checkpoint_path)["state_dict"] + else: + state_dict: Dict[str, Any] = torch.load(self._checkpoint_path, map_location=torch.device("cpu"))[ + "state_dict" + ] + self.load_state_dict({k.replace("agent.", ""): v for k, v in state_dict.items()}) + + + def get_sensor_config(self) -> SensorConfig: + """Inherited, see superclass.""" + return SensorConfig.build_all_sensors(include=[3]) + + def get_target_builders(self) -> List[AbstractTargetBuilder]: + """Inherited, see superclass.""" + return [TransfuserTargetBuilder(config=self._config)] + + def get_feature_builders(self) -> List[AbstractFeatureBuilder]: + """Inherited, see superclass.""" + return [TransfuserFeatureBuilder(config=self._config)] + + def forward(self, features: Dict[str, torch.Tensor], targets: Dict[str, torch.Tensor]=None) -> Dict[str, torch.Tensor]: + """Inherited, see superclass.""" + return self._transfuser_model(features,targets=targets) + + # def compute_trajectory(self, agent_input: AgentInput) -> Tuple[np.ndarray,np.ndarray]: + # """ + # Computes the ego vehicle trajectory. + # :param current_input: Dataclass with agent inputs. 
+ # :return: Trajectory representing the predicted ego's position in future + # """ + # self.eval() + # features: Dict[str, torch.Tensor] = {} + # # targets: Dict[str, torch.Tensor] = {} + # # build features + # for builder in self.get_feature_builders(): + # features.update(builder.compute_features(agent_input)) + + # # for builder in self.get_target_builders(): + # # targets.update(builder.compute_targets(scene)) + + + # # "trajectory": trajectory, + # # "agent_states": agent_states, + # # "agent_labels": agent_labels, + # # "bev_semantic_map": bev_semantic_map,` + + # # add batch dimension + # features = {k: v.unsqueeze(0) for k, v in features.items()} + # # targets = {k: v.unsqueeze(0) for k, v in targets.items()} + # # print(targets) + # # print('<<<<<<<<<<<<<<<') + # # poses1 = targets["trajectory"].squeeze(0).numpy() + # # forward pass + # with torch.no_grad(): + # predictions = self.forward(features) + # poses = predictions['trajectory'].squeeze(0).numpy()# 20 8 3 20 64 8 3 + # anchor_poses = predictions['anchor_trajectories'].squeeze(0).numpy() # 确保转换为numpy + # # extract trajectory + # # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + # # print(anchor_trajectories.shape) + # return poses,anchor_poses + # # return Trajectory(poses) + def compute_loss( + self, + features: Dict[str, torch.Tensor], + targets: Dict[str, torch.Tensor], + predictions: Dict[str, torch.Tensor], + ) -> torch.Tensor: + """Inherited, see superclass.""" + return transfuser_loss(targets, predictions, self._config) + + def get_optimizers(self) -> Union[Optimizer, Dict[str, Union[Optimizer, LRScheduler]]]: + """Inherited, see superclass.""" + return self.get_coslr_optimizers() + + def get_step_lr_optimizers(self): + optimizer = torch.optim.Adam(self._transfuser_model.parameters(), lr=self._lr, weight_decay=self._config.weight_decay) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self._config.lr_steps, gamma=0.1) + return {'optimizer': optimizer, 'lr_scheduler': scheduler} + + def get_coslr_optimizers(self): + # import ipdb; ipdb.set_trace() + optimizer_cfg = dict(type=self._config.optimizer_type, + lr=self._lr, + weight_decay=self._config.weight_decay, + paramwise_cfg=self._config.opt_paramwise_cfg + ) + scheduler_cfg = dict(type=self._config.scheduler_type, + milestones=self._config.lr_steps, + gamma=0.1, + ) + + optimizer_cfg = DictConfig(optimizer_cfg) + scheduler_cfg = DictConfig(scheduler_cfg) + + with open_dict(optimizer_cfg): + paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) + + if paramwise_cfg: + params = [] + pgs = [[] for _ in paramwise_cfg['name']] + + for k, v in self._transfuser_model.named_parameters(): + in_param_group = True + for i, (pattern, pg_cfg) in enumerate(paramwise_cfg['name'].items()): + if pattern in k: + pgs[i].append(v) + in_param_group = False + if in_param_group: + params.append(v) + else: + params = self._transfuser_model.parameters() + + optimizer = build_from_configs(optim, optimizer_cfg, params=params) + # import ipdb; ipdb.set_trace() + if paramwise_cfg: + for pg, (_, pg_cfg) in zip(pgs, paramwise_cfg['name'].items()): + cfg = {} + if 'lr_mult' in pg_cfg: + cfg['lr'] = optimizer_cfg['lr'] * pg_cfg['lr_mult'] + optimizer.add_param_group({'params': pg, **cfg}) + + # scheduler = build_from_configs(optim.lr_scheduler, scheduler_cfg, optimizer=optimizer) + scheduler = WarmupCosLR( + optimizer=optimizer, + lr=self._lr, + min_lr=1e-6, + epochs=100, + warmup_epochs=3, + ) + + if 'interval' in scheduler_cfg: + scheduler = {'scheduler': scheduler, 
'interval': scheduler_cfg['interval']} + + return {'optimizer': optimizer, 'lr_scheduler': scheduler} + + def get_training_callbacks(self) -> List[pl.Callback]: + """Inherited, see superclass.""" + return [TransfuserCallback(self._config),pl.callbacks.ModelCheckpoint(every_n_epochs=5, save_top_k=-1)] \ No newline at end of file diff --git a/navsim/agents/diffusiondrive/transfuser_backbone.py b/navsim/agents/diffusiondrive/transfuser_backbone.py new file mode 100644 index 0000000..6cce859 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_backbone.py @@ -0,0 +1,512 @@ +""" +Implements the TransFuser vision backbone. +""" + +import copy +import math + +import timm +import torch +import torch.nn.functional as F +from torch import nn + +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig + + +class TransfuserBackbone(nn.Module): + """Multi-scale Fusion Transformer for image + LiDAR feature fusion.""" + + def __init__(self, config: TransfuserConfig): + + super().__init__() + self.config = config + try: + self.image_encoder = timm.create_model(config.image_architecture, pretrained=True, features_only=True) + except Exception as e: + print(f"Failed to load image encoder with error: {e}") + self.image_encoder = timm.create_model(config.image_architecture, pretrained=True, features_only=True, + pretrained_cfg_overlay=dict(file=config.bkb_path)) + if config.use_ground_plane: + in_channels = 2 * config.lidar_seq_len + else: + in_channels = config.lidar_seq_len + + if config.latent: + self.lidar_latent = nn.Parameter( + torch.randn( + (1, in_channels, config.lidar_resolution_width, config.lidar_resolution_height), + requires_grad=True, + ) + ) + + self.avgpool_img = nn.AdaptiveAvgPool2d((self.config.img_vert_anchors, self.config.img_horz_anchors)) + + self.lidar_encoder = timm.create_model( + config.lidar_architecture, + pretrained=False, + in_chans=in_channels, + features_only=True, + ) + self.global_pool_lidar = nn.AdaptiveAvgPool2d(output_size=1) + self.avgpool_lidar = nn.AdaptiveAvgPool2d((self.config.lidar_vert_anchors, self.config.lidar_horz_anchors)) + lidar_time_frames = [1, 1, 1, 1] + + self.global_pool_img = nn.AdaptiveAvgPool2d(output_size=1) + start_index = 0 + # Some networks have a stem layer + if len(self.image_encoder.return_layers) > 4: + start_index += 1 + + self.transformers = nn.ModuleList( + [ + GPT( + n_embd=self.image_encoder.feature_info.info[start_index + i]["num_chs"], + config=config, + # lidar_video=self.lidar_video, + lidar_time_frames=lidar_time_frames[i], + ) + for i in range(4) + ] + ) + self.lidar_channel_to_img = nn.ModuleList( + [ + nn.Conv2d( + self.lidar_encoder.feature_info.info[start_index + i]["num_chs"], + self.image_encoder.feature_info.info[start_index + i]["num_chs"], + kernel_size=1, + ) + for i in range(4) + ] + ) + self.img_channel_to_lidar = nn.ModuleList( + [ + nn.Conv2d( + self.image_encoder.feature_info.info[start_index + i]["num_chs"], + self.lidar_encoder.feature_info.info[start_index + i]["num_chs"], + kernel_size=1, + ) + for i in range(4) + ] + ) + + self.num_image_features = self.image_encoder.feature_info.info[start_index + 3]["num_chs"] + # Typical encoders down-sample by a factor of 32 + self.perspective_upsample_factor = ( + self.image_encoder.feature_info.info[start_index + 3]["reduction"] + // self.config.perspective_downsample_factor + ) + + if self.config.transformer_decoder_join: + self.num_features = self.lidar_encoder.feature_info.info[start_index + 3]["num_chs"] + else: + if self.config.add_features: 
+ self.lidar_to_img_features_end = nn.Linear( + self.lidar_encoder.feature_info.info[start_index + 3]["num_chs"], + self.image_encoder.feature_info.info[start_index + 3]["num_chs"], + ) + # Number of features the encoder produces. + self.num_features = self.image_encoder.feature_info.info[start_index + 3]["num_chs"] + else: + # Number of features the encoder produces. + self.num_features = ( + self.image_encoder.feature_info.info[start_index + 3]["num_chs"] + + self.lidar_encoder.feature_info.info[start_index + 3]["num_chs"] + ) + + # FPN fusion + channel = self.config.bev_features_channels + self.relu = nn.ReLU(inplace=True) + # top down + if self.config.detect_boxes or self.config.use_bev_semantic: + self.upsample = nn.Upsample( + scale_factor=self.config.bev_upsample_factor, mode="bilinear", align_corners=False + ) + self.upsample2 = nn.Upsample( + size=( + self.config.lidar_resolution_height // self.config.bev_down_sample_factor, + self.config.lidar_resolution_width // self.config.bev_down_sample_factor, + ), + mode="bilinear", + align_corners=False, + ) + + self.up_conv5 = nn.Conv2d(channel, channel, (3, 3), padding=1) + self.up_conv4 = nn.Conv2d(channel, channel, (3, 3), padding=1) + + # lateral + self.c5_conv = nn.Conv2d(self.lidar_encoder.feature_info.info[start_index + 3]["num_chs"], channel, (1, 1)) + + def top_down(self, x): + + p5 = self.relu(self.c5_conv(x)) + p4 = self.relu(self.up_conv5(self.upsample(p5))) + p3 = self.relu(self.up_conv4(self.upsample2(p4))) + + return p3 + + def forward(self, image, lidar): + """ + Image + LiDAR feature fusion using transformers + Args: + image_list (list): list of input images + lidar_list (list): list of input LiDAR BEV + """ + image_features, lidar_features = image, lidar + + if self.config.latent and lidar_features is None: + batch_size = image.shape[0] + lidar_features = self.lidar_latent.repeat(batch_size, 1, 1, 1) + + # Generate an iterator for all the layers in the network that one can loop through. + image_layers = iter(self.image_encoder.items()) + lidar_layers = iter(self.lidar_encoder.items()) + # import ipdb; ipdb.set_trace() + # Stem layer. + # In some architectures the stem is not a return layer, so we need to skip it. + if len(self.image_encoder.return_layers) > 4: + image_features = self.forward_layer_block(image_layers, self.image_encoder.return_layers, image_features) + if len(self.lidar_encoder.return_layers) > 4: + lidar_features = self.forward_layer_block(lidar_layers, self.lidar_encoder.return_layers, lidar_features) + # Loop through the 4 blocks of the network. 
+ for i in range(4): + image_features = self.forward_layer_block(image_layers, self.image_encoder.return_layers, image_features) + lidar_features = self.forward_layer_block(lidar_layers, self.lidar_encoder.return_layers, lidar_features) + + image_features, lidar_features = self.fuse_features(image_features, lidar_features, i) + + if self.config.detect_boxes or self.config.use_bev_semantic: + x4 = lidar_features + + image_feature_grid = None + if self.config.use_semantic or self.config.use_depth: + image_feature_grid = image_features + + if self.config.transformer_decoder_join: + fused_features = lidar_features + else: + image_features = self.global_pool_img(image_features) + image_features = torch.flatten(image_features, 1) + lidar_features = self.global_pool_lidar(lidar_features) + lidar_features = torch.flatten(lidar_features, 1) + + if self.config.add_features: + lidar_features = self.lidar_to_img_features_end(lidar_features) + fused_features = image_features + lidar_features + else: + fused_features = torch.cat((image_features, lidar_features), dim=1) + + if self.config.detect_boxes or self.config.use_bev_semantic: + features = self.top_down(x4) + else: + features = None + + return features, fused_features, image_feature_grid + + def forward_layer_block(self, layers, return_layers, features): + """ + Run one forward pass to a block of layers from a TIMM neural network and returns the result. + Advances the whole network by just one block + :param layers: Iterator starting at the current layer block + :param return_layers: TIMM dictionary describing at which intermediate layers features are returned. + :param features: Input features + :return: Processed features + """ + for name, module in layers: + features = module(features) + if name in return_layers: + break + return features + + def fuse_features(self, image_features, lidar_features, layer_idx): + """ + Perform a TransFuser feature fusion block using a Transformer module. + :param image_features: Features from the image branch + :param lidar_features: Features from the LiDAR branch + :param layer_idx: Transformer layer index. + :return: image_features and lidar_features with added features from the other branch. 
+ """ + image_embd_layer = self.avgpool_img(image_features) + lidar_embd_layer = self.avgpool_lidar(lidar_features) + + lidar_embd_layer = self.lidar_channel_to_img[layer_idx](lidar_embd_layer) + + image_features_layer, lidar_features_layer = self.transformers[layer_idx](image_embd_layer, lidar_embd_layer) + lidar_features_layer = self.img_channel_to_lidar[layer_idx](lidar_features_layer) + + image_features_layer = F.interpolate( + image_features_layer, + size=(image_features.shape[2], image_features.shape[3]), + mode="bilinear", + align_corners=False, + ) + lidar_features_layer = F.interpolate( + lidar_features_layer, + size=(lidar_features.shape[2], lidar_features.shape[3]), + mode="bilinear", + align_corners=False, + ) + + image_features = image_features + image_features_layer + lidar_features = lidar_features + lidar_features_layer + + return image_features, lidar_features + + +class GPT(nn.Module): + """The full GPT language backbone, with a context size of block_size.""" + + # def __init__(self, n_embd, config, lidar_video, lidar_time_frames): + def __init__(self, n_embd, config, lidar_time_frames): + super().__init__() + self.n_embd = n_embd + # We currently only support seq len 1 + self.seq_len = 1 + self.lidar_seq_len = config.lidar_seq_len + self.config = config + self.lidar_time_frames = lidar_time_frames + + # positional embedding parameter (learnable), image + lidar + self.pos_emb = nn.Parameter( + torch.zeros( + 1, + self.seq_len * self.config.img_vert_anchors * self.config.img_horz_anchors + + lidar_time_frames * self.config.lidar_vert_anchors * self.config.lidar_horz_anchors, + self.n_embd, + ) + ) + + self.drop = nn.Dropout(config.embd_pdrop) + + # transformer + self.blocks = nn.Sequential( + *[ + Block(n_embd, config.n_head, config.block_exp, config.attn_pdrop, config.resid_pdrop) + for layer in range(config.n_layer) + ] + ) + + # decoder head + self.ln_f = nn.LayerNorm(n_embd) + + self.apply(self._init_weights) + + def _init_weights(self, module): + if isinstance(module, nn.Linear): + module.weight.data.normal_( + mean=self.config.gpt_linear_layer_init_mean, + std=self.config.gpt_linear_layer_init_std, + ) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(self.config.gpt_layer_norm_init_weight) + + def forward(self, image_tensor, lidar_tensor): + """ + Args: + image_tensor (tensor): B*4*seq_len, C, H, W + lidar_tensor (tensor): B*seq_len, C, H, W + """ + + bz = lidar_tensor.shape[0] + lidar_h, lidar_w = lidar_tensor.shape[2:4] + + img_h, img_w = image_tensor.shape[2:4] + + assert self.seq_len == 1 + image_tensor = image_tensor.permute(0, 2, 3, 1).contiguous().view(bz, -1, self.n_embd) + lidar_tensor = lidar_tensor.permute(0, 2, 3, 1).contiguous().view(bz, -1, self.n_embd) + + token_embeddings = torch.cat((image_tensor, lidar_tensor), dim=1) + + x = self.drop(self.pos_emb + token_embeddings) + x = self.blocks(x) # (B, an * T, C) + x = self.ln_f(x) # (B, an * T, C) + + image_tensor_out = ( + x[:, : self.seq_len * self.config.img_vert_anchors * self.config.img_horz_anchors, :] + .view(bz * self.seq_len, img_h, img_w, -1) + .permute(0, 3, 1, 2) + .contiguous() + ) + lidar_tensor_out = ( + x[ + :, + self.seq_len * self.config.img_vert_anchors * self.config.img_horz_anchors :, + :, + ] + .view(bz, lidar_h, lidar_w, -1) + .permute(0, 3, 1, 2) + .contiguous() + ) + + return image_tensor_out, lidar_tensor_out + + +class SelfAttention(nn.Module): + """ + A vanilla multi-head 
masked self-attention layer with a projection at the + end. + """ + + def __init__(self, n_embd, n_head, attn_pdrop, resid_pdrop): + super().__init__() + assert n_embd % n_head == 0 + # key, query, value projections for all heads + self.key = nn.Linear(n_embd, n_embd) + self.query = nn.Linear(n_embd, n_embd) + self.value = nn.Linear(n_embd, n_embd) + # regularization + self.attn_drop = nn.Dropout(attn_pdrop) + self.resid_drop = nn.Dropout(resid_pdrop) + # output projection + self.proj = nn.Linear(n_embd, n_embd) + self.n_head = n_head + + def forward(self, x): + b, t, c = x.size() + + # calculate query, key, values for all heads in batch and move head + # forward to be the batch dim + k = self.key(x).view(b, t, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + q = self.query(x).view(b, t, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + v = self.value(x).view(b, t, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + + # self-attend: (b, nh, t, hs) x (b, nh, hs, t) -> (b, nh, t, t) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = F.softmax(att, dim=-1) + att = self.attn_drop(att) + y = att @ v # (b, nh, t, t) x (b, nh, t, hs) -> (b, nh, t, hs) + y = y.transpose(1, 2).contiguous().view(b, t, c) # re-assemble all head outputs side by side + + # output projection + y = self.resid_drop(self.proj(y)) + return y + + +class Block(nn.Module): + """an unassuming Transformer block""" + + def __init__(self, n_embd, n_head, block_exp, attn_pdrop, resid_pdrop): + super().__init__() + self.ln1 = nn.LayerNorm(n_embd) + self.ln2 = nn.LayerNorm(n_embd) + self.attn = SelfAttention(n_embd, n_head, attn_pdrop, resid_pdrop) + self.mlp = nn.Sequential( + nn.Linear(n_embd, block_exp * n_embd), + nn.ReLU(True), # changed from GELU + nn.Linear(block_exp * n_embd, n_embd), + nn.Dropout(resid_pdrop), + ) + + def forward(self, x): + x = x + self.attn(self.ln1(x)) + x = x + self.mlp(self.ln2(x)) + + return x + + +class MultiheadAttentionWithAttention(nn.Module): + """ + MultiheadAttention that also return attention weights + """ + + def __init__(self, n_embd, n_head, pdrop): + super().__init__() + assert n_embd % n_head == 0 + # key, query, value projections for all heads + self.key = nn.Linear(n_embd, n_embd) + self.query = nn.Linear(n_embd, n_embd) + self.value = nn.Linear(n_embd, n_embd) + # regularization + self.attn_drop = nn.Dropout(pdrop) + self.resid_drop = nn.Dropout(pdrop) + # output projection + self.proj = nn.Linear(n_embd, n_embd) + self.n_head = n_head + + def forward(self, q_in, k_in, v_in): + b, t, c = q_in.size() + _, t_mem, _ = k_in.size() + + # calculate query, key, values for all heads in batch and move head + # forward to be the batch dim + q = self.query(q_in).view(b, t, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + k = self.key(k_in).view(b, t_mem, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + v = self.value(v_in).view(b, t_mem, self.n_head, c // self.n_head).transpose(1, 2) # (b, nh, t, hs) + + # self-attend: (b, nh, t, hs) x (b, nh, hs, t) -> (b, nh, t, t) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + att = F.softmax(att, dim=-1) + att = self.attn_drop(att) + y = att @ v # (b, nh, t, t) x (b, nh, t, hs) -> (b, nh, t, hs) + y = y.transpose(1, 2).contiguous().view(b, t, c) # re-assemble all head outputs side by side + + # output projection + y = self.resid_drop(self.proj(y)) + attention = torch.mean(att, dim=1) # Average attention over heads + return y, attention + 
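+# Illustrative usage sketch for the class above (hypothetical tensor sizes, shown
+# only to clarify the returned shapes; not part of the upstream TransFuser code):
+#
+#   attn = MultiheadAttentionWithAttention(n_embd=256, n_head=8, pdrop=0.1)
+#   queries = torch.randn(2, 30, 256)   # (batch, num_queries, n_embd)
+#   memory = torch.randn(2, 64, 256)    # (batch, num_memory_tokens, n_embd)
+#   out, attention = attn(queries, memory, memory)
+#   # out: (2, 30, 256); attention, averaged over heads: (2, 30, 64)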
+ +class TransformerDecoderLayerWithAttention(nn.Module): + """A Transformer decoder that returns the attentions.""" + + def __init__( + self, + d_model, + nhead, + dim_feedforward=2048, + dropout=0.1, + activation=F.relu, + layer_norm_eps=1e-5, + ): + super().__init__() + self.self_attn = MultiheadAttentionWithAttention(d_model, nhead, dropout) + self.multihead_attn = MultiheadAttentionWithAttention(d_model, nhead, dropout) + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps) + self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps) + self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + self.dropout3 = nn.Dropout(dropout) + + self.activation = activation + + def forward(self, tgt, memory): + x = tgt + tmp, _ = self.self_attn(x, x, x) + x = self.norm1(x + self.dropout1(tmp)) + tmp, attention = self.multihead_attn(x, memory, memory) + x = self.norm2(x + self.dropout2(tmp)) + tmp = self.linear2(self.dropout(self.activation(self.linear1(x)))) + x = self.norm3(x + self.dropout3(tmp)) + + return x, attention + + +class TransformerDecoderWithAttention(nn.Module): + """A Transformer decoder that returns the attentions.""" + + def __init__(self, layers, num_layers, norm=None): + super().__init__() + self.layers = nn.ModuleList([copy.deepcopy(layers) for i in range(num_layers)]) + self.num_layers = num_layers + self.norm = norm + + def forward(self, queries, memory): + output = queries + attentions = [] + for mod in self.layers: + output, attention = mod(output, memory) + attentions.append(attention) + + if self.norm is not None: + output = self.norm(output) + + avg_attention = torch.mean(torch.stack(attentions), dim=0) + return output, avg_attention diff --git a/navsim/agents/diffusiondrive/transfuser_callback.py b/navsim/agents/diffusiondrive/transfuser_callback.py new file mode 100644 index 0000000..4993d23 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_callback.py @@ -0,0 +1,302 @@ +from typing import Any, Dict, Optional, Union +from PIL import ImageColor + +import cv2 +import numpy as np +import numpy.typing as npt + +import torch +import torchvision.utils as vutils +import pytorch_lightning as pl + +from nuplan.common.actor_state.oriented_box import OrientedBox +from nuplan.common.actor_state.state_representation import StateSE2 +from nuplan.common.maps.abstract_map import SemanticMapLayer + +from navsim.agents.diffusiondrive.transfuser_features import BoundingBox2DIndex +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig +from navsim.visualization.config import MAP_LAYER_CONFIG, AGENT_CONFIG + + +class TransfuserCallback(pl.Callback): + """Visualization Callback for TransFuser during training.""" + + def __init__( + self, + config: TransfuserConfig, + num_plots: int = 3, + num_rows: int = 2, + num_columns: int = 2, + ) -> None: + """ + Initializes the visualization callback. 
+ :param config: global config dataclass of TransFuser + :param num_plots: number of images tiles, defaults to 3 + :param num_rows: number of rows in image tile, defaults to 2 + :param num_columns: number of columns in image tile, defaults to 2 + """ + + self._config = config + + self._num_plots = num_plots + self._num_rows = num_rows + self._num_columns = num_columns + + def on_validation_epoch_start(self, trainer: pl.Trainer, lightning_module: pl.LightningModule) -> None: + """Inherited, see superclass.""" + pass + + def on_validation_epoch_end(self, trainer: pl.Trainer, lightning_module: pl.LightningModule) -> None: + """Inherited, see superclass.""" + pass + """ + device = lightning_module.device + for idx_plot in range(self._num_plots): + features, targets = next(iter(trainer.val_dataloaders)) + features, targets = dict_to_device(features, device), dict_to_device(targets, device) + with torch.no_grad(): + predictions = lightning_module.agent.forward(features) + + features, targets, predictions = ( + dict_to_device(features, "cpu"), + dict_to_device(targets, "cpu"), + dict_to_device(predictions, "cpu"), + ) + grid = self._visualize_model(features, targets, predictions) + trainer.logger.experiment.add_image(f"val_plot_{idx_plot}", grid, global_step=trainer.current_epoch) + """ + + def on_test_epoch_start(self, trainer: pl.Trainer, lightning_module: pl.LightningModule) -> None: + """Inherited, see superclass.""" + pass + + def on_test_epoch_end(self, trainer: pl.Trainer, lightning_module: pl.LightningModule) -> None: + """Inherited, see superclass.""" + pass + + def on_train_epoch_start(self, trainer: pl.Trainer, lightning_module: pl.LightningModule) -> None: + """Inherited, see superclass.""" + pass + # device = lightning_module.device + # for idx_plot in range(self._num_plots): + # features, targets = next(iter(trainer.train_dataloader)) + # features, targets = dict_to_device(features, device), dict_to_device(targets, device) + # lightning_module.agent.eval() + # with torch.no_grad(): + # predictions = lightning_module.agent.forward(features,targets) + + # features, targets, predictions = ( + # dict_to_device(features, "cpu"), + # dict_to_device(targets, "cpu"), + # dict_to_device(predictions, "cpu"), + # ) + # grid = self._visualize_model(features, targets, predictions) + # trainer.logger.experiment.add_image(f"train_plot_{idx_plot}", grid, global_step=trainer.current_epoch) + + + def on_train_epoch_end( + self, trainer: pl.Trainer, lightning_module: pl.LightningModule, unused: Optional[Any] = None + ) -> None: + """Inherited, see superclass.""" + pass + """ + device = lightning_module.device + for idx_plot in range(self._num_plots): + features, targets = next(iter(trainer.train_dataloader)) + features, targets = dict_to_device(features, device), dict_to_device(targets, device) + with torch.no_grad(): + predictions = lightning_module.agent.forward(features,targets) + + features, targets, predictions = ( + dict_to_device(features, "cpu"), + dict_to_device(targets, "cpu"), + dict_to_device(predictions, "cpu"), + ) + grid = self._visualize_model(features, targets, predictions) + trainer.logger.experiment.add_image(f"train_plot_{idx_plot}", grid, global_step=trainer.current_epoch) + """ + + def _visualize_model( + self, + features: Dict[str, torch.Tensor], + targets: Dict[str, torch.Tensor], + predictions: Dict[str, torch.Tensor], + ) -> torch.Tensor: + """ + Create tile of input-output visualizations for TransFuser. 
+ :param features: dictionary of feature names and tensors + :param targets: dictionary of target names and tensors + :param predictions: dictionary of target names and predicted tensors + :return: image tiles as RGB tensors + """ + camera = features["camera_feature"].permute(0, 2, 3, 1).numpy() + bev = targets["bev_semantic_map"].numpy() + lidar_map = features["lidar_feature"].squeeze(1).numpy() + agent_labels = targets["agent_labels"].numpy() + agent_states = targets["agent_states"].numpy() + trajectory = targets["trajectory"].numpy() + + pred_bev = predictions["bev_semantic_map"].argmax(1).numpy() + pred_agent_labels = predictions["agent_labels"].sigmoid().numpy() + pred_agent_states = predictions["agent_states"].numpy() + pred_trajectory = predictions["trajectory"].numpy() + + plots = [] + for sample_idx in range(self._num_rows * self._num_columns): + plot = np.zeros((256, 768, 3), dtype=np.uint8) + plot[:128, :512] = (camera[sample_idx] * 255).astype(np.uint8)[::2, ::2] + + plot[128:, :256] = semantic_map_to_rgb(bev[sample_idx], self._config) + plot[128:, 256:512] = semantic_map_to_rgb(pred_bev[sample_idx], self._config) + + agent_states_ = agent_states[sample_idx][agent_labels[sample_idx]] + pred_agent_states_ = pred_agent_states[sample_idx][pred_agent_labels[sample_idx] > 0.5] + plot[:, 512:] = lidar_map_to_rgb( + lidar_map[sample_idx], + agent_states_, + pred_agent_states_, + trajectory[sample_idx], + pred_trajectory[sample_idx], + self._config, + ) + + plots.append(torch.tensor(plot).permute(2, 0, 1)) + + return vutils.make_grid(plots, normalize=False, nrow=self._num_rows) + + +def dict_to_device(dict: Dict[str, torch.Tensor], device: Union[torch.device, str]) -> Dict[str, torch.Tensor]: + """ + Helper function to move tensors from dictionary to device. + :param dict: dictionary of names and tensors + :param device: torch device to move tensors to + :return: dictionary with tensors on specified device + """ + for key in dict.keys(): + dict[key] = dict[key].to(device) + return dict + + +def semantic_map_to_rgb(semantic_map: npt.NDArray[np.int64], config: TransfuserConfig) -> npt.NDArray[np.uint8]: + """ + Convert semantic map to RGB image. + :param semantic_map: numpy array of segmentation map (multi-channel) + :param config: global config dataclass of TransFuser + :return: RGB image as numpy array + """ + + height, width = semantic_map.shape[:2] + rgb_map = np.ones((height, width, 3), dtype=np.uint8) * 255 + + for label in range(1, config.num_bev_classes): + + if config.bev_semantic_classes[label][0] == "linestring": + hex_color = MAP_LAYER_CONFIG[SemanticMapLayer.BASELINE_PATHS]["line_color"] + else: + layer = config.bev_semantic_classes[label][-1][0] # take color of first element + hex_color = ( + AGENT_CONFIG[layer]["fill_color"] + if layer in AGENT_CONFIG.keys() + else MAP_LAYER_CONFIG[layer]["fill_color"] + ) + + rgb_map[semantic_map == label] = ImageColor.getcolor(hex_color, "RGB") + return rgb_map[::-1, ::-1] + + +def lidar_map_to_rgb( + lidar_map: npt.NDArray[np.int64], + agent_states: npt.NDArray[np.float32], + pred_agent_states: npt.NDArray[np.float32], + trajectory: npt.NDArray[np.float32], + pred_trajectory: npt.NDArray[np.float32], + config: TransfuserConfig, +) -> npt.NDArray[np.uint8]: + """ + Converts lidar histogram map with predictions and targets to RGB. 
+ :param lidar_map: lidar histogram raster + :param agent_states: target agent bounding box states + :param pred_agent_states: predicted agent bounding box states + :param trajectory: target trajectory of human operator + :param pred_trajectory: predicted trajectory of agent + :param config: global config dataclass of TransFuser + :return: RGB image for training visualization + """ + gt_color, pred_color = (0, 255, 0), (255, 0, 0) + point_size = 4 + + height, width = lidar_map.shape[:2] + + def coords_to_pixel(coords): + """Convert local coordinates to pixel indices.""" + pixel_center = np.array([[height / 2.0, width / 2.0]]) + coords_idcs = (coords / config.bev_pixel_size) + pixel_center + return coords_idcs.astype(np.int32) + + rgb_map = (lidar_map * 255).astype(np.uint8) + rgb_map = 255 - rgb_map[..., None].repeat(3, axis=-1) + + for color, agent_state_array in zip([gt_color, pred_color], [agent_states, pred_agent_states]): + for agent_state in agent_state_array: + agent_box = OrientedBox( + StateSE2(*agent_state[BoundingBox2DIndex.STATE_SE2]), + agent_state[BoundingBox2DIndex.LENGTH], + agent_state[BoundingBox2DIndex.WIDTH], + 1.0, + ) + exterior = np.array(agent_box.geometry.exterior.coords).reshape((-1, 1, 2)) + exterior = coords_to_pixel(exterior) + exterior = np.flip(exterior, axis=-1) + cv2.polylines(rgb_map, [exterior], isClosed=True, color=color, thickness=2) + + for color, traj in zip([gt_color, pred_color], [trajectory, pred_trajectory]): + trajectory_indices = coords_to_pixel(traj[:, :2]) + for x, y in trajectory_indices: + cv2.circle(rgb_map, (y, x), point_size, color, -1) # -1 fills the circle + + return rgb_map[::-1, ::-1] + + +def debug_lidar_map_to_rgb( + lidar_map: npt.NDArray[np.int64], + agent_states: npt.NDArray[np.float32], + pred_agent_states: npt.NDArray[np.float32], + trajectory: npt.NDArray[np.float32], + pred_trajectory: npt.NDArray[np.float32], + config: TransfuserConfig, +) -> npt.NDArray[np.uint8]: + """ + Converts lidar histogram map with predictions and targets to RGB. 
+ :param lidar_map: lidar histogram raster + :param agent_states: target agent bounding box states + :param pred_agent_states: predicted agent bounding box states + :param trajectory: target trajectory of human operator + :param pred_trajectory: predicted trajectory of agent + :param config: global config dataclass of TransFuser + :return: RGB image for training visualization + """ + # import pdb; ipdb.set_trace() + gt_color, pred_color = (0, 255, 0), (255, 0, 0) + point_size = 4 + + height, width = lidar_map.shape[:2] + canvas = np.zeros_like(lidar_map) + lidar_map = canvas + def coords_to_pixel(coords): + """Convert local coordinates to pixel indices.""" + pixel_center = np.array([[height / 2.0, width / 2.0]]) + coords_idcs = (coords / config.bev_pixel_size) + pixel_center + return coords_idcs.astype(np.int32) + + rgb_map = (lidar_map * 255).astype(np.uint8) + # rgb_map = 255 - rgb_map[..., None].repeat(3, axis=-1) + rgb_map = rgb_map[..., None].repeat(3, axis=-1) + + # for color, traj in zip([gt_color, pred_color], [trajectory, pred_trajectory]): + traj = trajectory + color = gt_color + trajectory_indices = coords_to_pixel(traj[:, :2]) + for x, y in trajectory_indices: + cv2.circle(rgb_map, (y, x), point_size, color, -1) # -1 fills the circle + # import ipdb; ipdb.set_trace() + return rgb_map[::-1, ::-1] \ No newline at end of file diff --git a/navsim/agents/diffusiondrive/transfuser_config.py b/navsim/agents/diffusiondrive/transfuser_config.py new file mode 100644 index 0000000..9b3ae27 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_config.py @@ -0,0 +1,149 @@ +from dataclasses import dataclass +from typing import Tuple, List + +import numpy as np +from nuplan.common.maps.abstract_map import SemanticMapLayer +from nuplan.common.actor_state.tracked_objects_types import TrackedObjectType +from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling + +@dataclass +class TransfuserConfig: + """Global TransFuser config.""" + + trajectory_sampling: TrajectorySampling = TrajectorySampling(time_horizon=4, interval_length=0.5) + + image_architecture: str = "resnet34" + lidar_architecture: str = "resnet34" + bkb_path: str = "/home/users/bencheng.liao/.cache/huggingface/hub/checkpoints/resnet34.a1_in1k/pytorch_model.bin" + plan_anchor_path: str = "/data/hdd01/dingzx/vpn/kmeans_navsim_traj_20.npy" + + # lidar_latent: + latent: bool = False + latent_rad_thresh: float = 4 * np.pi / 9 + + max_height_lidar: float = 100.0 + pixels_per_meter: float = 4.0 + hist_max_per_pixel: int = 5 + + lidar_min_x: float = -32 + lidar_max_x: float = 32 + lidar_min_y: float = -32 + lidar_max_y: float = 32 + + lidar_split_height: float = 0.2 + use_ground_plane: bool = False + + # new + lidar_seq_len: int = 1 + + camera_width: int = 1024 + camera_height: int = 256 + lidar_resolution_width = 256 + lidar_resolution_height = 256 + + img_vert_anchors: int = 256 // 32 + img_horz_anchors: int = 1024 // 32 + lidar_vert_anchors: int = 256 // 32 + lidar_horz_anchors: int = 256 // 32 + + block_exp = 4 + n_layer = 2 # Number of transformer layers used in the vision backbone + n_head = 4 + n_scale = 4 + embd_pdrop = 0.1 + resid_pdrop = 0.1 + attn_pdrop = 0.1 + # Mean of the normal distribution initialization for linear layers in the GPT + gpt_linear_layer_init_mean = 0.0 + # Std of the normal distribution initialization for linear layers in the GPT + gpt_linear_layer_init_std = 0.02 + # Initial weight of the layer norms in the gpt. 
+ gpt_layer_norm_init_weight = 1.0 + + perspective_downsample_factor = 1 + transformer_decoder_join = True + detect_boxes = True + use_bev_semantic = True + use_semantic = False + use_depth = False + add_features = True + + # Transformer + tf_d_model: int = 256 + tf_d_ffn: int = 1024 + tf_num_layers: int = 3 + tf_num_head: int = 8 + tf_dropout: float = 0.0 + + # detection + num_bounding_boxes: int = 30 + + # loss weights + trajectory_weight: float = 12.0 + trajectory_cls_weight: float = 10.0 + trajectory_reg_weight: float = 8.0 + diff_loss_weight: float = 20.0 + agent_class_weight: float = 10.0 + agent_box_weight: float = 1.0 + bev_semantic_weight: float = 14.0 + use_ema: bool = False + # BEV mapping + bev_semantic_classes = { + 1: ("polygon", [SemanticMapLayer.LANE, SemanticMapLayer.INTERSECTION]), # road + 2: ("polygon", [SemanticMapLayer.WALKWAYS]), # walkways + 3: ("linestring", [SemanticMapLayer.LANE, SemanticMapLayer.LANE_CONNECTOR]), # centerline + 4: ( + "box", + [ + TrackedObjectType.CZONE_SIGN, + TrackedObjectType.BARRIER, + TrackedObjectType.TRAFFIC_CONE, + TrackedObjectType.GENERIC_OBJECT, + ], + ), # static_objects + 5: ("box", [TrackedObjectType.VEHICLE]), # vehicles + 6: ("box", [TrackedObjectType.PEDESTRIAN]), # pedestrians + } + + bev_pixel_width: int = lidar_resolution_width + bev_pixel_height: int = lidar_resolution_height // 2 + bev_pixel_size: float = 0.25 + + num_bev_classes = 7 + bev_features_channels: int = 64 + bev_down_sample_factor: int = 4 + bev_upsample_factor: int = 2 + + + # optmizer + weight_decay: float = 1e-4 + lr_steps = [70] + optimizer_type = "AdamW" + scheduler_type = "MultiStepLR" + cfg_lr_mult = 0.5 + opt_paramwise_cfg = { + "name":{ + "image_encoder":{ + "lr_mult": cfg_lr_mult + } + } + } + # optimizer=dict( + # type="AdamW", + # lr=1e-4, + # weight_decay=1e-6, + # ) + # scheduler=dict( + # type="MultiStepLR", + # milestones=[90], + # gamma=0.1, + # ) + + @property + def bev_semantic_frame(self) -> Tuple[int, int]: + return (self.bev_pixel_height, self.bev_pixel_width) + + @property + def bev_radius(self) -> float: + values = [self.lidar_min_x, self.lidar_max_x, self.lidar_min_y, self.lidar_max_y] + return max([abs(value) for value in values]) diff --git a/navsim/agents/diffusiondrive/transfuser_features.py b/navsim/agents/diffusiondrive/transfuser_features.py new file mode 100644 index 0000000..b258772 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_features.py @@ -0,0 +1,404 @@ +from enum import IntEnum +from typing import Any, Dict, List, Tuple +import cv2 +import numpy as np +import numpy.typing as npt + +import torch +from torchvision import transforms + +from shapely import affinity +from shapely.geometry import Polygon, LineString + +from nuplan.common.maps.abstract_map import AbstractMap, SemanticMapLayer, MapObject +from nuplan.common.actor_state.oriented_box import OrientedBox +from nuplan.common.actor_state.state_representation import StateSE2 +from nuplan.common.actor_state.tracked_objects_types import TrackedObjectType + +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig +from navsim.common.dataclasses import AgentInput, Scene, Annotations +from navsim.common.enums import BoundingBoxIndex, LidarIndex +from navsim.planning.scenario_builder.navsim_scenario_utils import tracked_object_types +from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder + + +class TransfuserFeatureBuilder(AbstractFeatureBuilder): + """Input feature builder for 
TransFuser."""
+
+    def __init__(self, config: TransfuserConfig):
+        """
+        Initializes feature builder.
+        :param config: global config dataclass of TransFuser
+        """
+        self._config = config
+
+    def get_unique_name(self) -> str:
+        """Inherited, see superclass."""
+        return "transfuser_feature"
+
+    def compute_features(self, agent_input: AgentInput) -> Dict[str, torch.Tensor]:
+        """Inherited, see superclass."""
+        features = {}
+
+        features["camera_feature"] = self._get_camera_feature(agent_input)
+        if not self._config.latent:
+            features["lidar_feature"] = self._get_lidar_feature(agent_input)
+        features["status_feature"] = torch.concatenate(
+            [
+                torch.tensor(agent_input.ego_statuses[-1].driving_command, dtype=torch.float32),
+                torch.tensor(agent_input.ego_statuses[-1].ego_velocity, dtype=torch.float32),
+                torch.tensor(agent_input.ego_statuses[-1].ego_acceleration, dtype=torch.float32),
+            ],
+        )
+        return features
+
+    def _get_camera_feature(self, agent_input: AgentInput) -> torch.Tensor:
+        """
+        Extract stitched camera from AgentInput
+        :param agent_input: input dataclass
+        :return: stitched front view image as torch tensor
+        """
+
+        cameras = agent_input.cameras[-1]
+
+        # Crop to ensure 4:1 aspect ratio
+        l0 = cameras.cam_l0.image[28:-28, 416:-416]
+        f0 = cameras.cam_f0.image[28:-28]
+        r0 = cameras.cam_r0.image[28:-28, 416:-416]
+
+        # stitch l0, f0, r0 images
+        stitched_image = np.concatenate([l0, f0, r0], axis=1)
+        resized_image = cv2.resize(stitched_image, (1024, 256))
+        # resized_image = cv2.resize(stitched_image, (2048, 512))
+        tensor_image = transforms.ToTensor()(resized_image)
+
+        return tensor_image
+
+    def _get_lidar_feature(self, agent_input: AgentInput) -> torch.Tensor:
+        """
+        Compute LiDAR feature as 2D histogram, according to Transfuser
+        :param agent_input: input dataclass
+        :return: LiDAR histogram as torch tensors
+        """
+
+        # only consider (x,y,z) & swap axes for (N,3) numpy array
+        lidar_pc = agent_input.lidars[-1].lidar_pc[LidarIndex.POSITION].T
+
+        # NOTE: Code from
+        # https://github.com/autonomousvision/carla_garage/blob/main/team_code/data.py#L873
+        def splat_points(point_cloud):
+            # 256 x 256 grid
+            xbins = np.linspace(
+                self._config.lidar_min_x,
+                self._config.lidar_max_x,
+                (self._config.lidar_max_x - self._config.lidar_min_x) * int(self._config.pixels_per_meter) + 1,
+            )
+            ybins = np.linspace(
+                self._config.lidar_min_y,
+                self._config.lidar_max_y,
+                (self._config.lidar_max_y - self._config.lidar_min_y) * int(self._config.pixels_per_meter) + 1,
+            )
+            hist = np.histogramdd(point_cloud[:, :2], bins=(xbins, ybins))[0]
+            hist[hist > self._config.hist_max_per_pixel] = self._config.hist_max_per_pixel
+            overhead_splat = hist / self._config.hist_max_per_pixel
+            return overhead_splat
+
+        # Remove points above the vehicle
+        lidar_pc = lidar_pc[lidar_pc[..., 2] < self._config.max_height_lidar]
+        below = lidar_pc[lidar_pc[..., 2] <= self._config.lidar_split_height]
+        above = lidar_pc[lidar_pc[..., 2] > self._config.lidar_split_height]
+        above_features = splat_points(above)
+        if self._config.use_ground_plane:
+            below_features = splat_points(below)
+            features = np.stack([below_features, above_features], axis=-1)
+        else:
+            features = np.stack([above_features], axis=-1)
+        features = np.transpose(features, (2, 0, 1)).astype(np.float32)
+
+        return torch.tensor(features)
+
+
+class 
TransfuserTargetBuilder(AbstractTargetBuilder): + """Output target builder for TransFuser.""" + + def __init__(self, config: TransfuserConfig): + """ + Initializes target builder. + :param config: global config dataclass of TransFuser + """ + self._config = config + + def get_unique_name(self) -> str: + """Inherited, see superclass.""" + return "transfuser_target" + + def compute_targets(self, scene: Scene) -> Dict[str, torch.Tensor]: + """Inherited, see superclass.""" + # import pdb;pdb.set_trace() + trajectory = torch.tensor( + scene.get_future_trajectory(num_trajectory_frames=self._config.trajectory_sampling.num_poses).poses + ) + frame_idx = scene.scene_metadata.num_history_frames - 1 + annotations = scene.frames[frame_idx].annotations + ego_pose = StateSE2(*scene.frames[frame_idx].ego_status.ego_pose) + + agent_states, agent_labels = self._compute_agent_targets(annotations) + bev_semantic_map = self._compute_bev_semantic_map(annotations, scene.map_api, ego_pose) + + return { + "trajectory": trajectory, + "agent_states": agent_states, + "agent_labels": agent_labels, + "bev_semantic_map": bev_semantic_map, + } + + def _compute_agent_targets(self, annotations: Annotations) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Extracts 2D agent bounding boxes in ego coordinates + :param annotations: annotation dataclass + :return: tuple of bounding box values and labels (binary) + """ + + max_agents = self._config.num_bounding_boxes + agent_states_list: List[npt.NDArray[np.float32]] = [] + + def _xy_in_lidar(x: float, y: float, config: TransfuserConfig) -> bool: + return (config.lidar_min_x <= x <= config.lidar_max_x) and (config.lidar_min_y <= y <= config.lidar_max_y) + + for box, name in zip(annotations.boxes, annotations.names): + box_x, box_y, box_heading, box_length, box_width = ( + box[BoundingBoxIndex.X], + box[BoundingBoxIndex.Y], + box[BoundingBoxIndex.HEADING], + box[BoundingBoxIndex.LENGTH], + box[BoundingBoxIndex.WIDTH], + ) + + if name == "vehicle" and _xy_in_lidar(box_x, box_y, self._config): + agent_states_list.append(np.array([box_x, box_y, box_heading, box_length, box_width], dtype=np.float32)) + + agents_states_arr = np.array(agent_states_list) + + # filter num_instances nearest + agent_states = np.zeros((max_agents, BoundingBox2DIndex.size()), dtype=np.float32) + agent_labels = np.zeros(max_agents, dtype=bool) + + if len(agents_states_arr) > 0: + distances = np.linalg.norm(agents_states_arr[..., BoundingBox2DIndex.POINT], axis=-1) + argsort = np.argsort(distances)[:max_agents] + + # filter detections + agents_states_arr = agents_states_arr[argsort] + agent_states[: len(agents_states_arr)] = agents_states_arr + agent_labels[: len(agents_states_arr)] = True + + return torch.tensor(agent_states), torch.tensor(agent_labels) + + def _compute_bev_semantic_map( + self, annotations: Annotations, map_api: AbstractMap, ego_pose: StateSE2 + ) -> torch.Tensor: + """ + Creates sematic map in BEV + :param annotations: annotation dataclass + :param map_api: map interface of nuPlan + :param ego_pose: ego pose in global frame + :return: 2D torch tensor of semantic labels + """ + + bev_semantic_map = np.zeros(self._config.bev_semantic_frame, dtype=np.int64) + for label, (entity_type, layers) in self._config.bev_semantic_classes.items(): + if entity_type == "polygon": + entity_mask = self._compute_map_polygon_mask(map_api, ego_pose, layers) + elif entity_type == "linestring": + entity_mask = self._compute_map_linestring_mask(map_api, ego_pose, layers) + else: + entity_mask = 
self._compute_box_mask(annotations, layers) + bev_semantic_map[entity_mask] = label + + return torch.Tensor(bev_semantic_map) + + def _compute_map_polygon_mask( + self, map_api: AbstractMap, ego_pose: StateSE2, layers: List[SemanticMapLayer] + ) -> npt.NDArray[np.bool_]: + """ + Compute binary mask given a map layer class + :param map_api: map interface of nuPlan + :param ego_pose: ego pose in global frame + :param layers: map layers + :return: binary mask as numpy array + """ + + map_object_dict = map_api.get_proximal_map_objects( + point=ego_pose.point, radius=self._config.bev_radius, layers=layers + ) + map_polygon_mask = np.zeros(self._config.bev_semantic_frame[::-1], dtype=np.uint8) + for layer in layers: + for map_object in map_object_dict[layer]: + polygon: Polygon = self._geometry_local_coords(map_object.polygon, ego_pose) + exterior = np.array(polygon.exterior.coords).reshape((-1, 1, 2)) + exterior = self._coords_to_pixel(exterior) + cv2.fillPoly(map_polygon_mask, [exterior], color=255) + # OpenCV has origin on top-left corner + map_polygon_mask = np.rot90(map_polygon_mask)[::-1] + return map_polygon_mask > 0 + + def _compute_map_linestring_mask( + self, map_api: AbstractMap, ego_pose: StateSE2, layers: List[SemanticMapLayer] + ) -> npt.NDArray[np.bool_]: + """ + Compute binary of linestring given a map layer class + :param map_api: map interface of nuPlan + :param ego_pose: ego pose in global frame + :param layers: map layers + :return: binary mask as numpy array + """ + map_object_dict = map_api.get_proximal_map_objects( + point=ego_pose.point, radius=self._config.bev_radius, layers=layers + ) + map_linestring_mask = np.zeros(self._config.bev_semantic_frame[::-1], dtype=np.uint8) + for layer in layers: + for map_object in map_object_dict[layer]: + linestring: LineString = self._geometry_local_coords(map_object.baseline_path.linestring, ego_pose) + points = np.array(linestring.coords).reshape((-1, 1, 2)) + points = self._coords_to_pixel(points) + cv2.polylines(map_linestring_mask, [points], isClosed=False, color=255, thickness=2) + # OpenCV has origin on top-left corner + map_linestring_mask = np.rot90(map_linestring_mask)[::-1] + return map_linestring_mask > 0 + + def _compute_box_mask(self, annotations: Annotations, layers: TrackedObjectType) -> npt.NDArray[np.bool_]: + """ + Compute binary of bounding boxes in BEV space + :param annotations: annotation dataclass + :param layers: bounding box labels to include + :return: binary mask as numpy array + """ + box_polygon_mask = np.zeros(self._config.bev_semantic_frame[::-1], dtype=np.uint8) + for name_value, box_value in zip(annotations.names, annotations.boxes): + agent_type = tracked_object_types[name_value] + if agent_type in layers: + # box_value = (x, y, z, length, width, height, yaw) TODO: add intenum + x, y, heading = box_value[0], box_value[1], box_value[-1] + box_length, box_width, box_height = box_value[3], box_value[4], box_value[5] + agent_box = OrientedBox(StateSE2(x, y, heading), box_length, box_width, box_height) + exterior = np.array(agent_box.geometry.exterior.coords).reshape((-1, 1, 2)) + exterior = self._coords_to_pixel(exterior) + cv2.fillPoly(box_polygon_mask, [exterior], color=255) + # OpenCV has origin on top-left corner + box_polygon_mask = np.rot90(box_polygon_mask)[::-1] + return box_polygon_mask > 0 + + @staticmethod + def _query_map_objects( + self, map_api: AbstractMap, ego_pose: StateSE2, layers: List[SemanticMapLayer] + ) -> List[MapObject]: + """ + Queries map objects + :param map_api: map interface 
of nuPlan
+        :param ego_pose: ego pose in global frame
+        :param layers: map layers
+        :return: list of map objects
+        """
+
+        # query map api with interesting layers
+        map_object_dict = map_api.get_proximal_map_objects(point=ego_pose.point, radius=self._config.bev_radius, layers=layers)
+        map_objects: List[MapObject] = []
+        for layer in layers:
+            map_objects += map_object_dict[layer]
+        return map_objects
+
+    @staticmethod
+    def _geometry_local_coords(geometry: Any, origin: StateSE2) -> Any:
+        """
+        Transform shapely geometry in local coordinates of origin.
+        :param geometry: shapely geometry
+        :param origin: pose dataclass
+        :return: shapely geometry
+        """
+
+        a = np.cos(origin.heading)
+        b = np.sin(origin.heading)
+        d = -np.sin(origin.heading)
+        e = np.cos(origin.heading)
+        xoff = -origin.x
+        yoff = -origin.y
+
+        translated_geometry = affinity.affine_transform(geometry, [1, 0, 0, 1, xoff, yoff])
+        rotated_geometry = affinity.affine_transform(translated_geometry, [a, b, d, e, 0, 0])
+
+        return rotated_geometry
+
+    def _coords_to_pixel(self, coords):
+        """
+        Transform local coordinates into pixel indices of the BEV map
+        :param coords: (..., 2) array of (x, y) coordinates in the ego frame
+        :return: (..., 2) array of pixel indices as int32
+        """
+
+        # NOTE: remove half in backward direction
+        pixel_center = np.array([[0, self._config.bev_pixel_width / 2.0]])
+        coords_idcs = (coords / self._config.bev_pixel_size) + pixel_center
+
+        return coords_idcs.astype(np.int32)
+
+
+class BoundingBox2DIndex(IntEnum):
+    """IntEnum for bounding boxes in TransFuser."""
+
+    _X = 0
+    _Y = 1
+    _HEADING = 2
+    _LENGTH = 3
+    _WIDTH = 4
+
+    @classmethod
+    def size(cls):
+        valid_attributes = [
+            attribute
+            for attribute in dir(cls)
+            if attribute.startswith("_") and not attribute.startswith("__") and not callable(getattr(cls, attribute))
+        ]
+        return len(valid_attributes)
+
+    @classmethod
+    @property
+    def X(cls):
+        return cls._X
+
+    @classmethod
+    @property
+    def Y(cls):
+        return cls._Y
+
+    @classmethod
+    @property
+    def HEADING(cls):
+        return cls._HEADING
+
+    @classmethod
+    @property
+    def LENGTH(cls):
+        return cls._LENGTH
+
+    @classmethod
+    @property
+    def WIDTH(cls):
+        return cls._WIDTH
+
+    @classmethod
+    @property
+    def POINT(cls):
+        # assumes X, Y have subsequent indices
+        return slice(cls._X, cls._Y + 1)
+
+    @classmethod
+    @property
+    def STATE_SE2(cls):
+        # assumes X, Y, HEADING have subsequent indices
+        return slice(cls._X, cls._HEADING + 1)
diff --git a/navsim/agents/diffusiondrive/transfuser_loss.py b/navsim/agents/diffusiondrive/transfuser_loss.py
new file mode 100644
index 0000000..e0f74d3
--- /dev/null
+++ b/navsim/agents/diffusiondrive/transfuser_loss.py
@@ -0,0 +1,172 @@
+from typing import Dict
+from scipy.optimize import linear_sum_assignment
+
+import torch
+import torch.nn.functional as F
+
+from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig
+from navsim.agents.diffusiondrive.transfuser_features import BoundingBox2DIndex
+
+
+def transfuser_loss(
+    targets: Dict[str, torch.Tensor], predictions: Dict[str, torch.Tensor], config: TransfuserConfig
+):
+    """
+    Helper function calculating complete loss of Transfuser
+    :param targets: dictionary of name tensor pairings
+    :param predictions: dictionary of name tensor pairings
+    :param config: global Transfuser config
+    :return: combined loss value
+    """
+    # modify the target trajectory
+    # import ipdb; ipdb.set_trace()
+    if "trajectory_loss" in predictions:
+        trajectory_loss = predictions["trajectory_loss"]
+    else:
+        trajectory_loss = F.l1_loss(predictions["trajectory"], targets["trajectory"])
+    # # not needed for fine-tuning
+    agent_class_loss, 
agent_box_loss = _agent_loss(targets, predictions, config) + bev_semantic_loss = F.cross_entropy( + predictions["bev_semantic_map"], targets["bev_semantic_map"].long() + ) + if 'diffusion_loss' in predictions: + diffusion_loss = predictions['diffusion_loss'] + else: + diffusion_loss = 0 + loss = ( + config.trajectory_weight * trajectory_loss + + config.diff_loss_weight * diffusion_loss + + config.agent_class_weight * agent_class_loss + + config.agent_box_weight * agent_box_loss + + config.bev_semantic_weight * bev_semantic_loss + ) + loss_dict = { + 'loss': loss, + 'trajectory_loss': config.trajectory_weight*trajectory_loss, + 'diffusion_loss': config.diff_loss_weight*diffusion_loss, + 'agent_class_loss': config.agent_class_weight*agent_class_loss, + 'agent_box_loss': config.agent_box_weight*agent_box_loss, + 'bev_semantic_loss': config.bev_semantic_weight*bev_semantic_loss + } + if "trajectory_loss_dict" in predictions: + trajectory_loss_dict = predictions["trajectory_loss_dict"] + loss_dict.update(trajectory_loss_dict) + # import ipdb; ipdb.set_trace() + return loss_dict + + +def _agent_loss( + targets: Dict[str, torch.Tensor], predictions: Dict[str, torch.Tensor], config: TransfuserConfig +): + """ + Hungarian matching loss for agent detection + :param targets: dictionary of name tensor pairings + :param predictions: dictionary of name tensor pairings + :param config: global Transfuser config + :return: detection loss + """ + + gt_states, gt_valid = targets["agent_states"], targets["agent_labels"] + pred_states, pred_logits = predictions["agent_states"], predictions["agent_labels"] + + if config.latent: + rad_to_ego = torch.arctan2( + gt_states[..., BoundingBox2DIndex.Y], + gt_states[..., BoundingBox2DIndex.X], + ) + + in_latent_rad_thresh = torch.logical_and( + -config.latent_rad_thresh <= rad_to_ego, + rad_to_ego <= config.latent_rad_thresh, + ) + gt_valid = torch.logical_and(in_latent_rad_thresh, gt_valid) + + # save constants + batch_dim, num_instances = pred_states.shape[:2] + num_gt_instances = gt_valid.sum() + num_gt_instances = num_gt_instances if num_gt_instances > 0 else num_gt_instances + 1 + + ce_cost = _get_ce_cost(gt_valid, pred_logits) + l1_cost = _get_l1_cost(gt_states, pred_states, gt_valid) + + cost = config.agent_class_weight * ce_cost + config.agent_box_weight * l1_cost + cost = cost.cpu() + + indices = [linear_sum_assignment(c) for i, c in enumerate(cost)] + matching = [ + (torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) + for i, j in indices + ] + idx = _get_src_permutation_idx(matching) + + pred_states_idx = pred_states[idx] + gt_states_idx = torch.cat([t[i] for t, (_, i) in zip(gt_states, indices)], dim=0) + + pred_valid_idx = pred_logits[idx] + gt_valid_idx = torch.cat([t[i] for t, (_, i) in zip(gt_valid, indices)], dim=0).float() + + l1_loss = F.l1_loss(pred_states_idx, gt_states_idx, reduction="none") + l1_loss = l1_loss.sum(-1) * gt_valid_idx + l1_loss = l1_loss.view(batch_dim, -1).sum() / num_gt_instances + + ce_loss = F.binary_cross_entropy_with_logits(pred_valid_idx, gt_valid_idx, reduction="none") + ce_loss = ce_loss.view(batch_dim, -1).mean() + + return ce_loss, l1_loss + + +@torch.no_grad() +def _get_ce_cost(gt_valid: torch.Tensor, pred_logits: torch.Tensor) -> torch.Tensor: + """ + Function to calculate cross-entropy cost for cost matrix. 
+ :param gt_valid: tensor of binary ground-truth labels + :param pred_logits: tensor of predicted logits of neural net + :return: bce cost matrix as tensor + """ + + # NOTE: numerically stable BCE with logits + # https://github.com/pytorch/pytorch/blob/c64e006fc399d528bb812ae589789d0365f3daf4/aten/src/ATen/native/Loss.cpp#L214 + gt_valid_expanded = gt_valid[:, :, None].detach().float() # (b, n, 1) + pred_logits_expanded = pred_logits[:, None, :].detach() # (b, 1, n) + + max_val = torch.relu(-pred_logits_expanded) + helper_term = max_val + torch.log( + torch.exp(-max_val) + torch.exp(-pred_logits_expanded - max_val) + ) + ce_cost = (1 - gt_valid_expanded) * pred_logits_expanded + helper_term # (b, n, n) + ce_cost = ce_cost.permute(0, 2, 1) + + return ce_cost + + +@torch.no_grad() +def _get_l1_cost( + gt_states: torch.Tensor, pred_states: torch.Tensor, gt_valid: torch.Tensor +) -> torch.Tensor: + """ + Function to calculate L1 cost for cost matrix. + :param gt_states: tensor of ground-truth bounding boxes + :param pred_states: tensor of predicted bounding boxes + :param gt_valid: mask of binary ground-truth labels + :return: l1 cost matrix as tensor + """ + + gt_states_expanded = gt_states[:, :, None, :2].detach() # (b, n, 1, 2) + pred_states_expanded = pred_states[:, None, :, :2].detach() # (b, 1, n, 2) + l1_cost = gt_valid[..., None].float() * (gt_states_expanded - pred_states_expanded).abs().sum( + dim=-1 + ) + l1_cost = l1_cost.permute(0, 2, 1) + return l1_cost + + +def _get_src_permutation_idx(indices): + """ + Helper function to align indices after matching + :param indices: matched indices + :return: permuted indices + """ + # permute predictions following indices + batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)]) + src_idx = torch.cat([src for (src, _) in indices]) + return batch_idx, src_idx diff --git a/navsim/agents/diffusiondrive/transfuser_model_v2.py b/navsim/agents/diffusiondrive/transfuser_model_v2.py new file mode 100644 index 0000000..b8bcbf4 --- /dev/null +++ b/navsim/agents/diffusiondrive/transfuser_model_v2.py @@ -0,0 +1,603 @@ +from typing import Dict +import numpy as np +import torch +import torch.nn as nn +import copy +from navsim.agents.diffusiondrive.transfuser_config import TransfuserConfig +from navsim.agents.diffusiondrive.transfuser_backbone import TransfuserBackbone +from navsim.agents.diffusiondrive.transfuser_features import BoundingBox2DIndex +from navsim.common.enums import StateSE2Index +from diffusers.schedulers import DDIMScheduler +from navsim.agents.diffusiondrive.modules.conditional_unet1d import ConditionalUnet1D,SinusoidalPosEmb +import torch.nn.functional as F +from navsim.agents.diffusiondrive.modules.blocks import linear_relu_ln,bias_init_with_prob, gen_sineembed_for_position, GridSampleCrossBEVAttention,GridSampleCrossBEVAttention_navi +from navsim.agents.diffusiondrive.modules.multimodal_loss import LossComputer +from torch.nn import TransformerDecoder,TransformerDecoderLayer +from typing import Any, List, Dict, Optional, Union + +class V2TransfuserModel(nn.Module): + """Torch module for Transfuser.""" + + def __init__(self, config: TransfuserConfig): + """ + Initializes TransFuser torch module. + :param config: global config dataclass of TransFuser. 
+ """ + + super().__init__() + + self._query_splits = [ + 1, + config.num_bounding_boxes, + ] + + self._config = config + self._backbone = TransfuserBackbone(config) + + self._keyval_embedding = nn.Embedding(8**2 + 1, config.tf_d_model) # 8x8 feature grid + trajectory + self._query_embedding = nn.Embedding(sum(self._query_splits), config.tf_d_model) + + # usually, the BEV features are variable in size. + self._bev_downscale = nn.Conv2d(512, config.tf_d_model, kernel_size=1) + # self._status_encoding = nn.Linear(4 + 2 + 2, config.tf_d_model) + # command 一维v 一维a + self._status_encoding = nn.Linear(4 + 1 + 1, config.tf_d_model) + + # 只用command + # self._status_encoding = nn.Linear(4, config.tf_d_model) + self._bev_semantic_head = nn.Sequential( + nn.Conv2d( + config.bev_features_channels, + config.bev_features_channels, + kernel_size=(3, 3), + stride=1, + padding=(1, 1), + bias=True, + ), + nn.ReLU(inplace=True), + nn.Conv2d( + config.bev_features_channels, + config.num_bev_classes, + kernel_size=(1, 1), + stride=1, + padding=0, + bias=True, + ), + nn.Upsample( + size=(config.lidar_resolution_height // 2, config.lidar_resolution_width), + mode="bilinear", + align_corners=False, + ), + ) + + tf_decoder_layer = nn.TransformerDecoderLayer( + d_model=config.tf_d_model, + nhead=config.tf_num_head, + dim_feedforward=config.tf_d_ffn, + dropout=config.tf_dropout, + batch_first=True, + ) + + self._tf_decoder = nn.TransformerDecoder(tf_decoder_layer, config.tf_num_layers) + self._agent_head = AgentHead( + num_agents=config.num_bounding_boxes, + d_ffn=config.tf_d_ffn, + d_model=config.tf_d_model, + ) + + self._trajectory_head = TrajectoryHead( + num_poses=config.trajectory_sampling.num_poses, + d_ffn=config.tf_d_ffn, + d_model=config.tf_d_model, + plan_anchor_path=config.plan_anchor_path, + config=config, + ) + self.bev_proj = nn.Sequential( + *linear_relu_ln(256, 1, 1,320), + ) + + def forward(self, features: Dict[str, torch.Tensor], targets: Dict[str, torch.Tensor]=None) -> Dict[str, torch.Tensor]: + """Torch module forward pass.""" + + camera_feature: torch.Tensor = features["camera_feature"] + + if self._config.latent: + lidar_feature = None + else: + lidar_feature: torch.Tensor = features["lidar_feature"] + # lidar_feature: torch.Tensor = features["lidar_feature"] + status_feature: torch.Tensor = features["status_feature"] + + # 只用command + # status_feature=status_feature[:,:4] + + batch_size = status_feature.shape[0] + + bev_feature_upscale, bev_feature, _ = self._backbone(camera_feature, lidar_feature) + cross_bev_feature = bev_feature_upscale + bev_spatial_shape = bev_feature_upscale.shape[2:] + concat_cross_bev_shape = bev_feature.shape[2:] + bev_feature = self._bev_downscale(bev_feature).flatten(-2, -1) + bev_feature = bev_feature.permute(0, 2, 1) + status_encoding = self._status_encoding(status_feature) + + keyval = torch.concatenate([bev_feature, status_encoding[:, None]], dim=1) + keyval += self._keyval_embedding.weight[None, ...] 
+ + concat_cross_bev = keyval[:,:-1].permute(0,2,1).contiguous().view(batch_size, -1, concat_cross_bev_shape[0], concat_cross_bev_shape[1]) + # upsample to the same shape as bev_feature_upscale + + concat_cross_bev = F.interpolate(concat_cross_bev, size=bev_spatial_shape, mode='bilinear', align_corners=False) + # concat concat_cross_bev and cross_bev_feature + cross_bev_feature = torch.cat([concat_cross_bev, cross_bev_feature], dim=1) + + cross_bev_feature = self.bev_proj(cross_bev_feature.flatten(-2,-1).permute(0,2,1)) + cross_bev_feature = cross_bev_feature.permute(0,2,1).contiguous().view(batch_size, -1, bev_spatial_shape[0], bev_spatial_shape[1]) + query = self._query_embedding.weight[None, ...].repeat(batch_size, 1, 1) + query_out = self._tf_decoder(query, keyval) + + bev_semantic_map = self._bev_semantic_head(bev_feature_upscale) + trajectory_query, agents_query = query_out.split(self._query_splits, dim=1) + + output: Dict[str, torch.Tensor] = {"bev_semantic_map": bev_semantic_map} + + trajectory = self._trajectory_head(trajectory_query,agents_query, cross_bev_feature,bev_spatial_shape,status_encoding[:, None],targets=targets,global_img=None) + output.update(trajectory) + + agents = self._agent_head(agents_query) + output.update(agents) + return output + # return output + +class AgentHead(nn.Module): + """Bounding box prediction head.""" + + def __init__( + self, + num_agents: int, + d_ffn: int, + d_model: int, + ): + """ + Initializes prediction head. + :param num_agents: maximum number of agents to predict + :param d_ffn: dimensionality of feed-forward network + :param d_model: input dimensionality + """ + super(AgentHead, self).__init__() + + self._num_objects = num_agents + self._d_model = d_model + self._d_ffn = d_ffn + + self._mlp_states = nn.Sequential( + nn.Linear(self._d_model, self._d_ffn), + nn.ReLU(), + nn.Linear(self._d_ffn, BoundingBox2DIndex.size()), + ) + + self._mlp_label = nn.Sequential( + nn.Linear(self._d_model, 1), + ) + + def forward(self, agent_queries) -> Dict[str, torch.Tensor]: + """Torch module forward pass.""" + + agent_states = self._mlp_states(agent_queries) + agent_states[..., BoundingBox2DIndex.POINT] = agent_states[..., BoundingBox2DIndex.POINT].tanh() * 32 + agent_states[..., BoundingBox2DIndex.HEADING] = agent_states[..., BoundingBox2DIndex.HEADING].tanh() * np.pi + + agent_labels = self._mlp_label(agent_queries).squeeze(dim=-1) + + return {"agent_states": agent_states, "agent_labels": agent_labels} + +class DiffMotionPlanningRefinementModule(nn.Module): + def __init__( + self, + embed_dims=256, + ego_fut_ts=8, + ego_fut_mode=20, + if_zeroinit_reg=True, + ): + super(DiffMotionPlanningRefinementModule, self).__init__() + self.embed_dims = embed_dims + self.ego_fut_ts = ego_fut_ts + self.ego_fut_mode = ego_fut_mode + self.plan_cls_branch = nn.Sequential( + *linear_relu_ln(embed_dims, 1, 2), + nn.Linear(embed_dims, 1), + ) + self.plan_reg_branch = nn.Sequential( + nn.Linear(embed_dims, embed_dims), + nn.ReLU(), + nn.Linear(embed_dims, embed_dims), + nn.ReLU(), + nn.Linear(embed_dims, ego_fut_ts * 3), + ) + self.if_zeroinit_reg = False + + self.init_weight() + + def init_weight(self): + if self.if_zeroinit_reg: + nn.init.constant_(self.plan_reg_branch[-1].weight, 0) + nn.init.constant_(self.plan_reg_branch[-1].bias, 0) + + bias_init = bias_init_with_prob(0.01) + nn.init.constant_(self.plan_cls_branch[-1].bias, bias_init) + def forward( + self, + traj_feature, + ): + bs, ego_fut_mode, _ = traj_feature.shape + + # 6. 
get final prediction + traj_feature = traj_feature.view(bs, ego_fut_mode,-1) + plan_cls = self.plan_cls_branch(traj_feature).squeeze(-1) + traj_delta = self.plan_reg_branch(traj_feature) + plan_reg = traj_delta.reshape(bs,ego_fut_mode, self.ego_fut_ts, 3) + + return plan_reg, plan_cls +class ModulationLayer(nn.Module): + + def __init__(self, embed_dims: int, condition_dims: int): + super(ModulationLayer, self).__init__() + self.if_zeroinit_scale=False + self.embed_dims = embed_dims + self.scale_shift_mlp = nn.Sequential( + nn.Mish(), + nn.Linear(condition_dims, embed_dims*2), + ) + self.init_weight() + + def init_weight(self): + if self.if_zeroinit_scale: + nn.init.constant_(self.scale_shift_mlp[-1].weight, 0) + nn.init.constant_(self.scale_shift_mlp[-1].bias, 0) + + def forward( + self, + traj_feature, + time_embed, + global_cond=None, + global_img=None, + ): + if global_cond is not None: + global_feature = torch.cat([ + global_cond, time_embed + ], axis=-1) + else: + global_feature = time_embed + if global_img is not None: + global_img = global_img.flatten(2,3).permute(0,2,1).contiguous() + global_feature = torch.cat([ + global_img, global_feature + ], axis=-1) + + scale_shift = self.scale_shift_mlp(global_feature) + scale,shift = scale_shift.chunk(2,dim=-1) + traj_feature = traj_feature * (1 + scale) + shift + return traj_feature + +class CustomTransformerDecoderLayer(nn.Module): + def __init__(self, + num_poses, + d_model, + d_ffn, + config, + ): + super().__init__() + self.dropout = nn.Dropout(0.1) + self.dropout1 = nn.Dropout(0.1) + + self.cross_bev_attention = GridSampleCrossBEVAttention( + config.tf_d_model, + config.tf_num_head, + num_points=num_poses, + config=config, + in_bev_dims=256, + ) + + # GridSampleCrossBEVAttention_navi + self.cross_bev_attention_navi = GridSampleCrossBEVAttention_navi( + config.tf_d_model, + config.tf_num_head, + num_points=1, + config=config, + in_bev_dims=256, + ) + self.cross_agent_attention = nn.MultiheadAttention( + config.tf_d_model, + config.tf_num_head, + dropout=config.tf_dropout, + batch_first=True, + ) + self.cross_ego_attention = nn.MultiheadAttention( + config.tf_d_model, + config.tf_num_head, + dropout=config.tf_dropout, + batch_first=True, + ) + self.ffn = nn.Sequential( + nn.Linear(config.tf_d_model, config.tf_d_ffn), + nn.ReLU(), + nn.Linear(config.tf_d_ffn, config.tf_d_model), + ) + self.norm1 = nn.LayerNorm(config.tf_d_model) + self.norm2 = nn.LayerNorm(config.tf_d_model) + self.norm3 = nn.LayerNorm(config.tf_d_model) + self.time_modulation = ModulationLayer(config.tf_d_model,256) + self.task_decoder = DiffMotionPlanningRefinementModule( + embed_dims=config.tf_d_model, + ego_fut_ts=num_poses, + ego_fut_mode=20, + ) + + def forward(self, + traj_feature, + noisy_traj_points, + bev_feature, + bev_spatial_shape, + agents_query, + ego_query, + time_embed, + status_encoding, + global_img=None, + gt_points=None + ): + + traj_feature = self.cross_bev_attention(traj_feature,noisy_traj_points,bev_feature,bev_spatial_shape) + + # navi + if gt_points is not None: + traj_feature = self.cross_bev_attention_navi(traj_feature,gt_points,bev_feature,bev_spatial_shape) + # print("navi") + + traj_feature = traj_feature + self.dropout(self.cross_agent_attention(traj_feature, agents_query,agents_query)[0]) + traj_feature = self.norm1(traj_feature) + + # traj_feature = traj_feature + self.dropout(self.self_attn(traj_feature, traj_feature, traj_feature)[0]) + + # 4.5 cross attention with ego query + traj_feature = traj_feature + 
self.dropout1(self.cross_ego_attention(traj_feature, ego_query,ego_query)[0]) + traj_feature = self.norm2(traj_feature) + + # 4.6 feedforward network + traj_feature = self.norm3(self.ffn(traj_feature)) + # 4.8 modulate with time steps + traj_feature = self.time_modulation(traj_feature, time_embed,global_cond=None,global_img=global_img) + + # 4.9 predict the offset & heading + poses_reg, poses_cls = self.task_decoder(traj_feature) #bs,20,8,3; bs,20 + poses_reg[...,:2] = poses_reg[...,:2] + noisy_traj_points + poses_reg[..., StateSE2Index.HEADING] = poses_reg[..., StateSE2Index.HEADING].tanh() * np.pi + + return poses_reg, poses_cls +def _get_clones(module, N): + # FIXME: copy.deepcopy() is not defined on nn.module + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +class CustomTransformerDecoder(nn.Module): + def __init__( + self, + decoder_layer, + num_layers, + norm=None, + ): + super().__init__() + torch._C._log_api_usage_once(f"torch.nn.modules.{self.__class__.__name__}") + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + + def forward(self, + traj_feature, + noisy_traj_points, + bev_feature, + bev_spatial_shape, + agents_query, + ego_query, + time_embed, + status_encoding, + global_img=None, + gt_points=None + ): + poses_reg_list = [] + poses_cls_list = [] + traj_points = noisy_traj_points + for mod in self.layers: + poses_reg, poses_cls = mod(traj_feature, traj_points, bev_feature, bev_spatial_shape, agents_query, ego_query, time_embed, status_encoding,global_img,gt_points) + poses_reg_list.append(poses_reg) + poses_cls_list.append(poses_cls) + traj_points = poses_reg[...,:2].clone().detach() + return poses_reg_list, poses_cls_list + +class TrajectoryHead(nn.Module): + """Trajectory prediction head.""" + + def __init__(self, num_poses: int, d_ffn: int, d_model: int, plan_anchor_path: str,config: TransfuserConfig): + """ + Initializes trajectory head. 
+ :param num_poses: number of (x,y,θ) poses to predict + :param d_ffn: dimensionality of feed-forward network + :param d_model: input dimensionality + """ + super(TrajectoryHead, self).__init__() + + self._num_poses = num_poses + self._d_model = d_model + self._d_ffn = d_ffn + self.diff_loss_weight = 2.0 + self.ego_fut_mode = 20 + + self.diffusion_scheduler = DDIMScheduler( + num_train_timesteps=1000, + beta_schedule="scaled_linear", + prediction_type="sample", + ) + # print(plan_anchor_path) + # import pdb;pdb.set_trace() + plan_anchor = np.load(plan_anchor_path) + + self.plan_anchor = nn.Parameter( + torch.tensor(plan_anchor, dtype=torch.float32), + requires_grad=False, + ) # 20,8,2 + self.plan_anchor_encoder = nn.Sequential( + *linear_relu_ln(d_model, 1, 1,512), + nn.Linear(d_model, d_model), + ) + self.time_mlp = nn.Sequential( + SinusoidalPosEmb(d_model), + nn.Linear(d_model, d_model * 4), + nn.Mish(), + nn.Linear(d_model * 4, d_model), + ) + + diff_decoder_layer = CustomTransformerDecoderLayer( + num_poses=num_poses, + d_model=d_model, + d_ffn=d_ffn, + config=config, + ) + self.diff_decoder = CustomTransformerDecoder(diff_decoder_layer, 2) + + self.loss_computer = LossComputer(config) + def norm_odo(self, odo_info_fut): + odo_info_fut_x = odo_info_fut[..., 0:1] + odo_info_fut_y = odo_info_fut[..., 1:2] + odo_info_fut_head = odo_info_fut[..., 2:3] + + odo_info_fut_x = 2*(odo_info_fut_x + 1.2)/56.9 -1 + odo_info_fut_y = 2*(odo_info_fut_y + 20)/46 -1 + odo_info_fut_head = 2*(odo_info_fut_head + 2)/3.9 -1 + return torch.cat([odo_info_fut_x, odo_info_fut_y, odo_info_fut_head], dim=-1) + def denorm_odo(self, odo_info_fut): + odo_info_fut_x = odo_info_fut[..., 0:1] + odo_info_fut_y = odo_info_fut[..., 1:2] + odo_info_fut_head = odo_info_fut[..., 2:3] + + odo_info_fut_x = (odo_info_fut_x + 1)/2 * 56.9 - 1.2 + odo_info_fut_y = (odo_info_fut_y + 1)/2 * 46 - 20 + odo_info_fut_head = (odo_info_fut_head + 1)/2 * 3.9 - 2 + return torch.cat([odo_info_fut_x, odo_info_fut_y, odo_info_fut_head], dim=-1) + def forward(self, ego_query, agents_query, bev_feature,bev_spatial_shape,status_encoding, targets=None,global_img=None) -> Dict[str, torch.Tensor]: + """Torch module forward pass.""" + if self.training: + return self.forward_train(ego_query, agents_query, bev_feature,bev_spatial_shape,status_encoding,targets,global_img) + else: + return self.forward_test(ego_query, agents_query, bev_feature,bev_spatial_shape,status_encoding,global_img) + + + def forward_train(self, ego_query,agents_query,bev_feature,bev_spatial_shape,status_encoding, targets=None,global_img=None) -> Dict[str, torch.Tensor]: + bs = ego_query.shape[0] + device = ego_query.device + # 1. add truncated noise to the plan anchor + plan_anchor = self.plan_anchor.unsqueeze(0).repeat(bs,1,1,1) + odo_info_fut = self.norm_odo(plan_anchor) + timesteps = torch.randint( + 0, 50, + (bs,), device=device + ) + noise = torch.randn(odo_info_fut.shape, device=device) + noisy_traj_points = self.diffusion_scheduler.add_noise( + original_samples=odo_info_fut, + noise=noise, + timesteps=timesteps, + ).float() + noisy_traj_points = torch.clamp(noisy_traj_points, min=-1, max=1) + noisy_traj_points = self.denorm_odo(noisy_traj_points) + + ego_fut_mode = noisy_traj_points.shape[1] + # 2. 
proj noisy_traj_points to the query + traj_pos_embed = gen_sineembed_for_position(noisy_traj_points,hidden_dim=64) + traj_pos_embed = traj_pos_embed.flatten(-2) + traj_feature = self.plan_anchor_encoder(traj_pos_embed) + traj_feature = traj_feature.view(bs,ego_fut_mode,-1) + # 3. embed the timesteps + time_embed = self.time_mlp(timesteps) + time_embed = time_embed.view(bs,1,-1) + + gt_points = targets['trajectory'][:,-1,:2] + # gt_points=None + # 4. begin the stacked decoder + poses_reg_list, poses_cls_list = self.diff_decoder(traj_feature, noisy_traj_points, bev_feature, bev_spatial_shape, agents_query, ego_query, time_embed, status_encoding,global_img,gt_points=gt_points) + + trajectory_loss_dict = {} + ret_traj_loss = 0 + for idx, (poses_reg, poses_cls) in enumerate(zip(poses_reg_list, poses_cls_list)): + trajectory_loss = self.loss_computer(poses_reg, poses_cls, targets, plan_anchor) + trajectory_loss_dict[f"trajectory_loss_{idx}"] = trajectory_loss + ret_traj_loss += trajectory_loss + + mode_idx = poses_cls_list[-1].argmax(dim=-1) + mode_idx = mode_idx[...,None,None,None].repeat(1,1,self._num_poses,3) + best_reg = torch.gather(poses_reg_list[-1], 1, mode_idx).squeeze(1) + return {"trajectory": best_reg,"trajectory_loss":ret_traj_loss,"trajectory_loss_dict":trajectory_loss_dict} + + def forward_test(self, ego_query,agents_query,bev_feature,bev_spatial_shape,status_encoding,global_img) -> Dict[str, torch.Tensor]: + step_num = 2 + bs = ego_query.shape[0] + anchor = 20 + num_samples = 64 + device = ego_query.device + self.diffusion_scheduler.set_timesteps(1000, device) + step_ratio = 20 / step_num + roll_timesteps = (np.arange(0, step_num) * step_ratio).round()[::-1].copy().astype(np.int64) + roll_timesteps = torch.from_numpy(roll_timesteps).to(device) + + + # 1. add truncated noise to the plan anchor + plan_anchor = self.plan_anchor.unsqueeze(0).repeat(bs,1,1,1) + plan_anchor = plan_anchor.unsqueeze(2).repeat(1,1,num_samples,1,1) + plan_anchor = plan_anchor.view(bs,num_samples*anchor, 8, 2) + img = self.norm_odo(plan_anchor) + noise = torch.randn(img.shape, device=device) + trunc_timesteps = torch.ones((bs,), device=device, dtype=torch.long) * 8 + img = self.diffusion_scheduler.add_noise(original_samples=img, noise=noise, timesteps=trunc_timesteps) + noisy_trajs = self.denorm_odo(img) + ego_fut_mode = img.shape[1] + for k in roll_timesteps[:]: + x_boxes = torch.clamp(img, min=-1, max=1) + noisy_traj_points = self.denorm_odo(x_boxes) + + # 2. proj noisy_traj_points to the query + traj_pos_embed = gen_sineembed_for_position(noisy_traj_points,hidden_dim=64) + traj_pos_embed = traj_pos_embed.flatten(-2) + traj_feature = self.plan_anchor_encoder(traj_pos_embed) + traj_feature = traj_feature.view(bs,ego_fut_mode,-1) + + timesteps = k + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + timesteps = torch.tensor([timesteps], dtype=torch.long, device=img.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(img.device) + + # 3. embed the timesteps + timesteps = timesteps.expand(img.shape[0]) + time_embed = self.time_mlp(timesteps) + time_embed = time_embed.view(bs,1,-1) + + # 4. 
begin the stacked decoder + poses_reg_list, poses_cls_list = self.diff_decoder(traj_feature, noisy_traj_points, bev_feature, bev_spatial_shape, agents_query, ego_query, time_embed, status_encoding,global_img) + poses_reg = poses_reg_list[-1] + poses_cls = poses_cls_list[-1] + x_start = poses_reg[...,:2] + x_start = self.norm_odo(x_start) + img = self.diffusion_scheduler.step( + model_output=x_start, + timestep=k, + sample=img + ).prev_sample + # poses_cls = poses_cls.view(bs, anchor*num_samples) + mode_idx = poses_cls.argmax(dim=-1) + mode_idx = mode_idx[...,None,None,None].repeat(1,1,self._num_poses,3) + + best_reg = torch.gather(poses_reg, 1, mode_idx).squeeze(1) + poses_reg = poses_reg.view(bs, anchor, num_samples, self._num_poses, 3) + + return {"trajectory": best_reg, + 'anchor_trajectories': poses_reg + } + \ No newline at end of file diff --git a/navsim/agents/transfuser/transfuser_agent.py b/navsim/agents/transfuser/transfuser_agent.py index efbf8c5..0a85669 100644 --- a/navsim/agents/transfuser/transfuser_agent.py +++ b/navsim/agents/transfuser/transfuser_agent.py @@ -100,4 +100,4 @@ def get_optimizers( def get_training_callbacks(self) -> List[pl.Callback]: """Inherited, see superclass.""" - return [TransfuserCallback(self._config)] + return [TransfuserCallback(self._config),pl.callbacks.ModelCheckpoint(every_n_epochs=5, save_top_k=-1)] diff --git a/navsim/agents/transfuser/transfuser_loss.py b/navsim/agents/transfuser/transfuser_loss.py index 0b0023b..4e135f3 100644 --- a/navsim/agents/transfuser/transfuser_loss.py +++ b/navsim/agents/transfuser/transfuser_loss.py @@ -16,7 +16,8 @@ def transfuser_loss(targets: Dict[str, torch.Tensor], predictions: Dict[str, tor :param config: global Transfuser config :return: combined loss value """ - + # import pdb;pdb.set_trace() + # print("??") trajectory_loss = F.l1_loss(predictions["trajectory"], targets["trajectory"]) agent_class_loss, agent_box_loss = _agent_loss(targets, predictions, config) bev_semantic_loss = F.cross_entropy(predictions["bev_semantic_map"], targets["bev_semantic_map"].long()) diff --git a/navsim/agents/transfuser/transfuser_model.py b/navsim/agents/transfuser/transfuser_model.py index 34d45ec..136f6bd 100644 --- a/navsim/agents/transfuser/transfuser_model.py +++ b/navsim/agents/transfuser/transfuser_model.py @@ -103,6 +103,8 @@ def forward(self, features: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: bev_feature = self._bev_downscale(bev_feature).flatten(-2, -1) bev_feature = bev_feature.permute(0, 2, 1) + # import pdb;pdb.set_trace() + # status_feature=status_feature[:,-1,[0,1,2,3,7,8,9,10]] status_encoding = self._status_encoding(status_feature) keyval = torch.concatenate([bev_feature, status_encoding[:, None]], dim=1) diff --git a/navsim/common/dataclasses.py b/navsim/common/dataclasses.py index 415b23a..6f971a1 100644 --- a/navsim/common/dataclasses.py +++ b/navsim/common/dataclasses.py @@ -30,6 +30,717 @@ NUPLAN_MAPS_ROOT = os.environ.get("NUPLAN_MAPS_ROOT") + + +import torch +from torchvision import transforms + +import skimage as sk +from skimage.filters import gaussian +from io import BytesIO +from wand.image import Image as WandImage +from wand.api import library as wandlibrary +import wand.color as WandColor +import ctypes +from PIL import Image as PILImage +import cv2 +from scipy.ndimage import zoom as scizoom +from scipy.ndimage import map_coordinates +import warnings +import numpy as np + +# -*- coding: utf-8 -*- + +import os +from PIL import Image +import os.path +import time +import torch +import 
torchvision.datasets as dset +import torchvision.transforms as trn +import torch.utils.data as data +import numpy as np +from torchvision import transforms + +from PIL import Image + +# /////////////// Data Loader /////////////// + + +IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] + + +def is_image_file(filename): + """Checks if a file is an image. + Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + filename_lower = filename.lower() + return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) + + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + +def make_dataset(dir, class_to_idx): + images = [] + dir = os.path.expanduser(dir) + for target in sorted(os.listdir(dir)): + d = os.path.join(dir, target) + if not os.path.isdir(d): + continue + + for root, _, fnames in sorted(os.walk(d)): + for fname in sorted(fnames): + if is_image_file(fname): + path = os.path.join(root, fname) + item = (path, class_to_idx[target]) + images.append(item) + + return images + + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +def accimage_loader(path): + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path): + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class DistortImageFolder(data.Dataset): + def __init__(self, root, method, severity, transform=None, target_transform=None, + loader=default_loader): + classes, class_to_idx = find_classes(root) + imgs = make_dataset(root, class_to_idx) + if len(imgs) == 0: + raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" + "Supported image extensions are: " + ",".join( + IMG_EXTENSIONS))) + + self.root = root + self.method = method + self.severity = severity + self.imgs = imgs + self.classes = classes + self.class_to_idx = class_to_idx + self.idx_to_class = {v: k for k, v in class_to_idx.items()} + self.transform = transform + self.target_transform = target_transform + self.loader = loader + + def __getitem__(self, index): + path, target = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + img = self.method(img, self.severity) + if self.target_transform is not None: + target = self.target_transform(target) + + save_path = '/share/data/vision-greg/DistortedImageNet/JPEG/' + self.method.__name__ + \ + '/' + str(self.severity) + '/' + self.idx_to_class[target] + + if not os.path.exists(save_path): + os.makedirs(save_path) + + save_path += path[path.rindex('/'):] + + Image.fromarray(np.uint8(img)).save(save_path, quality=85, optimize=True) + + return 0 # we do not care about returning the data + + def __len__(self): + return len(self.imgs) + + +warnings.simplefilter("ignore", UserWarning) + + +def auc(errs): # area under the alteration error curve + area = 0 + for i in range(1, len(errs)): + area += (errs[i] + errs[i - 1]) / 2 + area /= len(errs) - 1 + return area + + +def disk(radius, alias_blur=0.1, dtype=np.float32): + if radius <= 8: + L = 
np.arange(-8, 8 + 1) + ksize = (3, 3) + else: + L = np.arange(-radius, radius + 1) + ksize = (5, 5) + X, Y = np.meshgrid(L, L) + aliased_disk = np.array((X ** 2 + Y ** 2) <= radius ** 2, dtype=dtype) + aliased_disk /= np.sum(aliased_disk) + + # supersample disk to antialias + return cv2.GaussianBlur(aliased_disk, ksize=ksize, sigmaX=alias_blur) + + +# Tell Python about the C method +wandlibrary.MagickMotionBlurImage.argtypes = (ctypes.c_void_p, # wand + ctypes.c_double, # radius + ctypes.c_double, # sigma + ctypes.c_double) # angle + + +# Extend wand.image.Image class to include method signature +class MotionImage(WandImage): + def motion_blur(self, radius=0.0, sigma=0.0, angle=0.0): + wandlibrary.MagickMotionBlurImage(self.wand, radius, sigma, angle) + + +# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py +def plasma_fractal(mapsize=256, wibbledecay=3): + """ + Generate a heightmap using diamond-square algorithm. + Return square 2d array, side length 'mapsize', of floats in range 0-255. + 'mapsize' must be a power of two. + """ + assert (mapsize & (mapsize - 1) == 0) + maparray = np.empty((mapsize, mapsize), dtype=np.float_) + maparray[0, 0] = 0 + stepsize = mapsize + wibble = 100 + + def wibbledmean(array): + return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape) + + def fillsquares(): + """For each square of points stepsize apart, + calculate middle value as mean of points + wibble""" + cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize] + squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0) + squareaccum += np.roll(squareaccum, shift=-1, axis=1) + maparray[stepsize // 2:mapsize:stepsize, + stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum) + + def filldiamonds(): + """For each diamond of points stepsize apart, + calculate middle value as mean of points + wibble""" + mapsize = maparray.shape[0] + drgrid = maparray[stepsize // 2:mapsize:stepsize, stepsize // 2:mapsize:stepsize] + ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize] + ldrsum = drgrid + np.roll(drgrid, 1, axis=0) + lulsum = ulgrid + np.roll(ulgrid, -1, axis=1) + ltsum = ldrsum + lulsum + maparray[0:mapsize:stepsize, stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum) + tdrsum = drgrid + np.roll(drgrid, 1, axis=1) + tulsum = ulgrid + np.roll(ulgrid, -1, axis=0) + ttsum = tdrsum + tulsum + maparray[stepsize // 2:mapsize:stepsize, 0:mapsize:stepsize] = wibbledmean(ttsum) + + while stepsize >= 2: + fillsquares() + filldiamonds() + stepsize //= 2 + wibble /= wibbledecay + + maparray -= maparray.min() + return maparray / maparray.max() + + +def clipped_zoom(img, zoom_factor): + + h, w = img.shape[:2] + + new_h = int(np.round(h / zoom_factor)) + new_w = int(np.round(w / zoom_factor)) + + top = (h - new_h) // 2 + left = (w - new_w) // 2 + + img = scizoom(img[top:top+new_h, left:left+new_w], + (zoom_factor, zoom_factor, 1), + order=1) + + return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR) + + +# /////////////// End Distortion Helpers /////////////// + + +# /////////////// Distortions /////////////// + +def gaussian_noise(x, severity=1): + c = [.08, .12, 0.18, 0.26, 0.38][severity - 1] + + x = np.array(x) / 255. + return np.clip(x + np.random.normal(size=x.shape, scale=c), 0, 1) * 255 + + +def shot_noise(x, severity=1): + c = [60, 25, 12, 5, 3][severity - 1] + + x = np.array(x) / 255. 
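+    # shot (Poisson) noise: treat x * c as expected photon counts, sample a Poisson image, then divide by c to rescale back to [0, 1]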
+ return np.clip(np.random.poisson(x * c) / c, 0, 1) * 255 + + +def impulse_noise(x, severity=1): + c = [.03, .06, .09, 0.17, 0.27][severity - 1] + + x = sk.util.random_noise(np.array(x) / 255., mode='s&p', amount=c) + return np.clip(x, 0, 1) * 255 + + +def speckle_noise(x, severity=1): + c = [.15, .2, 0.35, 0.45, 0.6][severity - 1] + + x = np.array(x) / 255. + return np.clip(x + x * np.random.normal(size=x.shape, scale=c), 0, 1) * 255 + + +def fgsm(x, source_net, severity=1): + c = [8, 16, 32, 64, 128][severity - 1] + + x = V(x, requires_grad=True) + logits = source_net(x) + source_net.zero_grad() + loss = F.cross_entropy(logits, V(logits.data.max(1)[1].squeeze_()), size_average=False) + loss.backward() + + return standardize(torch.clamp(unstandardize(x.data) + c / 255. * unstandardize(torch.sign(x.grad.data)), 0, 1)) + + +def gaussian_blur(x, severity=1): + c = [1, 2, 3, 4, 6][severity - 1] + + x = gaussian(np.array(x) / 255., sigma=c, multichannel=True) + return np.clip(x, 0, 1) * 255 + + +def glass_blur(x, severity=1): + # sigma, max_delta, iterations + c = [(0.7, 1, 2), (0.9, 2, 1), (1, 2, 3), (1.1, 3, 2), (1.5, 4, 2)][severity - 1] + + x = np.uint8(gaussian(np.array(x) / 255., sigma=c[0], multichannel=True) * 255) + + # locally shuffle pixels + for i in range(c[2]): + for h in range(224 - c[1], c[1], -1): + for w in range(224 - c[1], c[1], -1): + dx, dy = np.random.randint(-c[1], c[1], size=(2,)) + h_prime, w_prime = h + dy, w + dx + # swap + x[h, w], x[h_prime, w_prime] = x[h_prime, w_prime], x[h, w] + + return np.clip(gaussian(x / 255., sigma=c[0], multichannel=True), 0, 1) * 255 + + +def defocus_blur(x, severity=1): + c = [(3, 0.1), (4, 0.5), (6, 0.5), (8, 0.5), (10, 0.5)][severity - 1] + + x = np.array(x) / 255. + kernel = disk(radius=c[0], alias_blur=c[1]) + + channels = [] + for d in range(3): + channels.append(cv2.filter2D(x[:, :, d], -1, kernel)) + channels = np.array(channels).transpose((1, 2, 0)) # 3x224x224 -> 224x224x3 + + return np.clip(channels, 0, 1) * 255 + + +def motion_blur(x, severity=1): + c = [(10, 3), (15, 5), (15, 8), (15, 12), (20, 15)][severity - 1] + + output = BytesIO() + x.save(output, format='PNG') + x = MotionImage(blob=output.getvalue()) + + x.motion_blur(radius=c[0], sigma=c[1], angle=np.random.uniform(-45, 45)) + + x = cv2.imdecode(np.frombuffer(x.make_blob(), np.uint8), + cv2.IMREAD_UNCHANGED) + + if x.shape != (224, 224): + return np.clip(x[..., [2, 1, 0]], 0, 255) # BGR to RGB + else: # greyscale to RGB + return np.clip(np.array([x, x, x]).transpose((1, 2, 0)), 0, 255) + + +def clipped_zoom(img, zoom_factor): + h, w = img.shape[:2] + + new_h = int(np.round(h / zoom_factor)) + new_w = int(np.round(w / zoom_factor)) + + top = (h - new_h) // 2 + left = (w - new_w) // 2 + + img = scizoom(img[top:top+new_h, left:left+new_w], + (zoom_factor, zoom_factor, 1), + order=1) + + return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR) + +def zoom_blur(x, severity=1): + c = [np.arange(1, 1.11, 0.01), + np.arange(1, 1.16, 0.01), + np.arange(1, 1.21, 0.02), + np.arange(1, 1.26, 0.02), + np.arange(1, 1.31, 0.03)][severity - 1] + + x = (np.array(x) / 255.).astype(np.float32) + h, w = x.shape[:2] + + out = np.zeros_like(x) + + for zoom_factor in c: + zoomed = clipped_zoom(x, zoom_factor) + out += zoomed + + x = (x + out) / (len(c) + 1) + return np.clip(x, 0, 1) * 255 + + +# def barrel(x, severity=1): +# c = [(0,0.03,0.03), (0.05,0.05,0.05), (0.1,0.1,0.1), +# (0.2,0.2,0.2), (0.1,0.3,0.6)][severity - 1] +# +# output = BytesIO() +# x.save(output, format='PNG') +# 
+# x = WandImage(blob=output.getvalue()) +# x.distort('barrel', c) +# +# x = cv2.imdecode(np.fromstring(x.make_blob(), np.uint8), +# cv2.IMREAD_UNCHANGED) +# +# if x.shape != (224, 224): +# return np.clip(x[..., [2, 1, 0]], 0, 255) # BGR to RGB +# else: # greyscale to RGB +# return np.clip(np.array([x, x, x]).transpose((1, 2, 0)), 0, 255) + + +def fog(x, severity=1): + c = [(1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)][severity - 1] + + x = np.array(x) / 255. + max_val = x.max() + x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis] + return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255 + + +def frost(x, severity=1): + c = [(1, 0.4), + (0.8, 0.6), + (0.7, 0.7), + (0.65, 0.7), + (0.6, 0.75)][severity - 1] + idx = np.random.randint(5) + filename = ['./frost1.png', './frost2.png', './frost3.png', './frost4.jpg', './frost5.jpg', './frost6.jpg'][idx] + frost = cv2.imread(filename) + # randomly crop and convert to rgb + x_start, y_start = np.random.randint(0, frost.shape[0] - 224), np.random.randint(0, frost.shape[1] - 224) + frost = frost[x_start:x_start + 224, y_start:y_start + 224][..., [2, 1, 0]] + + return np.clip(c[0] * np.array(x) + c[1] * frost, 0, 255) + + +def snow(x, severity=1): + c = [(0.1, 0.3, 3, 0.5, 10, 4, 0.8), + (0.2, 0.3, 2, 0.5, 12, 4, 0.7), + (0.55, 0.3, 4, 0.9, 12, 8, 0.7), + (0.55, 0.3, 4.5, 0.85, 12, 8, 0.65), + (0.55, 0.3, 2.5, 0.85, 12, 12, 0.55)][severity - 1] + + # get the original image size + if isinstance(x, Image.Image): + w, h = x.size + x = np.array(x).astype(np.float32) / 255. + else: + h, w = x.shape[:2] + x = x.astype(np.float32) / 255. + + # generate the snow layer (works for arbitrary sizes) + snow_layer = np.random.normal(size=(h, w), loc=c[0], scale=c[1]) + + # zoom the snow layer (keeping the aspect ratio) + zoom_factor = c[2] + snow_layer_zoomed = clipped_zoom(snow_layer[..., np.newaxis], zoom_factor) + snow_layer = snow_layer_zoomed.squeeze() + + # apply the threshold + snow_layer[snow_layer < c[3]] = 0 + + # create the motion-blurred snow layer + snow_pil = Image.fromarray((np.clip(snow_layer, 0, 1) * 255).astype(np.uint8)) + with WandImage() as wand_img: + wand_img.read(blob=snow_pil.tobytes()) + wand_img.motion_blur(radius=c[4], sigma=c[5], angle=np.random.uniform(-135, -45)) + snow_layer = np.array(wand_img).astype(np.float32) / 255. + + # handle the different channel layouts + if snow_layer.ndim == 2: + snow_layer = snow_layer[..., np.newaxis] + + # grayscale conversion with dynamic size + gray = cv2.cvtColor(x, cv2.COLOR_RGB2GRAY) + gray_reshaped = gray.reshape(h, w, 1) # use the dynamic size + + # composite the final image + blended = c[6] * x + (1 - c[6]) * np.maximum(x, gray_reshaped * 1.5 + 0.5) + result = np.clip(blended + snow_layer + np.rot90(snow_layer, k=2), 0, 1) + + return (result * 255).astype(np.uint8) + + +def spatter(x, severity=1): + c = [(0.65, 0.3, 4, 0.69, 0.6, 0), + (0.65, 0.3, 3, 0.68, 0.6, 0), + (0.65, 0.3, 2, 0.68, 0.5, 0), + (0.65, 0.3, 1, 0.65, 1.5, 1), + (0.67, 0.4, 1, 0.65, 1.5, 1)][severity - 1] + x = np.array(x, dtype=np.float32) / 255.
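+    # spatter: threshold a Gaussian-smoothed random 'liquid' layer; c[5] == 0 renders a water overlay, otherwise a mud-colored one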
+ + liquid_layer = np.random.normal(size=x.shape[:2], loc=c[0], scale=c[1]) + + liquid_layer = gaussian(liquid_layer, sigma=c[2]) + liquid_layer[liquid_layer < c[3]] = 0 + if c[5] == 0: + liquid_layer = (liquid_layer * 255).astype(np.uint8) + dist = 255 - cv2.Canny(liquid_layer, 50, 150) + dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5) + _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC) + dist = cv2.blur(dist, (3, 3)).astype(np.uint8) + dist = cv2.equalizeHist(dist) + # ker = np.array([[-1,-2,-3],[-2,0,0],[-3,0,1]], dtype=np.float32) + # ker -= np.mean(ker) + ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]]) + dist = cv2.filter2D(dist, cv2.CV_8U, ker) + dist = cv2.blur(dist, (3, 3)).astype(np.float32) + + m = cv2.cvtColor(liquid_layer * dist, cv2.COLOR_GRAY2BGRA) + m /= np.max(m, axis=(0, 1)) + m *= c[4] + + # water is pale turqouise + color = np.concatenate((175 / 255. * np.ones_like(m[..., :1]), + 238 / 255. * np.ones_like(m[..., :1]), + 238 / 255. * np.ones_like(m[..., :1])), axis=2) + + color = cv2.cvtColor(color, cv2.COLOR_BGR2BGRA) + x = cv2.cvtColor(x, cv2.COLOR_BGR2BGRA) + + return cv2.cvtColor(np.clip(x + m * color, 0, 1), cv2.COLOR_BGRA2BGR) * 255 + else: + m = np.where(liquid_layer > c[3], 1, 0) + m = gaussian(m.astype(np.float32), sigma=c[4]) + m[m < 0.8] = 0 + # m = np.abs(m) ** (1/c[4]) + + # mud brown + color = np.concatenate((63 / 255. * np.ones_like(x[..., :1]), + 42 / 255. * np.ones_like(x[..., :1]), + 20 / 255. * np.ones_like(x[..., :1])), axis=2) + + color *= m[..., np.newaxis] + x *= (1 - m[..., np.newaxis]) + + return np.clip(x + color, 0, 1) * 255 + + +def contrast(x, severity=1): + c = [0.4, .3, .2, .1, .05][severity - 1] + + x = np.array(x) / 255. + means = np.mean(x, axis=(0, 1), keepdims=True) + return np.clip((x - means) * c + means, 0, 1) * 255 + + +def brightness(x, severity=1): + c = [.1, .2, .3, .4, .5][severity - 1] + + x = np.array(x) / 255. + x = sk.color.rgb2hsv(x) + x[:, :, 2] = np.clip(x[:, :, 2] + c, 0, 1) + x = sk.color.hsv2rgb(x) + + return np.clip(x, 0, 1) * 255 + + +def saturate(x, severity=1): + c = [(0.3, 0), (0.1, 0), (2, 0), (5, 0.1), (20, 0.2)][severity - 1] + + x = np.array(x) / 255. + x = sk.color.rgb2hsv(x) + x[:, :, 1] = np.clip(x[:, :, 1] * c[0] + c[1], 0, 1) + x = sk.color.hsv2rgb(x) + + return np.clip(x, 0, 1) * 255 + + +def jpeg_compression(x, severity=1): + c = [25, 18, 15, 10, 7][severity - 1] + + output = BytesIO() + x.save(output, 'JPEG', quality=c) + x = PILImage.open(output) + + return x + + +def pixelate(x, severity=1): + c = [0.6, 0.5, 0.4, 0.3, 0.25][severity - 1] + + x = x.resize((int(224 * c), int(224 * c)), PILImage.BOX) + x = x.resize((224, 224), PILImage.BOX) + + return x + + +# mod of https://gist.github.com/erniejunior/601cdf56d2b424757de5 +def elastic_transform(image, severity=1): + c = [(244 * 2, 244 * 0.7, 244 * 0.1), # 244 should have been 224, but ultimately nothing is incorrect + (244 * 2, 244 * 0.08, 244 * 0.2), + (244 * 0.05, 244 * 0.01, 244 * 0.02), + (244 * 0.07, 244 * 0.01, 244 * 0.02), + (244 * 0.12, 244 * 0.01, 244 * 0.02)][severity - 1] + + image = np.array(image, dtype=np.float32) / 255. 
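+    # elastic transform: apply a small random affine warp, then displace every pixel by Gaussian-smoothed random offset fields (dx, dy) via map_coordinates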
+ shape = image.shape + shape_size = shape[:2] + + # random affine + center_square = np.float32(shape_size) // 2 + square_size = min(shape_size) // 3 + pts1 = np.float32([center_square + square_size, + [center_square[0] + square_size, center_square[1] - square_size], + center_square - square_size]) + pts2 = pts1 + np.random.uniform(-c[2], c[2], size=pts1.shape).astype(np.float32) + M = cv2.getAffineTransform(pts1, pts2) + image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101) + + dx = (gaussian(np.random.uniform(-1, 1, size=shape[:2]), + c[1], mode='reflect', truncate=3) * c[0]).astype(np.float32) + dy = (gaussian(np.random.uniform(-1, 1, size=shape[:2]), + c[1], mode='reflect', truncate=3) * c[0]).astype(np.float32) + dx, dy = dx[..., np.newaxis], dy[..., np.newaxis] + + x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2])) + indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1)) + return np.clip(map_coordinates(image, indices, order=1, mode='reflect').reshape(shape), 0, 1) * 255 + + +# /////////////// End Distortions /////////////// + + +# /////////////// Further Setup /////////////// + + +def save_distorted(method=gaussian_noise): + for severity in range(1, 6): + print(method.__name__, severity) + distorted_dataset = DistortImageFolder( + root="/share/data/vision-greg/ImageNet/clsloc/images/val", + method=method, severity=severity, + transform=trn.Compose([trn.Resize(256), trn.CenterCrop(224)])) + distorted_dataset_loader = torch.utils.data.DataLoader( + distorted_dataset, batch_size=100, shuffle=False, num_workers=4) + + for _ in distorted_dataset_loader: continue + + +# /////////////// End Further Setup /////////////// + + +# /////////////// Display Results /////////////// +import collections + +# print('\nUsing ImageNet data') + +d = collections.OrderedDict() + # d['Zoom Blur'] = zoom_blur + # d['JPEG'] = jpeg_compression + # d['Pixelate'] = pixelate + # d['Motion Blur'] = motion_blur + # d['Defocus Blur'] = defocus_blur + # d['Elastic'] = elastic_transform + + + + +# d['Gaussian Noise'] = gaussian_noise +# d['Shot Noise'] = shot_noise +# d['Impulse Noise'] = impulse_noise +# d['Brightness'] = brightness +# d['Contrast'] = contrast +# d['Speckle Noise'] = speckle_noise +# d['Spatter'] = spatter +# d['Saturate'] = saturate +import os +from pathlib import Path +from PIL import Image +import numpy as np +import torchvision.transforms as trn + +import os +import numpy as np +import torch +from PIL import Image +from torchvision import transforms +from pathlib import Path + +def save_distorted_images_from_numpy( + input_image: np.ndarray, # h w c + methods: dict, + severity: int = 4, +): + + img_pil = Image.fromarray(input_image.astype(np.uint8)).convert('RGB') + for method_name, method in methods.items(): + distorted_img = method(img_pil, severity) + to_pil = transforms.ToPILImage() + if not isinstance(distorted_img, Image.Image): + if isinstance(distorted_img, np.ndarray): + distorted_img = Image.fromarray(distorted_img.astype(np.uint8)) + else: + distorted_img = to_pil(distorted_img) + + if distorted_img.size != img_pil.size: + distorted_img = distorted_img.resize(img_pil.size, Image.LANCZOS) + + distorted_tensor = transforms.ToTensor()(distorted_img) + + return distorted_tensor + +def camera_feature_improve(image): + dd = collections.OrderedDict() + dd['Motion Blur'] = motion_blur + tensor_image_noise = save_distorted_images_from_numpy(image, dd) + tensor_image = 
tensor_image_noise.detach().cpu() + denormalized_image = tensor_image * 255.0 + denormalized_image = denormalized_image.byte() + np_image_noise = denormalized_image.numpy() + return np_image_noise + + @dataclass class Camera: """Camera dataclass for image and parameters.""" @@ -77,8 +788,23 @@ def from_camera_dict( camera_identifier = camera_name.lower() if camera_identifier in sensor_names: image_path = sensor_blobs_path / camera_dict[camera_name]["data_path"] + # # original + # data_dict[camera_identifier] = Camera( + # image=np.array(Image.open(image_path)), + # sensor2lidar_rotation=camera_dict[camera_name]["sensor2lidar_rotation"], + # sensor2lidar_translation=camera_dict[camera_name]["sensor2lidar_translation"], + # intrinsics=camera_dict[camera_name]["cam_intrinsic"], + # distortion=camera_dict[camera_name]["distortion"], + # camera_path=camera_dict[camera_name]["data_path"], + # ) + # #noise + image=np.array(Image.open(image_path)) + # import random; + # idx=random.randint(1,10) + # if idx<5: + # image=camera_feature_improve(image).transpose(1, 2, 0) data_dict[camera_identifier] = Camera( - image=np.array(Image.open(image_path)), + image=image, sensor2lidar_rotation=camera_dict[camera_name]["sensor2lidar_rotation"], sensor2lidar_translation=camera_dict[camera_name]["sensor2lidar_translation"], intrinsics=camera_dict[camera_name]["cam_intrinsic"], @@ -197,6 +923,24 @@ def from_scene_dict_list( for frame_idx in range(num_history_frames): ego_dynamic_state = scene_dict_list[frame_idx]["ego_dynamic_state"] + # # change to a one-dimensional (magnitude) representation + # import math + # vle=math.sqrt(ego_dynamic_state[0]**2 + ego_dynamic_state[1]**2) + # acc=math.sqrt(ego_dynamic_state[2]**2 + ego_dynamic_state[3]**2) + # tag_vle=1 + # tag_acc=1 + # if ego_dynamic_state[0]<0: + # tag_vle=-1 + # if ego_dynamic_state[2]<0: + # tag_acc=-1 + # ego_status = EgoStatus( + # ego_pose=np.array(local_ego_poses[frame_idx], dtype=np.float32), + # ego_velocity=np.array(tag_vle*vle, dtype=np.float32), + # ego_acceleration=np.array(tag_acc*acc, dtype=np.float32), + # driving_command=scene_dict_list[frame_idx]["driving_command"], + # ) + # print("<<<<<") + # import pdb;pdb.set_trace() ego_status = EgoStatus( ego_pose=np.array(local_ego_poses[frame_idx], dtype=np.float32), ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32), @@ -320,23 +1064,23 @@ class Scene: This can be used to provide future detections of pedestrians and objects in synthetic scenarios where future frames are unavailable. Defaults to None. """ - + def get_future_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory: """ Extracts future trajectory of the human operator in local coordinates (ie. ego rear-axle).
:param num_trajectory_frames: optional number frames to extract poses, defaults to None :return: trajectory dataclass """ - if num_trajectory_frames is None: num_trajectory_frames = self.scene_metadata.num_future_frames - + # if num_trajectory_frames is None: + # num_trajectory_frames = 8 start_frame_idx = self.scene_metadata.num_history_frames - 1 global_ego_poses = [] for frame_idx in range(start_frame_idx, start_frame_idx + num_trajectory_frames + 1): global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose) - + # print(len(StateSE2(*global_ego_poses[0]))) local_ego_poses = convert_absolute_to_relative_se2_array( StateSE2(*global_ego_poses[0]), np.array(global_ego_poses[1:], dtype=np.float64), @@ -591,7 +1335,7 @@ def load_from_disk( lidar_path=lidar_path, sensor_names=sensor_names, ) - + # cameras = Cameras.from_camera_dict( sensor_blobs_path=sensor_blobs_path, camera_dict=frame_data["camera_dict"], @@ -635,7 +1379,8 @@ class SceneFilter: include_synthetic_scenes: bool = False synthetic_scene_tokens: Optional[List[str]] = None # TODO: expand filter options - + reactive_synthetic_initial_tokens: Optional[List[str]] = None + non_reactive_synthetic_initial_tokens: Optional[List[str]] = None def __post_init__(self): if self.frame_interval is None: @@ -709,7 +1454,7 @@ def build_all_sensors(cls, include: Union[bool, List[int]] = True) -> SensorConf cam_r1=include, cam_r2=include, cam_b0=include, - lidar_pc=include, + lidar_pc=False, #include ) @classmethod diff --git a/navsim/common/dataloader.py b/navsim/common/dataloader.py index f0be250..31bfe61 100644 --- a/navsim/common/dataloader.py +++ b/navsim/common/dataloader.py @@ -216,7 +216,7 @@ def get_scene_from_token(self, token: str) -> Scene: num_future_frames=self._scene_filter.num_future_frames, sensor_config=self._sensor_config, ) - + def get_agent_input_from_token(self, token: str) -> AgentInput: """ Loads agent input given a scene identifier string (token). 
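The commented-out hook in Camera.from_camera_dict above shows how the corruption pipeline is meant to be wired in: load the camera image and, with some probability, replace it with the motion-blurred copy produced by camera_feature_improve. Below is a minimal sketch of that idea; it is not part of the diff, the helper name load_possibly_corrupted_image is hypothetical, camera_feature_improve is the function added to navsim/common/dataclasses.py above, and the 40% probability mirrors the commented idx = random.randint(1, 10); idx < 5 check.

import random

import numpy as np
from PIL import Image

# helper added earlier in this diff (navsim/common/dataclasses.py); returns a C x H x W uint8 array
from navsim.common.dataclasses import camera_feature_improve


def load_possibly_corrupted_image(image_path: str, corrupt_probability: float = 0.4) -> np.ndarray:
    """Load a camera image; with the given probability, swap in a motion-blurred version."""
    image = np.array(Image.open(image_path))  # H x W x C, uint8
    if random.random() < corrupt_probability:
        # transpose the C x H x W output back to H x W x C, as the commented code does
        image = camera_feature_improve(image).transpose(1, 2, 0)
    return image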
diff --git a/navsim/planning/script/config/common/agent/diffusiondrive_agent.yaml b/navsim/planning/script/config/common/agent/diffusiondrive_agent.yaml new file mode 100644 index 0000000..2833d17 --- /dev/null +++ b/navsim/planning/script/config/common/agent/diffusiondrive_agent.yaml @@ -0,0 +1,19 @@ +_target_: navsim.agents.diffusiondrive.transfuser_agent.TransfuserAgent +_convert_: 'all' +config: + _target_: navsim.agents.diffusiondrive.transfuser_config.TransfuserConfig + _convert_: 'all' + + trajectory_sampling: + _target_: nuplan.planning.simulation.trajectory.trajectory_sampling.TrajectorySampling + _convert_: 'all' + time_horizon: 4 + interval_length: 0.5 + + latent: False + +checkpoint_path: null +# originally 6e-4 +# lr: 6e-5 +lr: 6e-4 + diff --git a/navsim/planning/script/config/common/default_dataset_paths.yaml b/navsim/planning/script/config/common/default_dataset_paths.yaml index 387d613..390797d 100644 --- a/navsim/planning/script/config/common/default_dataset_paths.yaml +++ b/navsim/planning/script/config/common/default_dataset_paths.yaml @@ -1,5 +1,12 @@ -navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${train_test_split.data_split} # path to log annotations -navsim_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/sensor_blobs/${train_test_split.data_split} # path to log annotations -sensor_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/synthetic_scenes/synthetic_sensor # path to sensor blobs -synthetic_scenes_path: ${oc.env:OPENSCENE_DATA_ROOT}/synthetic_scenes/scene_pickles # path to synthetic scenes -metric_cache_path: ${oc.env:NAVSIM_EXP_ROOT}/metric_cache # path to metric cache +# navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/openscene-v1.1/meta_datas/${train_test_split.data_split} # path to log annotations +# navsim_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/nuplan-v1.1/sensor_blobs # path to log annotations +# sensor_blobs_path: /horizon-bucket/saturn_v_dev/01_users/zebin.xing/synthetic_scenes/synthetic_sensor # path to sensor blobs +# synthetic_scenes_path: /horizon-bucket/saturn_v_dev/01_users/zebin.xing/synthetic_scenes/scene_pickles # path to synthetic scenes +# metric_cache_path: ${oc.env:NAVSIM_EXP_ROOT}/metric_cache # path to metric cache +navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/${train_test_split.data_split} # path to log annotations +navsim_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/sensor_blobs/${train_test_split.data_split} # path to log annotations +sensor_blobs_path: /data/hdd01/dingzx/dataset/synthetic_scenes/synthetic_sensor +# synthetic_scenes_path: ${oc.env:OPENSCENE_DATA_ROOT}/synthetic_scenes/scene_pickles # path to synthetic scenes +synthetic_scenes_path: /data/hdd01/dingzx/dataset/synthetic_scenes/scene_pickles +metric_cache_path: ${oc.env:NAVSIM_EXP_ROOT}/metric_cache_warmup_navsafe_two_stage_extended # path to metric cache +# /data/hdd01/dingzx/navsim_exp/metric_cache_warmup_navsafe_two_stage_extended diff --git a/navsim/planning/script/config/common/default_evaluation.yaml b/navsim/planning/script/config/common/default_evaluation.yaml index 7315218..0df7f66 100644 --- a/navsim/planning/script/config/common/default_evaluation.yaml +++ b/navsim/planning/script/config/common/default_evaluation.yaml @@ -2,9 +2,19 @@ defaults: - traffic_agents_policy: log_replay_traffic_agents - default_dataset_paths - _self_ - -# Cache parameters -experiment_name: ???
+experiment_name: training_transfuser_trainval +# metric_cache_path: ${oc.env:NAVSIM_EXP_ROOT}/metric_cache +# navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/${train_test_split.data_split} +# sensor_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/sensor_blobs/${train_test_split.data_split} +# synthetic_scenes_path: /data/hdd01/dingzx/dataset/synthetic_scenes date_format: '%Y.%m.%d.%H.%M.%S' experiment_uid: ${now:${date_format}} output_dir: ${oc.env:NAVSIM_EXP_ROOT}/${experiment_name}/${experiment_uid} # path where output csv is saved + + +# # navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/${train_test_split.data_split} # path to log annotations +# navsim_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/sensor_blobs/${train_test_split.data_split} # path to log annotations +# sensor_blobs_path: /data/hdd01/dingzx/dataset/synthetic_scenes/synthetic_sensor +# # synthetic_scenes_path: ${oc.env:OPENSCENE_DATA_ROOT}/synthetic_scenes/scene_pickles # path to synthetic scenes +# synthetic_scenes_path: /data/hdd01/dingzx/dataset/synthetic_scenes/scene_pickles +# # metric_cache_path: ${oc.env:NAVSIM_EXP_ROOT}/metric_cache # path to metric cache diff --git a/navsim/planning/script/config/common/train_test_split/scene_filter/warmup_two_stage.yaml b/navsim/planning/script/config/common/train_test_split/scene_filter/warmup_two_stage.yaml new file mode 100644 index 0000000..0076d75 --- /dev/null +++ b/navsim/planning/script/config/common/train_test_split/scene_filter/warmup_two_stage.yaml @@ -0,0 +1,272 @@ +_target_: navsim.common.dataclasses.SceneFilter +_convert_: all +num_history_frames: 4 +num_future_frames: 8 +frame_interval: 1 +has_route: true +max_scenes: null +include_synthetic_scenes: true +log_names: +- '2021.08.16.14.23.37_veh-45_00015_00132' +- '2021.08.30.14.54.34_veh-40_00439_00835' +- '2021.09.16.15.47.30_veh-45_01199_01391' +- '2021.09.16.19.27.01_veh-45_01749_03230' +- '2021.09.29.14.44.26_veh-28_00528_00992' +- '2021.09.29.15.23.04_veh-28_00601_00802' +- '2021.09.29.19.02.14_veh-28_02911_03005' +- '2021.10.06.08.16.17_veh-52_01590_01725' +tokens: +- '0dc54a8c8203567b' +- '13310ca9a7715154' +- '19e90f2757b25f38' +- '2382542e05f253df' +- '3c542be991515ccd' +- '4cac9f6cd85a5b47' +- '5f53001e7e2d5347' +- '6326d00e52115da4' +- '67bce1e28df25eaa' +- '68097b10029d5fe1' +- '6e3cf172a2755fae' +- 'c054f473288d5515' +- 'd01fec804cd45644' +- 'ed3db88f99a05a6c' +- 'efa80e78fce1525d' +- 'f6b9867069845fd2' +reactive_synthetic_initial_tokens: +- '0395128411941228f' +- '03d0f73649589dca9' +- '052328b114b9c5cb5' +- '05d102dbfc508f379' +- '06082aa25b363522c' +- '063b137663ba8c507' +- '0923ee20dc5d20db6' +- '0a7e42966c6e6cce4' +- '0fd2976bdc791e64f' +- '102d8784884da0d54' +- '1061580743a8ed861' +- '10bdbc6a9b7b2bd43' +- '14ddd2546c5cda663' +- '151061d0d63eb2be6' +- '155fc3b8978be258b' +- '1574a4d082ac80b19' +- '15794da6178dc480a' +- '16be788d02f1fef14' +- '16e0a69867db58fda' +- '178a524a517377f19' +- '1853bf359b16c44e1' +- '18cda5f11659c3287' +- '1ac6bf0f26f45e643' +- '1bca89debb28211cf' +- '1c963497d68500ad1' +- '1cfd90c1b01a50afe' +- '1d00c1f1df43c8c01' +- '1e429d2f774768092' +- '1f2dfd2d3b74ad454' +- '2193125d732ee8d63' +- '22777eed80fe6c2f6' +- '2541602e453206839' +- '25b7f50ff65bf14c0' +- '267e63ac83466d214' +- '2693693ffe3ace9d7' +- '271bc2d78c35ee654' +- '28728be442990aea2' +- '2948ba510215527e5' +- '2e8f7bda514e182e8' +- '300c75247d0fbaeb8' +- '30442b44b719bcadc' +- '309c9042564f167ee' +- '314e1c5d77b802e7a' +- '31a41cd314e13595f' 
+- '3218bf90d6d24dc07' +- '326f9219f9a19b234' +- '35c0d827b75f88593' +- '35d06cd482bbccf6e' +- '36557ff78c024a815' +- '374858d4d47bbd16a' +- '3a3c5df8add8dc277' +- '3aa81c9276d98b0ab' +- '3ad3b5451829788bc' +- '3de8eff4da7a19f0f' +- '3e835a314f8e85e65' +- '3f785f1fec6b20810' +- '4010725f6359fa6d7' +- '4011e54d1cd33878d' +- '40592b30e67921dd9' +- '4068d8c6bdd0434cb' +- '41efad97268e1bea0' +- '420cf2b19dd7ceb38' +- '4291bb38d432590b4' +- '42fe9af736de10be8' +- '439515fa09c8b8c17' +- '44a0015fc40c1cb88' +- '44e909afb129ace3a' +- '44f56a3c522b7ea16' +- '454f5f1ed99b03e21' +- '462a33f831735c5b5' +- '467eb1f2d2e219049' +- '47531b5fa51305bdc' +- '47fb0d7e7e1751ffe' +- '484a304ef257656cd' +- '487a9143936d4df95' +- '488c24c04a2b22e40' +- '4a739fc19f2829e98' +- '4b5dc729abe1e75fe' +- '4c0a18dcc1cd87e5a' +- '4f486a22a5253de77' +- '50ab50f23e1e36c22' +- '50d3b8161a0e2ec5e' +- '514ba443c93be0c84' +- '543524873fd608480' +- '54f694f18d2befecc' +- '550e0654aa362120c' +- '55ae0fb023dccdeef' +- '5685730c604318275' +- '58ea88f6fa166c41d' +- '5b3d38f3a7038d843' +- '5c199633e69db185f' +- '5d58d417c1c5564c7' +- '5db0f2b89be2db1ed' +- '60b5b9341c2042487' +- '624d088d720311ed1' +- '62d82cda11d0f9b38' +- '62f565d438048059a' +- '6388350eec7089722' +- '6395dcf10e46c2df4' +- '63ddd50ee2f06d21e' +- '659c0196b18dee789' +- '65eb31b903d198175' +- '680330024974b3922' +- '688d1425020687afa' +- '68a8b0713f18185db' +- '6d179db1a50f4c602' +- '6dea4a1a6979c2797' +- '6df58e0094b1a82a5' +- '6eeb10e923eccae43' +- '7117885d184126d27' +- '7284fc567658fbe04' +- '73ca92fca40e84ca9' +- '75249638c881868c9' +- '75ddd45cee2b92f9f' +- '77107a2d9a5eb1301' +- '784776b37986bd743' +- '79f0721b30ab0e3d3' +- '7c07f70777a9d5e22' +- '7c2f2ad41acd928d7' +- '7cd03f87faa486284' +- '7e97ff6f6641a13f1' +- '7f12e0e132e710aa1' +- '8232d413132fd6755' +- '846aa1e0898aa0fd0' +- '854087b8099330cbb' +- '8683529f521965efb' +- '893336dcb0b52efed' +- '8b36f8a89f74b299b' +- '8b5b89a17f525c066' +- '8b8814727e2040c64' +- '8d909a1730d905741' +- '902320d7f1f9593e1' +- '91b676ab99380dae5' +- '925de5e162bda56b1' +- '940c8a566764b138f' +- '94edd98b10bee29f9' +- '95a1f61c556d4e7bb' +- '95b268581e07dfe75' +- '96ea7489c4ed725a8' +- '974cbc9e5430ffccb' +- '98b7bfbfa038695a8' +- '9e18abe750bbca914' +- '9fac3e2945fbb262f' +- 'a07f1969e1ecabd71' +- 'a10e31bb770227e1c' +- 'a356d1eec9561f41f' +- 'a53c4dbabccbf9ad7' +- 'a55f742686cf0e021' +- 'a779d4593119ba75d' +- 'a8b0630e7a0503603' +- 'a90e5a1d4e0eaf900' +- 'a9f3a0b070e956786' +- 'aa748e8b6e50c5aa2' +- 'aaa8ad268aeb2b33d' +- 'ac7cf00c976d1117c' +- 'acd702bf02f866713' +- 'aea520c78f8b94ce4' +- 'b0517afccc549cfa1' +- 'b29f5b928d8258e27' +- 'b4878f7f23f9a2d03' +- 'b5293d4480c7c425f' +- 'b59dbe7814a46d874' +- 'b5dc5725034e9698b' +- 'b80cec0f192c73ec8' +- 'b97724de6657d8a87' +- 'ba1811d57eb6e5fca' +- 'bbe5bcfd87eec2751' +- 'bd93ce3872b238f99' +- 'be48ae0d28bcf838c' +- 'be6f0a5cc43d23778' +- 'bf9caad8d0769b976' +- 'c062b247888baab11' +- 'c0a351e8e502a15c2' +- 'c2567200723a74dc4' +- 'c6640b50de9097a57' +- 'c67b6d22df8757dde' +- 'c67d22cbeb17a26dc' +- 'c68517f8b334e884d' +- 'c786e0464135c2ce2' +- 'ca648e31c14bd6d7b' +- 'ca7d4b2fe9e50ea90' +- 'cac747e1f9d79fb65' +- 'cb532e65ee0fb6214' +- 'cb62dae240fc0cfef' +- 'cc95e4a6ae9cd53dc' +- 'cce551cb57a2dadfb' +- 'ccfe0fb25455a6c23' +- 'cd6a11e595f9a52bc' +- 'cdcaf274771b9431a' +- 'cf8ccd6f64e2a2eb0' +- 'cfc3b4fce5ba4db5c' +- 'd036796570f6c1408' +- 'd14e7416564f243db' +- 'd2485e7a7ebeef7b4' +- 'd2bb1584003453202' +- 'd35e9678f254cab6a' +- 'd45e4b3b4680a10fe' +- 'd500c92f0a6d03763' +- 
'd5cd6a5bbfa4a7cba' +- 'd7286e8accd37a650' +- 'd74ee03e2fc05b111' +- 'd7c22fdbafbc9e8e7' +- 'd88a4aa3fab405d76' +- 'd91bf2d4ee3b2aeb7' +- 'da30d8f979139b32c' +- 'db5b6cc66d7e4fb31' +- 'dc11a1e2bb4e6b9b8' +- 'dc401b1774612f61a' +- 'dd0c2f8d96f601b23' +- 'dd350799c316bcd2e' +- 'def9f9e2319514136' +- 'dfac4c9a0ecb22c25' +- 'e364916c9ca2c771c' +- 'e484a9def4ccefd34' +- 'e947e5fb3f2afafd1' +- 'eab4d52a75cf5303f' +- 'eaf5b61183e89a5f4' +- 'ebdb4044e28c31e1d' +- 'ec2382a1a500ca0e2' +- 'ecb720a8a00a2023c' +- 'eccb08b4729429873' +- 'ed1fa3b21e6fea4d2' +- 'eef3bc3f9753cbbdf' +- 'ef310cd99a8c34e5f' +- 'f0e56343f9478cde4' +- 'f1b5fa2dddcdb8ce2' +- 'f49b29b37a71f14b7' +- 'f6cf20eb11b17caba' +- 'f92a325de354a82d6' +- 'fab424b52bbed7a96' +- 'facd2351efa163724' +- 'fccd850b91749fce2' +- 'fd12d26948960e6a4' +- 'ff27ae550f0ce237b' +- 'ff631d75dff54b3a1' +- 'ffb2fa46014d4438d' + diff --git a/navsim/planning/script/config/common/train_test_split/warmup_two_stage.yaml b/navsim/planning/script/config/common/train_test_split/warmup_two_stage.yaml new file mode 100644 index 0000000..cee2bbe --- /dev/null +++ b/navsim/planning/script/config/common/train_test_split/warmup_two_stage.yaml @@ -0,0 +1,257 @@ +defaults: +- scene_filter: warmup_two_stage +data_split: test +reactive_all_mapping: +- - '6e3cf172a2755fae' + - '67bce1e28df25eaa' + - - - '55ae0fb023dccdeef' + - 'cfc3b4fce5ba4db5c' + - - 'e484a9def4ccefd34' + - '151061d0d63eb2be6' + - - '5c199633e69db185f' + - 'cac747e1f9d79fb65' + - - '1cfd90c1b01a50afe' + - 'a9f3a0b070e956786' + - - '96ea7489c4ed725a8' + - '063b137663ba8c507' + - - 'dd0c2f8d96f601b23' + - '4a739fc19f2829e98' + - - 'bbe5bcfd87eec2751' + - '3e835a314f8e85e65' + - - 'a779d4593119ba75d' + - 'd91bf2d4ee3b2aeb7' + - - 'aaa8ad268aeb2b33d' + - 'bd93ce3872b238f99' + - - 'ba1811d57eb6e5fca' + - '454f5f1ed99b03e21' + - - '06082aa25b363522c' + - '6d179db1a50f4c602' + - - '40592b30e67921dd9' + - 'e364916c9ca2c771c' + - - '16be788d02f1fef14' + - 'f92a325de354a82d6' +- - '6326d00e52115da4' + - 'efa80e78fce1525d' + - - - 'eccb08b4729429873' + - 'fab424b52bbed7a96' + - - '41efad97268e1bea0' + - '2693693ffe3ace9d7' + - - '314e1c5d77b802e7a' + - '91b676ab99380dae5' + - - 'd35e9678f254cab6a' + - 'a10e31bb770227e1c' + - - '178a524a517377f19' + - '68a8b0713f18185db' + - - '36557ff78c024a815' + - 'c68517f8b334e884d' + - - '854087b8099330cbb' + - '7cd03f87faa486284' + - - '8b36f8a89f74b299b' + - 'f0e56343f9478cde4' + - - 'cdcaf274771b9431a' + - 'db5b6cc66d7e4fb31' + - - '05d102dbfc508f379' + - 'c786e0464135c2ce2' + - - '47fb0d7e7e1751ffe' + - 'a90e5a1d4e0eaf900' + - - 'cce551cb57a2dadfb' + - 'd2485e7a7ebeef7b4' + - - 'cb532e65ee0fb6214' + - 'a356d1eec9561f41f' + - - '5db0f2b89be2db1ed' + - '550e0654aa362120c' + - - 'b4878f7f23f9a2d03' + - '95b268581e07dfe75' + - - 'ef310cd99a8c34e5f' + - '60b5b9341c2042487' + - - '8b5b89a17f525c066' + - 'ec2382a1a500ca0e2' + - - 'b0517afccc549cfa1' + - '3218bf90d6d24dc07' + - - '1f2dfd2d3b74ad454' + - 'd14e7416564f243db' + - - 'd74ee03e2fc05b111' + - 'd7286e8accd37a650' + - - 'f6cf20eb11b17caba' + - 'ff631d75dff54b3a1' + - - '25b7f50ff65bf14c0' + - 'dd350799c316bcd2e' +- - '3c542be991515ccd' + - '5f53001e7e2d5347' + - - - '052328b114b9c5cb5' + - '439515fa09c8b8c17' + - - '1ac6bf0f26f45e643' + - '484a304ef257656cd' + - - '10bdbc6a9b7b2bd43' + - '2541602e453206839' + - - '488c24c04a2b22e40' + - 'b5dc5725034e9698b' + - - '2193125d732ee8d63' + - 'f49b29b37a71f14b7' + - - 'be6f0a5cc43d23778' + - 'be48ae0d28bcf838c' + - - '6395dcf10e46c2df4' + - '4011e54d1cd33878d' + - - 
'6df58e0094b1a82a5' + - '514ba443c93be0c84' + - - '1061580743a8ed861' + - 'a55f742686cf0e021' + - - '22777eed80fe6c2f6' + - 'ebdb4044e28c31e1d' + - - 'c0a351e8e502a15c2' + - '300c75247d0fbaeb8' + - - 'e947e5fb3f2afafd1' + - 'a8b0630e7a0503603' + - - '925de5e162bda56b1' + - '3a3c5df8add8dc277' + - - 'ca7d4b2fe9e50ea90' + - '3f785f1fec6b20810' +- - '0dc54a8c8203567b' + - 'ed3db88f99a05a6c' + - - - '95a1f61c556d4e7bb' + - '35c0d827b75f88593' + - - 'b59dbe7814a46d874' + - '4291bb38d432590b4' + - - '543524873fd608480' + - '42fe9af736de10be8' + - - '467eb1f2d2e219049' + - 'ecb720a8a00a2023c' + - - '680330024974b3922' + - '62f565d438048059a' + - - '155fc3b8978be258b' + - '2e8f7bda514e182e8' + - - '44e909afb129ace3a' + - '44a0015fc40c1cb88' + - - '18cda5f11659c3287' + - 'c062b247888baab11' + - - '1853bf359b16c44e1' + - 'dc401b1774612f61a' + - - '4010725f6359fa6d7' + - '73ca92fca40e84ca9' + - - '7284fc567658fbe04' + - 'ffb2fa46014d4438d' + - - '50d3b8161a0e2ec5e' + - '5b3d38f3a7038d843' + - - '31a41cd314e13595f' + - '487a9143936d4df95' + - - 'c2567200723a74dc4' + - 'd88a4aa3fab405d76' + - - 'dfac4c9a0ecb22c25' + - '7c07f70777a9d5e22' +- - 'c054f473288d5515' + - '68097b10029d5fe1' + - - - 'def9f9e2319514136' + - 'acd702bf02f866713' + - - '8d909a1730d905741' + - '8b8814727e2040c64' + - - 'b80cec0f192c73ec8' + - '2948ba510215527e5' + - - '65eb31b903d198175' + - 'fd12d26948960e6a4' + - - '62d82cda11d0f9b38' + - 'a07f1969e1ecabd71' + - - '420cf2b19dd7ceb38' + - '58ea88f6fa166c41d' + - - 'd2bb1584003453202' + - '1d00c1f1df43c8c01' + - - '1e429d2f774768092' + - 'd500c92f0a6d03763' + - - '7c2f2ad41acd928d7' + - 'b97724de6657d8a87' + - - 'd7c22fdbafbc9e8e7' + - 'aa748e8b6e50c5aa2' + - - 'dc11a1e2bb4e6b9b8' + - '659c0196b18dee789' + - - '5685730c604318275' + - '1c963497d68500ad1' + - - 'ff27ae550f0ce237b' + - '98b7bfbfa038695a8' +- - '4cac9f6cd85a5b47' + - '2382542e05f253df' + - - - '0fd2976bdc791e64f' + - '6dea4a1a6979c2797' + - - '102d8784884da0d54' + - '688d1425020687afa' + - - 'facd2351efa163724' + - '0a7e42966c6e6cce4' + - - '3ad3b5451829788bc' + - '9fac3e2945fbb262f' + - - 'c67d22cbeb17a26dc' + - '77107a2d9a5eb1301' + - - '03d0f73649589dca9' + - '63ddd50ee2f06d21e' + - - '8232d413132fd6755' + - 'd45e4b3b4680a10fe' + - - 'f1b5fa2dddcdb8ce2' + - '5d58d417c1c5564c7' + - - '75ddd45cee2b92f9f' + - 'a53c4dbabccbf9ad7' + - - '3aa81c9276d98b0ab' + - '1bca89debb28211cf' + - - '6388350eec7089722' + - '624d088d720311ed1' + - - '462a33f831735c5b5' + - '374858d4d47bbd16a' + - - 'cc95e4a6ae9cd53dc' + - '14ddd2546c5cda663' + - - 'bf9caad8d0769b976' + - '35d06cd482bbccf6e' +- - 'f6b9867069845fd2' + - '19e90f2757b25f38' + - - - '16e0a69867db58fda' + - '902320d7f1f9593e1' + - - '28728be442990aea2' + - '6eeb10e923eccae43' + - - 'da30d8f979139b32c' + - '79f0721b30ab0e3d3' + - - 'c6640b50de9097a57' + - 'cd6a11e595f9a52bc' + - - 'ccfe0fb25455a6c23' + - 'cb62dae240fc0cfef' + - - '326f9219f9a19b234' + - 'd036796570f6c1408' + - - '7117885d184126d27' + - 'c67b6d22df8757dde' + - - 'eef3bc3f9753cbbdf' + - '846aa1e0898aa0fd0' + - - '309c9042564f167ee' + - '4b5dc729abe1e75fe' + - - '0923ee20dc5d20db6' + - 'eaf5b61183e89a5f4' + - - '75249638c881868c9' + - '0395128411941228f' + - - '8683529f521965efb' + - '271bc2d78c35ee654' + - - 'fccd850b91749fce2' + - '30442b44b719bcadc' + - - 'ed1fa3b21e6fea4d2' + - 'd5cd6a5bbfa4a7cba' + - - 'ca648e31c14bd6d7b' + - '4068d8c6bdd0434cb' +- - '13310ca9a7715154' + - 'd01fec804cd45644' + - - - '9e18abe750bbca914' + - '1574a4d082ac80b19' + - - '7f12e0e132e710aa1' + - 'b5293d4480c7c425f' + - - 
'7e97ff6f6641a13f1' + - 'ac7cf00c976d1117c' + - - '784776b37986bd743' + - 'b29f5b928d8258e27' + - - '54f694f18d2befecc' + - '50ab50f23e1e36c22' + - - '44f56a3c522b7ea16' + - '4f486a22a5253de77' + - - '3de8eff4da7a19f0f' + - '974cbc9e5430ffccb' + - - '15794da6178dc480a' + - '47531b5fa51305bdc' + - - '4c0a18dcc1cd87e5a' + - '267e63ac83466d214' + - - 'aea520c78f8b94ce4' + - 'eab4d52a75cf5303f' + - - '940c8a566764b138f' + - '94edd98b10bee29f9' + - - 'cf8ccd6f64e2a2eb0' + - '893336dcb0b52efed' + diff --git a/navsim/planning/script/config/pdm_scoring/default_run_pdm_score.yaml b/navsim/planning/script/config/pdm_scoring/default_run_pdm_score.yaml index 96225a5..5da1220 100644 --- a/navsim/planning/script/config/pdm_scoring/default_run_pdm_score.yaml +++ b/navsim/planning/script/config/pdm_scoring/default_run_pdm_score.yaml @@ -19,3 +19,4 @@ simulator: _target_: navsim.planning.simulation.planner.pdm_planner.simulation.pdm_simulator.PDMSimulator _convert_: 'all' proposal_sampling: ${proposal_sampling} + diff --git a/navsim/planning/script/config/training/default_training.yaml b/navsim/planning/script/config/training/default_training.yaml index 175b0e9..fcba075 100644 --- a/navsim/planning/script/config/training/default_training.yaml +++ b/navsim/planning/script/config/training/default_training.yaml @@ -14,6 +14,9 @@ defaults: - agent: ego_status_mlp_agent - _self_ +# - default_train_val_test_log_split + # - train_val_syn_split + split: trainval cache_path: ${oc.env:NAVSIM_EXP_ROOT}/training_cache use_cache_without_dataset: false # load the training samples from the cache. scene-filter will be ignored @@ -23,9 +26,9 @@ seed: 0 dataloader: params: batch_size: 64 # number of samples per batch - num_workers: 4 # number of workers for data loading + num_workers: 4 # 4 number of workers for data loading pin_memory: true # pin memory for faster GPU transfer - prefetch_factor: 2 # number of samples loaded in advance by each worker + prefetch_factor: 2 # 2 number of samples loaded in advance by each worker trainer: params: diff --git a/navsim/planning/script/config/training/train_val_syn_split.yaml b/navsim/planning/script/config/training/train_val_syn_split.yaml new file mode 100644 index 0000000..e2b90e1 --- /dev/null +++ b/navsim/planning/script/config/training/train_val_syn_split.yaml @@ -0,0 +1,78 @@ +train_logs: +- 2021.09.16.14.39.34_veh-42_00032_00186 +- 2021.10.06.08.16.17_veh-52_00612_00782 +- 2021.09.29.14.44.26_veh-28_00528_00992 +- 2021.09.29.18.19.40_veh-28_01268_01685 +- 2021.10.06.08.16.17_veh-52_01949_02501 +- 2021.09.29.14.44.26_veh-28_00337_00504 +- 2021.08.30.16.16.44_veh-40_00256_00716 +- 2021.06.28.20.24.43_veh-38_03385_04952 +- 2021.09.16.13.53.10_veh-42_00860_01069 +- 2021.06.03.17.06.58_veh-35_03860_03992 +- 2021.09.29.19.02.14_veh-28_02451_02708 +- 2021.10.06.07.26.10_veh-52_00953_01126 +- 2021.09.16.13.53.10_veh-42_00180_00342 +- 2021.05.25.14.24.08_veh-25_03764_04034 +- 2021.08.30.14.54.34_veh-40_00439_00835 +- 2021.08.30.16.16.44_veh-40_01099_01351 +- 2021.09.29.14.44.26_veh-28_01202_01296 +- 2021.09.29.19.02.14_veh-28_00540_00917 +- 2021.09.16.14.39.34_veh-42_00297_00935 +- 2021.08.30.16.16.44_veh-40_00779_01088 +- 2021.09.16.19.49.00_veh-42_00990_01609 +- 2021.09.16.19.27.01_veh-45_00472_00711 +- 2021.09.29.15.23.04_veh-28_00814_01101 +- 2021.10.06.07.26.10_veh-52_00772_00917 +- 2021.10.06.07.26.10_veh-52_01245_02064 +- 2021.06.28.13.53.26_veh-26_00492_00696 +- 2021.08.30.13.45.25_veh-40_00610_00771 +- 2021.09.16.13.53.10_veh-42_01510_01591 +- 
2021.09.16.19.27.01_veh-45_01749_03230 +- 2021.09.16.13.53.10_veh-42_00077_00153 +- 2021.05.25.14.26.37_veh-27_04122_04279 +- 2021.09.16.19.12.04_veh-42_01438_01677 +- 2021.06.03.18.47.39_veh-35_00503_00777 +- 2021.05.25.15.59.03_veh-30_04027_04200 +- 2021.09.29.18.19.40_veh-28_00438_00833 +- 2021.06.03.18.47.39_veh-35_00257_00492 +- 2021.08.30.13.45.25_veh-40_01116_01336 +- 2021.10.06.07.26.10_veh-52_00006_00398 +- 2021.09.16.16.20.27_veh-08_02435_02525 +- 2021.05.25.15.59.03_veh-30_04463_04606 +- 2021.06.28.15.02.02_veh-38_02398_02848 +- 2021.10.06.08.16.17_veh-52_01590_01725 +- 2021.06.28.16.29.11_veh-38_03263_03766 +- 2021.10.06.07.26.10_veh-52_02208_02394 +- 2021.09.29.18.19.40_veh-28_00844_01218 +- 2021.10.06.08.16.17_veh-52_00922_01296 +- 2021.09.16.14.39.34_veh-42_01609_01687 +- 2021.05.25.17.54.41_veh-35_01905_02121 +- 2021.09.16.15.12.03_veh-42_01037_01434 +- 2021.10.06.08.16.17_veh-52_00181_00574 +- 2021.09.09.17.18.51_veh-48_01248_01450 +- 2021.09.16.21.13.37_veh-42_00172_00347 +- 2021.10.06.08.16.17_veh-52_01430_01579 +- 2021.09.29.19.02.14_veh-28_00273_00514 +- 2021.10.06.07.26.10_veh-52_00422_00728 +- 2021.09.29.15.23.04_veh-28_00601_00802 +- 2021.05.25.14.16.10_veh-35_03373_03550 +- 2021.09.16.15.47.30_veh-45_01199_01391 +- 2021.09.29.19.02.14_veh-28_02911_03005 +- 2021.05.25.14.16.10_veh-35_01100_01664 +- 2021.09.29.19.02.14_veh-28_03198_03360 +- 2021.06.03.13.55.17_veh-35_02572_02855 +- 2021.08.16.14.23.37_veh-45_00015_00132 +- 2021.06.03.12.02.06_veh-35_00233_00609 +- 2021.09.29.19.02.14_veh-28_00964_01689 +- 2021.06.03.17.06.58_veh-35_02571_02742 +- 2021.05.25.17.54.41_veh-35_04111_04288 +- 2021.05.25.15.59.03_veh-30_00625_00855 +- 2021.08.30.13.45.25_veh-40_00878_01104 +val_logs: +- 2021.05.25.14.16.10_veh-35_01690_02183 +- 2021.06.03.13.55.17_veh-35_02866_03582 +- 2021.09.29.14.44.26_veh-28_00238_00320 +- 2021.09.16.14.39.34_veh-42_01111_01448 +- 2021.09.16.19.12.04_veh-42_00289_00398 +- 2021.09.16.17.40.09_veh-45_02539_02745 +- 2021.09.29.14.44.26_veh-28_01331_01485 diff --git a/navsim/planning/script/run_create_submission_pickle.py b/navsim/planning/script/run_create_submission_pickle.py index de2e453..9a7ee22 100644 --- a/navsim/planning/script/run_create_submission_pickle.py +++ b/navsim/planning/script/run_create_submission_pickle.py @@ -10,7 +10,7 @@ from omegaconf import DictConfig from tqdm import tqdm -from navsim.agents.abstract_agent import AbstractAgent +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent from navsim.common.dataclasses import SceneFilter, Trajectory from navsim.common.dataloader import SceneLoader @@ -58,7 +58,8 @@ def run_test_evaluation( for token in tqdm(input_loader, desc="Running evaluation"): try: agent_input = input_loader.get_agent_input_from_token(token) - trajectory = agent.compute_trajectory(agent_input) + poses,_ = agent.compute_trajectory(agent_input) + trajectory=Trajectory(poses) output.update({token: trajectory}) except Exception: logger.warning(f"----------- Agent failed for token {token}:") diff --git a/navsim/planning/script/run_pdm_merge.py b/navsim/planning/script/run_pdm_merge.py new file mode 100644 index 0000000..65f8f6a --- /dev/null +++ b/navsim/planning/script/run_pdm_merge.py @@ -0,0 +1,607 @@ +import logging +import os +import traceback +import uuid +from dataclasses import fields +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Union + +import hydra +import numpy as np +import pandas as pd +from hydra.utils import instantiate +from 
nuplan.common.actor_state.state_representation import StateSE2 +from nuplan.common.geometry.convert import relative_to_absolute_poses +from nuplan.planning.script.builders.logging_builder import build_logger +from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling +from nuplan.planning.utils.multithreading.worker_utils import worker_map +from omegaconf import DictConfig + +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent +from navsim.common.dataclasses import PDMResults, SensorConfig +from navsim.common.dataloader import MetricCacheLoader, SceneFilter, SceneLoader +from navsim.common.enums import SceneFrameType +from navsim.evaluate.pdm_score import pdm_score +from navsim.planning.script.builders.worker_pool_builder import build_worker +from navsim.planning.simulation.planner.pdm_planner.scoring.pdm_comfort_metrics import ego_is_two_frame_extended_comfort +from navsim.planning.simulation.planner.pdm_planner.scoring.pdm_scorer import PDMScorer +from navsim.planning.simulation.planner.pdm_planner.simulation.pdm_simulator import PDMSimulator +from navsim.planning.simulation.planner.pdm_planner.utils.pdm_enums import WeightedMetricIndex +from navsim.traffic_agents_policies.abstract_traffic_agents_policy import AbstractTrafficAgentsPolicy +from navsim.agents.diffusiondrive.transfuser_agent import TransfuserAgent,TransfuserConfig + + +logger = logging.getLogger(__name__) + +CONFIG_PATH = "config/pdm_scoring" +CONFIG_NAME = "default_run_pdm_score" + +from navsim.common.dataclasses import AgentInput, Scene, Annotations,Trajectory +from navsim.visualization.plots import plot_bev_with_agent +from pathlib import Path +import matplotlib.pyplot as plt + +from pathlib import Path +import shutil + +def run_pdm_score(args: List[Dict[str, Union[List[str], DictConfig]]]) -> List[pd.DataFrame]: + """ + Helper function to run PDMS evaluation in. 
+ :param args: input arguments + """ + node_id = int(os.environ.get("NODE_RANK", 0)) + thread_id = str(uuid.uuid4()) + logger.info(f"Starting worker in thread_id={thread_id}, node_id={node_id}") + + log_names = [a["log_file"] for a in args] + tokens = [t for a in args for t in a["tokens"]] + cfg: DictConfig = args[0]["cfg"] + + simulator: PDMSimulator = instantiate(cfg.simulator) + scorer: PDMScorer = instantiate(cfg.scorer) + assert ( + simulator.proposal_sampling == scorer.proposal_sampling + ), "Simulator and scorer proposal sampling has to be identical" + # print(cfg.agent) + # import pdb;pdb.set_trace() + agent: AbstractAgent = instantiate(cfg.agent) + agent.initialize() + + traffic_agents_policy: AbstractTrafficAgentsPolicy = instantiate( + cfg.traffic_agents_policy, simulator.proposal_sampling + ) + metric_cache_loader = MetricCacheLoader(Path(cfg.metric_cache_path)) + scene_filter: SceneFilter = instantiate(cfg.train_test_split.scene_filter) + scene_filter.log_names = log_names + scene_filter.tokens = tokens + scene_loader = SceneLoader( + sensor_blobs_path=Path(cfg.sensor_blobs_path), + navsim_blobs_path=Path(cfg.navsim_blobs_path), + data_path=Path(cfg.navsim_log_path), + synthetic_scenes_path=Path(cfg.synthetic_scenes_path), + scene_filter=scene_filter, + sensor_config=agent.get_sensor_config(), + ) + + tokens_to_evaluate = list(set(scene_loader.tokens) & set(metric_cache_loader.tokens)) + pdm_results: List[pd.DataFrame] = [] + + # import pandas as pd + # import random + # import numpy as np + # csv_path = "/data/hdd01/dingzx/navsim_exp/diffusiondrive_agent_eval/2025.03.29.13.48.31/2025.03.29.14.08.58.csv" + # df = pd.read_csv(csv_path) + # zero_score_df = df[df['score'] == 0.0] + # tokens_zero = zero_score_df['token'].tolist() + + # goalflow /data/hdd01/dingzx/goalflow_image/2025.04.04.17.54.33.csv + # diffusiondrive /data/hdd01/dingzx/navsim_exp/diffusiondrive_agent_eval/2025.04.03.16.07.33/2025.04.03.16.26.38.csv + # goalflow /data/hdd01/dingzx/goalflow_image/submission.pkl + # diffusiondrive /data/hdd01/dingzx/navsim_exp/submission_cv_agent/2025.04.03.18.47.15/submission.pkl + + import pickle + import pandas as pd + with open('/data/hdd01/dingzx/goalflow_image/submission.pkl', 'rb') as f: + tra_goalflow = pickle.load(f) + with open('/data/hdd01/dingzx/navsim_exp/submission_cv_agent/2025.04.03.18.47.15/submission.pkl', 'rb') as f: + tra_diffusiondrive = pickle.load(f) + file_path = "/data/hdd01/dingzx/goalflow_image/2025.04.04.17.54.33.csv" + target_column = "two_frame_extended_comfort" + df = pd.read_csv(file_path) + empty_tokens = df[df[target_column].isna()]["token"].tolist() + + file1_path = "/data/hdd01/dingzx/navsim_exp/diffusiondrive_agent_eval/2025.04.08.00.16.45/2025.04.08.00.32.26.csv" + file2_path = "/data/hdd01/dingzx/navsim_exp/diffusiondrive_agent_eval/2025.04.08.11.03.38/2025.04.08.11.21.44.csv" + + # read the two score csv files + df1 = pd.read_csv(file1_path) + df2 = pd.read_csv(file2_path) + + # tokens where diffusiondrive is non-zero + valid_tokens_file1 = df1[df1["drivable_area_compliance"] != 0.0]["token"].tolist() + + # tokens where goalflow is zero + invalid_tokens_file2 = df2[df2["drivable_area_compliance"] == 0.0]["token"].tolist() + + result_tokens = list(set(valid_tokens_file1) & set(invalid_tokens_file2)) + + for idx, (token) in enumerate(tokens_to_evaluate): + + logger.info( + f"Processing scenario {idx + 1} / {len(tokens_to_evaluate)} in thread_id={thread_id}, node_id={node_id}" + ) + try: + metric_cache = metric_cache_loader.get_from_token(token) + # # output directly + # if token=='e1ab42145c177ba50': + # 
trajectory=tra_diffusiondrive['predictions'][0][token] + # elif token in empty_tokens: + # trajectory=tra_goalflow['predictions'][0][token] + # else: + # trajectory=tra_diffusiondrive['predictions'][0][token] + + # trajectory=tra_goalflow['predictions'][0][token] + + if token=='e1ab42145c177ba50' or token in result_tokens: + trajectory=tra_diffusiondrive['predictions'][0][token] + else: + trajectory=tra_goalflow['predictions'][0][token] + + # trajectory=tra_diffusiondrive['predictions'][0][token] + score_row, ego_simulated_states = pdm_score( + metric_cache=metric_cache, + model_trajectory=trajectory, + future_sampling=simulator.proposal_sampling, + simulator=simulator, + scorer=scorer, + traffic_agents_policy=traffic_agents_policy, + ) + score_row["valid"] = True + score_row["log_name"] = metric_cache.log_name + score_row["frame_type"] = metric_cache.scene_type + score_row["start_time"] = metric_cache.timepoint.time_s + end_pose = StateSE2( + x=trajectory.poses[-1, 0], + y=trajectory.poses[-1, 1], + heading=trajectory.poses[-1, 2], + ) + absolute_endpoint = relative_to_absolute_poses(metric_cache.ego_state.rear_axle, [end_pose])[0] + score_row["endpoint_x"] = absolute_endpoint.x + score_row["endpoint_y"] = absolute_endpoint.y + score_row["start_point_x"] = metric_cache.ego_state.rear_axle.x + score_row["start_point_y"] = metric_cache.ego_state.rear_axle.y + score_row["ego_simulated_states"] = [ego_simulated_states] # used for two-frames extended comfort + + except Exception: + logger.warning(f"----------- Agent failed for token {token}:") + traceback.print_exc() + score_row = pd.DataFrame([PDMResults.get_empty_results()]) + score_row["valid"] = False + score_row["token"] = token + + pdm_results.append(score_row) + return pdm_results + + +def infer_two_stage_mapping(score_df: pd.DataFrame, first_stage_duration: float) -> pd.DataFrame: + initial_frames = score_df[(score_df["valid"]) & (score_df["frame_type"] == SceneFrameType.ORIGINAL)] + + two_stage_mapping = {} + for _, row in initial_frames.iterrows(): + # Filter tokens in the same log starting at least T seconds later + earliest_second_stage_start_time = row["start_time"] + first_stage_duration - 0.05 + latest_second_stage_start_time = row["start_time"] + first_stage_duration + 0.05 + second_stage_tokens: pd.DataFrame = score_df[ + (score_df["log_name"] == row["log_name"]) + & (score_df["start_time"] <= latest_second_stage_start_time) + & (score_df["start_time"] >= earliest_second_stage_start_time) + & (score_df["valid"]) + & (score_df["frame_type"] == SceneFrameType.SYNTHETIC) + ]["token"].to_list() + + two_stage_mapping[row["token"]] = second_stage_tokens + return two_stage_mapping + + +def validate_two_stage_mapping( + score_df: pd.DataFrame, + two_stage_mapping: Dict[str, List[str]], + validate_start_times: bool = True, +) -> None: + # make sure all tokens are unique + all_tokens = [token for tokens in two_stage_mapping.values() for token in tokens] + list(two_stage_mapping.keys()) + assert len(all_tokens) == len(set(all_tokens)), "Tokens in the two stage mapping are not unique." + + # make sure all tokens are in the score dataframe + assert set(all_tokens) == set(score_df["token"]), ( + f"Tokens in the two stage aggregation mapping and the results are not the same. " + f"Missing tokens in the mapping: {set(all_tokens) - set(score_df['token'])}." + f"Missing tokens in the results: {set(score_df['token']) - set(all_tokens)}." 
+ ) + + # make sure subsequent tokens belong to the same log + # make sure first stage and second stage tokens are 4s apart + for first_stage_token, second_stage_tokens in two_stage_mapping.items(): + first_stage_log_name = score_df[score_df["token"] == first_stage_token].iloc[0]["log_name"] + if validate_start_times: + first_stage_start_time = score_df[score_df["token"] == first_stage_token].iloc[0]["start_time"] + else: + first_stage_start_time = 0.0 + for second_stage_token in second_stage_tokens: + second_stage_log_name = score_df[score_df["token"] == second_stage_token].iloc[0]["log_name"] + if validate_start_times: + second_stage_start_time = score_df[score_df["token"] == second_stage_token].iloc[0]["start_time"] + else: + second_stage_start_time = 4.0 + assert first_stage_log_name == second_stage_log_name, ( + f"Tokens {first_stage_token} and {second_stage_token} belong to different logs." + f"First stage log: {first_stage_log_name}, second stage log: {second_stage_log_name}." + ) + assert np.abs(second_stage_start_time - first_stage_start_time - 4.0) < 0.05, ( + f"Tokens {first_stage_token} and {second_stage_token} are not 4s apart." + f"First stage start time: {first_stage_start_time}, second stage start time: {second_stage_start_time}." + ) + + # make sure the frame_type of all first_stage tokens is ORIGINAL and all second_stage tokens is SYNTHETIC + first_stage_tokens = list(two_stage_mapping.keys()) + second_stage_tokens = [token for tokens in two_stage_mapping.values() for token in tokens] + first_stage_types = score_df.loc[score_df["token"].isin(first_stage_tokens), "frame_type"] + second_stage_types = score_df.loc[score_df["token"].isin(second_stage_tokens), "frame_type"] + assert (first_stage_types == SceneFrameType.ORIGINAL).all(), "Some first-stage tokens are not of type ORIGINAL." + assert (second_stage_types == SceneFrameType.SYNTHETIC).all(), "Some second-stage tokens are not of type SYNTHETIC." + + +def calculate_pseudo_closed_loop_weights( + score_df: pd.DataFrame, two_stage_mapping: Dict[str, List[str]] +) -> pd.DataFrame: + """ + Calculate two stage scores for each scenario. + :param score_rows: List of dataframes containing scores for each scenario. + :param first_stage_duration: Duration of the first stage in seconds. 
+ """ + pd.options.mode.copy_on_write = True + + def _calc_distance(x1, y1, x2, y2): + return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) + + first_stage_tokens = list(two_stage_mapping.keys()) + + weights = [] + for first_stage_token in first_stage_tokens: + first_stage_row = score_df[score_df["token"] == first_stage_token].iloc[0] + second_stage_tokens = two_stage_mapping[first_stage_token] + # set weight of first stage to one + weights.append(pd.DataFrame([{"token": first_stage_token, "weight": 1.0}])) + # compute weights for second stage + second_stage_scores: pd.DataFrame = score_df[(score_df["token"].isin(second_stage_tokens))] + second_stage_scores["distance"] = second_stage_scores.apply( + lambda x: _calc_distance( + first_stage_row["endpoint_x"], + first_stage_row["endpoint_y"], + x["start_point_x"], + x["start_point_y"], + ), + axis=1, + ) + second_stage_scores["weight"] = second_stage_scores["distance"].apply(lambda x: np.exp(-x)) + second_stage_scores["weight"] = second_stage_scores["weight"] / second_stage_scores["weight"].sum() + + weights.append(second_stage_scores[["token", "weight"]]) + + weights = pd.concat(weights) + return weights + + +def calculate_two_frame_extended_comfort(score_df: pd.DataFrame, proposal_sampling: TrajectorySampling) -> pd.DataFrame: + """ + Calculates two-frame extended comfort by comparing only the overlapping parts of consecutive original frames. + Handles varying observation intervals. + + :param score_df: DataFrame containing scores and states of frames. + :param proposal_sampling: Sampling parameters for trajectory. + :return: DataFrame containing two-frame extended comfort scores. + """ + results = [] + interval_length = proposal_sampling.interval_length # Default: 0.1s + # print(score_df) + grouped_logs = score_df[score_df["frame_type"] == SceneFrameType.ORIGINAL].groupby("log_name") + + for log_name, group_df in grouped_logs: + group_df = group_df.sort_values(by="start_time").reset_index(drop=True) + + for idx in range(len(group_df) - 1): # Iterate over consecutive frames + current_row = group_df.iloc[idx] + next_row = group_df.iloc[idx + 1] + + observation_interval = next_row["start_time"] - current_row["start_time"] + + if abs(observation_interval) > 0.55: + two_frame_comfort = np.nan + next_token = np.nan + else: + overlap_start = int(observation_interval / interval_length) + + current_states = current_row["ego_simulated_states"] + next_states = next_row["ego_simulated_states"] + + # Ensure they have the same shape + assert current_states.shape == next_states.shape, "Trajectories must be of equal length" + + # Extract only the overlapping part + current_states_overlap = current_states[overlap_start:] + next_states_overlap = next_states[:-overlap_start] + + # Define corresponding time points for overlap + n_overlap = current_states_overlap.shape[0] # Compute the actual number of overlapping steps + time_point_s = np.arange(n_overlap) * interval_length # Generate aligned time steps + + # Compute two-frame extended comfort + two_frame_comfort = ego_is_two_frame_extended_comfort( + current_states_overlap[None, :], + next_states_overlap[None, :], + time_point_s, + )[0].astype(np.float64) + + next_token = next_row["token"] + + results.append( + { + "current_token": current_row["token"], + "next_token": next_token, + "two_frame_extended_comfort": two_frame_comfort, + } + ) + + return pd.DataFrame(results) + + +def compute_final_scores(pdm_score_df: pd.DataFrame) -> pd.DataFrame: + """ + Compute final scores for each row in pdm_score_df after 
updating + the weighted metrics with two-frame extended comfort. + + If 'two_frame_extended_comfort' is NaN for a row, the corresponding + metric and its weight are set to zero, effectively ignoring it + during normalization. + + :param pdm_score_df: DataFrame containing PDM scores and metrics. + :return: A new DataFrame with the computed final scores. + """ + df = pdm_score_df.copy() + + two_frame_scores = df["two_frame_extended_comfort"].to_numpy() # shape: (N, ) + weighted_metrics = np.stack(df["weighted_metrics"].to_numpy()) # shape: (N, M) + weighted_metrics_array = np.stack(df["weighted_metrics_array"].to_numpy()) # shape: (N, M) + + mask = np.isnan(two_frame_scores) + two_frame_idx = WeightedMetricIndex.TWO_FRAME_EXTENDED_COMFORT + + weighted_metrics[mask, two_frame_idx] = 0.0 + weighted_metrics_array[mask, two_frame_idx] = 0.0 + + non_mask = ~mask + weighted_metrics[non_mask, two_frame_idx] = two_frame_scores[non_mask] + + weighted_sum = (weighted_metrics * weighted_metrics_array).sum(axis=1) + total_weight = weighted_metrics_array.sum(axis=1) + total_weight[total_weight == 0.0] = np.nan + weighted_metric_scores = weighted_sum / total_weight + + df["score"] = df["multiplicative_metrics_prod"].to_numpy() * weighted_metric_scores + df.drop( + columns=["weighted_metrics", "weighted_metrics_array", "multiplicative_metrics_prod"], + inplace=True, + ) + + return df + + +def calculate_weighted_average_score(df: pd.DataFrame) -> pd.Series: + """ + Calculate the weighted average score of a dataframe. + :param df: Dataframe containing scores. + """ + + if df.empty: + score_cols = [c for c in df.columns if c not in {"weight", "token"}] + return pd.Series([np.nan] * len(score_cols), index=score_cols) + + weights = df["weight"] + weighted_scores = df[[c for c in df.columns if c not in {"weight", "token"}]].mul(weights, axis=0) + + weighted_scores_row = weighted_scores.sum(skipna=False) + return weighted_scores_row + + +def calculate_individual_mapping_scores(df: pd.DataFrame, two_stage_mapping: Dict[str, List[str]]) -> pd.DataFrame: + """ + Compute the weighted average score for each first_stage_token + in the two_stage_mapping. The function returns a new DataFrame + containing the weighted average for each mapping. + + :param df: A DataFrame that includes columns like 'token', 'weight', 'score', etc. + :param two_stage_mapping: A dictionary where each key is a first-stage token (str), + and each value is a list of second-stage tokens. + :return: A DataFrame with one row per first-stage token, containing the + weighted average scores for that token and its second-stage tokens. + """ + # This list will hold the results (one row per mapping). + rows_for_each_mapping = [] + + for first_stage_token, second_stage_tokens in two_stage_mapping.items(): + + stage1_df = df[df["token"] == first_stage_token] + stage1_avg_series = calculate_weighted_average_score(stage1_df) + stage2_df = df[df["token"].isin(second_stage_tokens)] + stage2_avg_series = calculate_weighted_average_score(stage2_df) + + # Combine the two stages + subset_average = pd.concat([stage1_avg_series, stage2_avg_series], axis=1).mean(axis=1, skipna=True) + rows_for_each_mapping.append(subset_average) + + mapping_scores_df = pd.DataFrame(rows_for_each_mapping) + mapping_scroes_row = mapping_scores_df.mean(skipna=True) + + return mapping_scroes_row + +@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) +def main(cfg: DictConfig) -> None: + """ + Main entrypoint for running PDMS evaluation. 
+ :param cfg: omegaconf dictionary + """ + + build_logger(cfg) + worker = build_worker(cfg) + + # Extract scenes based on scene-loader to know which tokens to distribute across workers + # TODO: infer the tokens per log from metadata, to not have to load metric cache and scenes here + scene_loader = SceneLoader( + sensor_blobs_path=None, + navsim_blobs_path=None, + data_path=Path(cfg.navsim_log_path), + synthetic_scenes_path=Path(cfg.synthetic_scenes_path), + scene_filter=instantiate(cfg.train_test_split.scene_filter), + sensor_config=SensorConfig.build_no_sensors(), + ) + metric_cache_loader = MetricCacheLoader(Path(cfg.metric_cache_path)) + + tokens_to_evaluate = list(set(scene_loader.tokens) & set(metric_cache_loader.tokens)) + num_missing_metric_cache_tokens = len(set(scene_loader.tokens) - set(metric_cache_loader.tokens)) + num_unused_metric_cache_tokens = len(set(metric_cache_loader.tokens) - set(scene_loader.tokens)) + + + if num_missing_metric_cache_tokens > 0: + logger.warning(f"Missing metric cache for {num_missing_metric_cache_tokens} tokens. Skipping these tokens.") + if num_unused_metric_cache_tokens > 0: + logger.warning(f"Unused metric cache for {num_unused_metric_cache_tokens} tokens. Skipping these tokens.") + logger.info(f"Starting pdm scoring of {len(tokens_to_evaluate)} scenarios...") + data_points = [ + { + "cfg": cfg, + "log_file": log_file, + "tokens": tokens_list, + } + for log_file, tokens_list in scene_loader.get_tokens_list_per_log().items() + ] + score_rows: List[pd.DataFrame] = worker_map(worker, run_pdm_score, data_points) + + pdm_score_df = pd.concat(score_rows) + + + + # Calculate two-frame extended comfort + two_frame_comfort_df = calculate_two_frame_extended_comfort( + pdm_score_df, proposal_sampling=instantiate(cfg.simulator.proposal_sampling) + ) + + # Merge two-frame comfort scores and drop unnecessary columns in one step + pdm_score_df = ( + pdm_score_df.drop(columns=["ego_simulated_states"]) # Remove the unwanted column first + .merge( + two_frame_comfort_df[["current_token", "two_frame_extended_comfort"]], + left_on="token", + right_on="current_token", + how="left", + ) + .drop(columns=["current_token"]) # Remove merged key after the merge + ) + + # Compute final scores + pdm_score_df = compute_final_scores(pdm_score_df) + + + + try: + if hasattr(cfg.train_test_split, "two_stage_mapping"): + two_stage_mapping: Dict[str, List[str]] = dict(cfg.train_test_split.two_stage_mapping) + else: + # infer two stage mapping from results + two_stage_mapping = infer_two_stage_mapping(pdm_score_df, first_stage_duration=4.0) + validate_two_stage_mapping(pdm_score_df, two_stage_mapping) + + # calculate weights for pseudo closed loop using config + weights = calculate_pseudo_closed_loop_weights(pdm_score_df, two_stage_mapping=two_stage_mapping) + assert len(weights) == len(pdm_score_df), "Couldn't calculate weights for all tokens." 
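For reference, the second-stage weighting produced by `calculate_pseudo_closed_loop_weights` (defined earlier in this script) is a distance-based soft assignment: every first-stage token keeps weight 1.0, while each candidate second-stage frame is weighted by exp(-d), where d is the distance between the first-stage trajectory endpoint and that frame's start point, normalized to sum to 1. A minimal sketch with made-up coordinates:

```python
# Minimal sketch of the exp(-distance) weighting, using invented numbers.
import numpy as np

first_stage_endpoint = np.array([10.0, 2.0])    # absolute endpoint_x / endpoint_y of the first-stage trajectory
second_stage_starts = np.array([                # start_point_x / start_point_y of candidate second-stage frames
    [10.5, 2.0],
    [13.0, 4.0],
    [20.0, 8.0],
])

distances = np.linalg.norm(second_stage_starts - first_stage_endpoint, axis=1)
weights = np.exp(-distances)
weights /= weights.sum()                        # normalized, so the closest start point dominates

print(distances.round(2), weights.round(3))     # e.g. [ 0.5   3.61 11.66] -> [0.957 0.043 0.   ]
```

Second-stage frames whose start point lies far from where the first-stage rollout actually ended therefore contribute almost nothing to the pseudo closed-loop aggregate.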
+        pdm_score_df = pdm_score_df.merge(weights, on="token")
+        pseudo_closed_loop_valid = True
+    except Exception:
+        logger.warning("----------- Failed to calculate pseudo closed-loop weights:")
+        traceback.print_exc()
+        pdm_score_df["weight"] = 1.0
+        pseudo_closed_loop_valid = False
+
+    num_successful_scenarios = pdm_score_df["valid"].sum()
+    num_failed_scenarios = len(pdm_score_df) - num_successful_scenarios
+    if num_failed_scenarios > 0:
+        failed_tokens = pdm_score_df[~pdm_score_df["valid"]]["token"].to_list()
+    else:
+        failed_tokens = []
+
+
+    score_cols = [
+        c
+        for c in pdm_score_df.columns
+        if (
+            (any(score.name in c for score in fields(PDMResults)) or c == "two_frame_extended_comfort" or c == "score")
+            and c != "pdm_score"
+        )
+    ]
+
+    # Calculate average score
+    average_row = pdm_score_df[score_cols].mean(skipna=True)
+    average_row["token"] = "average_all_frames"
+    average_row["valid"] = pdm_score_df["valid"].all()
+
+    # Calculate pseudo closed loop score with weighted average
+    pseudo_closed_loop_row = calculate_individual_mapping_scores(
+        pdm_score_df[score_cols + ["token", "weight"]], two_stage_mapping
+    )
+    pseudo_closed_loop_row["token"] = "pseudo_closed_loop"
+    pseudo_closed_loop_row["valid"] = pseudo_closed_loop_valid
+
+    # Original frames average
+    original_frames = pdm_score_df[pdm_score_df["frame_type"] == SceneFrameType.ORIGINAL]
+    average_original_row = original_frames[score_cols].mean(skipna=True)
+    average_original_row["token"] = "average_expert_frames"
+    average_original_row["valid"] = original_frames["valid"].all()
+
+    # append average and pseudo closed loop scores
+    pdm_score_df = pdm_score_df[["token", "valid"] + score_cols]
+    pdm_score_df.loc[len(pdm_score_df)] = average_row
+    pdm_score_df.loc[len(pdm_score_df)] = pseudo_closed_loop_row
+    pdm_score_df.loc[len(pdm_score_df)] = average_original_row
+
+    save_path = Path(cfg.output_dir)
+    timestamp = datetime.now().strftime("%Y.%m.%d.%H.%M.%S")
+    pdm_score_df.to_csv(save_path / f"{timestamp}.csv")
+
+    logger.info(
+        f"""
+        Finished running evaluation.
+        Number of successful scenarios: {num_successful_scenarios}.
+        Number of failed scenarios: {num_failed_scenarios}.
+        Final average score of valid results: {pdm_score_df['score'].mean()}.
+        Results are stored in: {save_path / f"{timestamp}.csv"}.
+ """ + ) + + if cfg.verbose: + logger.info( + f""" + Detailed results: + {pdm_score_df.iloc[-3:].T} + """ + ) + if num_failed_scenarios > 0: + logger.info( + f""" + List of failed tokens: + {failed_tokens} + """ + ) + + +if __name__ == "__main__": + main() diff --git a/navsim/planning/script/run_pdm_score.py b/navsim/planning/script/run_pdm_score.py index 20a1a35..47ba7b7 100644 --- a/navsim/planning/script/run_pdm_score.py +++ b/navsim/planning/script/run_pdm_score.py @@ -18,7 +18,7 @@ from nuplan.planning.utils.multithreading.worker_utils import worker_map from omegaconf import DictConfig -from navsim.agents.abstract_agent import AbstractAgent +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent from navsim.common.dataclasses import PDMResults, SensorConfig from navsim.common.dataloader import MetricCacheLoader, SceneFilter, SceneLoader from navsim.common.enums import SceneFrameType @@ -29,12 +29,97 @@ from navsim.planning.simulation.planner.pdm_planner.simulation.pdm_simulator import PDMSimulator from navsim.planning.simulation.planner.pdm_planner.utils.pdm_enums import WeightedMetricIndex from navsim.traffic_agents_policies.abstract_traffic_agents_policy import AbstractTrafficAgentsPolicy +from navsim.agents.diffusiondrive.transfuser_agent import TransfuserAgent,TransfuserConfig + logger = logging.getLogger(__name__) CONFIG_PATH = "config/pdm_scoring" CONFIG_NAME = "default_run_pdm_score" +from navsim.common.dataclasses import AgentInput, Scene, Annotations,Trajectory +from navsim.visualization.plots import plot_bev_with_agent +from pathlib import Path +import matplotlib.pyplot as plt + + + +from pathlib import Path +import shutil + +# 定义全局路径和摄像头列表 + +def save_cameras_by_token(agent_input,tokens): + navsim_root = Path("/data42/DATASET/openscene/OpenDriveLab___OpenScene/openscene-v1.1/openscene-v1.1/sensor_blobs/test") + synthetic_root = Path("/data/hdd01/dingzx/dataset/synthetic_scenes/synthetic_sensor") + camera_names = ["cam_b0", "cam_f0", "cam_l0", "cam_l1", "cam_l2", "cam_r0", "cam_r1", "cam_r2"] + + output_dir = Path(f"/data/hdd01/dingzx/navsim_exp/image_bev_hechen/{tokens}") + output_dir.mkdir(parents=True, exist_ok=True) + print(f"创建文件夹: {output_dir.resolve()}") + + for cam_name in camera_names: + camera = getattr(agent_input.cameras[-1], cam_name, None) + if not camera or not camera.camera_path: + print(f"警告: 摄像头 {cam_name} 路径无效") + continue + + source_path = synthetic_root / camera.camera_path + target_path = output_dir / f"{tokens}_{cam_name}.jpg" + source_path1 = navsim_root / camera.camera_path + if source_path.exists(): + shutil.copy(source_path, target_path) + print(f"保存成功: {target_path.name}") + else:# zhenshi + shutil.copy(source_path1, target_path) + print(f"保存成功: {target_path.name}") + +def plot_bev(scene: Scene, token: str, + poses: np.ndarray,anchor_poses: np.ndarray, + agent: TransfuserAgent, + agent_input: AgentInput + ): + save_cameras_by_token(agent_input,token) + """保存包含所有轨迹的BEV图像""" + output_dir = Path(f"/data/hdd01/dingzx/navsim_exp/image_bev_hechen/{token}").expanduser() + output_dir.mkdir(parents=True, exist_ok=True) + # output_path = output_dir / f"{token}.png" + output_path = output_dir / f"random_command_{token}.png" + agent=agent + fig, ax = plot_bev_with_agent( + scene=scene, + agent=agent, + poses=poses, + anchor_poses=anchor_poses + ) + + ax.set_title(f"BEV Trajectory - {token}", fontsize=14) + fig.savefig(output_path, dpi=300, bbox_inches="tight") + + plt.close(fig) +# def plot_bev(scene: Scene,token: str,anchor_trajectories: 
np.ndarray): +# """ +# self._config = config +# self._lr = lr +# self._checkpoint_path = checkpoint_path +# self._transfuser_model = TransfuserModel(config) +# """ +# output_dir = Path("~/navsim_exp/image_trajectory").expanduser() # 跨平台路径处理 +# filename = f"{token}.png" # 动态文件名 +# output_path = output_dir / filename +# output_dir.mkdir(parents=True, exist_ok=True) # 自动创建目录 +# agent = TransfuserAgent(config=TransfuserConfig,lr=6e-4, +# checkpoint_path="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.03.28.23.58.29/lightning_logs/version_0/checkpoints/99.ckpt" +# ) +# # 生成图像 +# fig, ax = plot_bev_with_agent(scene, agent) +# # 自定义图例和标题(可选) +# ax.legend(["真实轨迹", "预测轨迹"], loc="upper left") +# ax.set_title("BEV Trajectory Comparison", fontsize=14) +# # 保存图像 +# fig.savefig(output_path,dpi=300, bbox_inches="tight") +# import pdb;pdb.set_trace() + def run_pdm_score(args: List[Dict[str, Union[List[str], DictConfig]]]) -> List[pd.DataFrame]: """ @@ -54,6 +139,8 @@ def run_pdm_score(args: List[Dict[str, Union[List[str], DictConfig]]]) -> List[p assert ( simulator.proposal_sampling == scorer.proposal_sampling ), "Simulator and scorer proposal sampling has to be identical" + # print(cfg.agent) + # import pdb;pdb.set_trace() agent: AbstractAgent = instantiate(cfg.agent) agent.initialize() @@ -75,18 +162,50 @@ def run_pdm_score(args: List[Dict[str, Union[List[str], DictConfig]]]) -> List[p tokens_to_evaluate = list(set(scene_loader.tokens) & set(metric_cache_loader.tokens)) pdm_results: List[pd.DataFrame] = [] + + # import pandas as pd + # import random + # import numpy as np + # csv_path = "/data/hdd01/dingzx/navsim_exp/diffusiondrive_agent_eval/2025.03.29.13.48.31/2025.03.29.14.08.58.csv" + # df = pd.read_csv(csv_path) + # zero_score_df = df[df['score'] == 0.0] + # tokens_zero = zero_score_df['token'].tolist() + for idx, (token) in enumerate(tokens_to_evaluate): + logger.info( f"Processing scenario {idx + 1} / {len(tokens_to_evaluate)} in thread_id={thread_id}, node_id={node_id}" ) try: + # 0a5a2eee51272b898 ego_statuses driving_command + # print(">>>>>>>>>") metric_cache = metric_cache_loader.get_from_token(token) agent_input = scene_loader.get_agent_input_from_token(token) + + # if token=='0a5a2eee51272b898' or token=='0aac715400ac14881' : + # agent_input.ego_statuses[-1].driving_command=np.array([0, 0, 1, 0], dtype=np.int64) + # if token in tokens_zero: + # idx=random.randint(0,2) + # random_command=np.array([0,0,0,0],dtype=np.int64) + # random_command[idx]=1 + # agent_input.ego_statuses[-1].driving_command=random_command + + scene = scene_loader.get_scene_from_token(token) + # a=scene.get_future_trajectory() if agent.requires_scene: - scene = scene_loader.get_scene_from_token(token) - trajectory = agent.compute_trajectory(agent_input, scene) + poses,anchor_poses = agent.compute_trajectory(agent_input) else: - trajectory = agent.compute_trajectory(agent_input) + poses,anchor_poses = agent.compute_trajectory(agent_input) + trajectory=Trajectory(poses) + # plot + # if token=='0a5a2eee51272b898' or token=='0aac715400ac14881': + # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + # plot_bev(scene, token, poses,anchor_poses,agent,agent_input) + # plot_bev(scene, token, poses,anchor_poses,agent,agent_input) + + # if token in tokens_zero: + # plot_bev(scene, token, poses,anchor_poses,agent,agent_input) + score_row, ego_simulated_states = pdm_score( metric_cache=metric_cache, @@ -243,7 +362,7 @@ def calculate_two_frame_extended_comfort(score_df: pd.DataFrame, proposal_sampli """ results = [] 
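The hunk above touches `calculate_two_frame_extended_comfort`, whose overlap alignment is worth spelling out: consecutive original frames are roughly 0.5 s apart, so with a 0.1 s simulation step the current frame's rollout from step 5 onward covers the same absolute time window as the next frame's rollout up to its last 5 steps, and only that shared window is scored. A toy sketch of the slicing (the state arrays are placeholders, not real ego states):

```python
# Toy illustration of the overlap slicing used for two-frame extended comfort.
import numpy as np

interval_length = 0.1                                        # simulation step [s]
observation_interval = 0.5                                   # gap between consecutive original frames [s]
overlap_start = int(observation_interval / interval_length)  # -> 5 steps

current_states = np.zeros((41, 11))                          # stand-in for the current frame's simulated states
next_states = np.zeros((41, 11))                             # stand-in for the next frame's simulated states

current_overlap = current_states[overlap_start:]             # drop the part before the next frame starts
next_overlap = next_states[:-overlap_start]                  # drop the part after the current frame ends
assert current_overlap.shape == next_overlap.shape           # both cover the shared time window
time_point_s = np.arange(current_overlap.shape[0]) * interval_length
```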
interval_length = proposal_sampling.interval_length # Default: 0.1s - + # print(score_df) grouped_logs = score_df[score_df["frame_type"] == SceneFrameType.ORIGINAL].groupby("log_name") for log_name, group_df in grouped_logs: @@ -384,7 +503,6 @@ def calculate_individual_mapping_scores(df: pd.DataFrame, two_stage_mapping: Dic return mapping_scroes_row - @hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) def main(cfg: DictConfig) -> None: """ @@ -427,6 +545,8 @@ def main(cfg: DictConfig) -> None: pdm_score_df = pd.concat(score_rows) + + # Calculate two-frame extended comfort two_frame_comfort_df = calculate_two_frame_extended_comfort( pdm_score_df, proposal_sampling=instantiate(cfg.simulator.proposal_sampling) @@ -447,6 +567,8 @@ def main(cfg: DictConfig) -> None: # Compute final scores pdm_score_df = compute_final_scores(pdm_score_df) + + try: if hasattr(cfg.train_test_split, "two_stage_mapping"): two_stage_mapping: Dict[str, List[str]] = dict(cfg.train_test_split.two_stage_mapping) @@ -473,6 +595,7 @@ def main(cfg: DictConfig) -> None: else: failed_tokens = [] + score_cols = [ c for c in pdm_score_df.columns @@ -538,3 +661,4 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": main() + diff --git a/navsim/planning/script/run_training.py b/navsim/planning/script/run_training.py index 44fd0c2..bc589f3 100644 --- a/navsim/planning/script/run_training.py +++ b/navsim/planning/script/run_training.py @@ -133,6 +133,7 @@ def main(cfg: DictConfig) -> None: logger.info("Building SceneLoader") train_data, val_data = build_datasets(cfg, agent) + logger.info("Building Datasets") train_dataloader = DataLoader(train_data, **cfg.dataloader.params, shuffle=True) logger.info("Num training samples: %d", len(train_data)) @@ -143,6 +144,7 @@ def main(cfg: DictConfig) -> None: trainer = pl.Trainer(**cfg.trainer.params, callbacks=agent.get_training_callbacks()) logger.info("Starting Training") + # import pdb;pdb.set_trace() trainer.fit( model=lightning_module, train_dataloaders=train_dataloader, diff --git a/navsim/planning/script/run_training_caching.py b/navsim/planning/script/run_training_caching.py new file mode 100644 index 0000000..01dcf4e --- /dev/null +++ b/navsim/planning/script/run_training_caching.py @@ -0,0 +1,134 @@ +import logging +from pathlib import Path +from typing import Tuple + +import hydra +import pytorch_lightning as pl +from hydra.utils import instantiate +from omegaconf import DictConfig +from torch.utils.data import DataLoader + +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent +from navsim.common.dataclasses import SceneFilter +from navsim.common.dataloader import SceneLoader +from navsim.planning.training.agent_lightning_module import AgentLightningModule +from navsim.planning.training.dataset import CacheOnlyDataset, Dataset + +logger = logging.getLogger(__name__) + +CONFIG_PATH = "config/training" +CONFIG_NAME = "default_training" + + +def build_datasets(cfg: DictConfig, agent: AbstractAgent) -> Tuple[Dataset, Dataset]: + """ + Builds training and validation datasets from omega config + :param cfg: omegaconf dictionary + :param agent: interface of agents in NAVSIM + :return: tuple for training and validation dataset + """ + train_scene_filter: SceneFilter = instantiate(cfg.train_test_split.scene_filter) + if train_scene_filter.log_names is not None: + train_scene_filter.log_names = [ + log_name + for log_name in train_scene_filter.log_names + if log_name in cfg.train_logs + ] + else: + train_scene_filter.log_names = 
cfg.train_logs + + val_scene_filter: SceneFilter = instantiate(cfg.train_test_split.scene_filter) + if val_scene_filter.log_names is not None: + val_scene_filter.log_names = [ + log_name + for log_name in val_scene_filter.log_names + if log_name in cfg.val_logs + ] + else: + val_scene_filter.log_names = cfg.val_logs + + data_path = Path(cfg.navsim_log_path) + sensor_blobs_path = Path(cfg.sensor_blobs_path) + navsim_blobs_path = Path(cfg.navsim_blobs_path) + synthetic_scenes_path = Path(cfg.synthetic_scenes_path) + + train_scene_loader = SceneLoader( + sensor_blobs_path=sensor_blobs_path, + navsim_blobs_path=navsim_blobs_path, + data_path=data_path, + synthetic_scenes_path=synthetic_scenes_path, + scene_filter=train_scene_filter, + sensor_config=agent.get_sensor_config(), + ) + + val_scene_loader = SceneLoader( + sensor_blobs_path=sensor_blobs_path, + navsim_blobs_path=navsim_blobs_path, + data_path=data_path, + synthetic_scenes_path=synthetic_scenes_path, + scene_filter=val_scene_filter, + sensor_config=agent.get_sensor_config(), + ) + + train_data = Dataset( + scene_loader=train_scene_loader, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + cache_path=cfg.cache_path, + force_cache_computation=cfg.force_cache_computation, + ) + + val_data = Dataset( + scene_loader=val_scene_loader, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + cache_path=cfg.cache_path, + force_cache_computation=cfg.force_cache_computation, + ) + + return train_data, val_data + + +@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) +def main(cfg: DictConfig) -> None: + """ + Main entrypoint for training an agent. + :param cfg: omegaconf dictionary + """ + pl.seed_everything(cfg.seed, workers=True) + logger.info(f"Global Seed set to {cfg.seed}") + + logger.info(f"Path where all results are stored: {cfg.output_dir}") + + logger.info("Building Agent") + agent: AbstractAgent = instantiate(cfg.agent) + + if cfg.use_cache_without_dataset: + logger.info("Using cached data without building SceneLoader") + assert ( + not cfg.force_cache_computation + ), "force_cache_computation must be False when using cached data without building SceneLoader" + assert ( + cfg.cache_path is not None + ), "cache_path must be provided when using cached data without building SceneLoader" + train_data = CacheOnlyDataset( + cache_path=cfg.cache_path, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + log_names=cfg.train_logs, + ) + val_data = CacheOnlyDataset( + cache_path=cfg.cache_path, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + log_names=cfg.val_logs, + ) + else: + logger.info("Building SceneLoader") + train_data, val_data = build_datasets(cfg, agent) + + + +if __name__ == "__main__": + main() + diff --git a/navsim/planning/script/run_training_diffusiondrive.py b/navsim/planning/script/run_training_diffusiondrive.py new file mode 100644 index 0000000..7d7d7e8 --- /dev/null +++ b/navsim/planning/script/run_training_diffusiondrive.py @@ -0,0 +1,202 @@ +import logging +from pathlib import Path +from typing import Tuple + +import hydra +import pytorch_lightning as pl +from hydra.utils import instantiate +from omegaconf import DictConfig +from torch.utils.data import DataLoader + +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent +from navsim.common.dataclasses import SceneFilter +from 
navsim.common.dataloader import SceneLoader +from navsim.planning.training.agent_lightning_module import AgentLightningModule +from navsim.planning.training.dataset import CacheOnlyDataset, Dataset + +logger = logging.getLogger(__name__) + +CONFIG_PATH = "config/training" +CONFIG_NAME = "default_training" + + +def build_datasets(cfg: DictConfig, agent: AbstractAgent) -> Tuple[Dataset, Dataset]: + """ + Builds training and validation datasets from omega config + :param cfg: omegaconf dictionary + :param agent: interface of agents in NAVSIM + :return: tuple for training and validation dataset + """ + train_scene_filter: SceneFilter = instantiate(cfg.train_test_split.scene_filter) + if train_scene_filter.log_names is not None: + train_scene_filter.log_names = [ + log_name + for log_name in train_scene_filter.log_names + if log_name in cfg.train_logs + ] + else: + train_scene_filter.log_names = cfg.train_logs + + val_scene_filter: SceneFilter = instantiate(cfg.train_test_split.scene_filter) + if val_scene_filter.log_names is not None: + val_scene_filter.log_names = [ + log_name + for log_name in val_scene_filter.log_names + if log_name in cfg.val_logs + ] + else: + val_scene_filter.log_names = cfg.val_logs + + data_path = Path(cfg.navsim_log_path) + sensor_blobs_path = Path(cfg.sensor_blobs_path) + navsim_blobs_path = Path(cfg.navsim_blobs_path) + synthetic_scenes_path = Path(cfg.synthetic_scenes_path) + + train_scene_loader = SceneLoader( + sensor_blobs_path=sensor_blobs_path, + navsim_blobs_path=navsim_blobs_path, + data_path=data_path, + synthetic_scenes_path=synthetic_scenes_path, + scene_filter=train_scene_filter, + sensor_config=agent.get_sensor_config(), + ) + + val_scene_loader = SceneLoader( + sensor_blobs_path=sensor_blobs_path, + navsim_blobs_path=navsim_blobs_path, + data_path=data_path, + synthetic_scenes_path=synthetic_scenes_path, + scene_filter=val_scene_filter, + sensor_config=agent.get_sensor_config(), + ) + + train_data = Dataset( + scene_loader=train_scene_loader, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + cache_path=cfg.cache_path, + force_cache_computation=cfg.force_cache_computation, + ) + + val_data = Dataset( + scene_loader=val_scene_loader, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + cache_path=cfg.cache_path, + force_cache_computation=cfg.force_cache_computation, + ) + + return train_data, val_data + + +@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) +def main(cfg: DictConfig) -> None: + """ + Main entrypoint for training an agent. 
+ :param cfg: omegaconf dictionary + """ + # print(">>>>>") + pl.seed_everything(cfg.seed, workers=True) + logger.info(f"Global Seed set to {cfg.seed}") + + logger.info(f"Path where all results are stored: {cfg.output_dir}") + + logger.info("Building Agent") + agent: AbstractAgent = instantiate(cfg.agent) + # import pdb;pdb.set_trace() + # print("dzx") + logger.info("Building Lightning Module") + # if cfg.agent.checkpoint_path: + # lightning_module = AgentLightningModule.load_from_checkpoint( + # cfg.agent.checkpoint_path,agent=agent + # ) + # else: + lightning_module = AgentLightningModule( + agent=agent, + ) + # import pdb;pdb.set_trace() + # print("dzx") + # print("dzx") + if cfg.use_cache_without_dataset: + logger.info("Using cached data without building SceneLoader") + assert ( + not cfg.force_cache_computation + ), "force_cache_computation must be False when using cached data without building SceneLoader" + assert ( + cfg.cache_path is not None + ), "cache_path must be provided when using cached data without building SceneLoader" + train_data = CacheOnlyDataset( + cache_path=cfg.cache_path, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + log_names=cfg.train_logs, + ) + val_data = CacheOnlyDataset( + cache_path=cfg.cache_path, + feature_builders=agent.get_feature_builders(), + target_builders=agent.get_target_builders(), + log_names=cfg.val_logs, + ) + else: + logger.info("Building SceneLoader") + train_data, val_data = build_datasets(cfg, agent) + + logger.info("Building Datasets") + train_dataloader = DataLoader(train_data, **cfg.dataloader.params, shuffle=True) + logger.info("Num training samples: %d", len(train_data)) + val_dataloader = DataLoader(val_data, **cfg.dataloader.params, shuffle=False) + logger.info("Num validation samples: %d", len(val_data)) + + logger.info("Building Trainer") + trainer = pl.Trainer(**cfg.trainer.params, callbacks=agent.get_training_callbacks()) + + # from pytorch_lightning.strategies import DDPStrategy + # trainer = pl.Trainer( + # **{ + # **cfg.trainer.params, + # "strategy": DDPStrategy(find_unused_parameters=True), + # }, + # callbacks=agent.get_training_callbacks() + # ) + # trainer = pl.Trainer( + # **cfg.trainer.params,callbacks=agent.get_training_callbacks() + # ) + logger.info("Starting Training") + # import pdb;pdb.set_trace() + + # checkpoint_path = "/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.30.20.01.08/lightning_logs/version_0/checkpoints/89.ckpt" + # 初始化 Trainer 并恢复训练 + # trainer = pl.Trainer( + # resume_from_checkpoint=checkpoint_path, # PyTorch Lightning <2.0 的写法 + # # ckpt_path=checkpoint_path, + # ) + + trainer.fit( + model=lightning_module, + train_dataloaders=train_dataloader, + val_dataloaders=val_dataloader, + ) + # if cfg.agent.checkpoint_path: + # # trainer.max_epochs = 134 #70个 + # trainer.fit( + # model=lightning_module, + # train_dataloaders=train_dataloader, + # val_dataloaders=val_dataloader, + # ckpt_path=cfg.agent.checkpoint_path + # ) + # else: + # trainer.fit( + # model=lightning_module, + # train_dataloaders=train_dataloader, + # val_dataloaders=val_dataloader + # ) + # trainer.fit( + # model=lightning_module, + # train_dataloaders=train_dataloader, + # val_dataloaders=val_dataloader, + # ckpt_path=checkpoint_path + # ) + +if __name__ == "__main__": + main() + diff --git a/navsim/planning/simulation/planner/pdm_planner/scoring/scene_aggregator.py b/navsim/planning/simulation/planner/pdm_planner/scoring/scene_aggregator.py new file mode 100644 
index 0000000..698e9a5 --- /dev/null +++ b/navsim/planning/simulation/planner/pdm_planner/scoring/scene_aggregator.py @@ -0,0 +1,120 @@ +from dataclasses import dataclass +from typing import List, Optional, Tuple + +import numpy as np +import pandas as pd +from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling + +from navsim.planning.simulation.planner.pdm_planner.scoring.pdm_comfort_metrics import ego_is_two_frame_extended_comfort + + +@dataclass +class SceneAggregator: + now_frame: str + previous_frame: str + score_df: pd.DataFrame + proposal_sampling: TrajectorySampling + second_stage: Optional[List[Tuple[str, str]]] = None + + def calculate_pseudo_closed_loop_weights(self, first_stage_row, second_stage_scores) -> pd.Series: + + pd.options.mode.copy_on_write = True + + def _calc_distance(x1, y1, x2, y2): + return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) + + second_stage_scores = second_stage_scores.copy() + second_stage_scores["distance"] = second_stage_scores.apply( + lambda x: _calc_distance( + first_stage_row["endpoint_x"], + first_stage_row["endpoint_y"], + x["start_point_x"], + x["start_point_y"], + ), + axis=1, + ) + second_stage_scores["weight"] = np.exp(-second_stage_scores["distance"]) + second_stage_scores["weight"] /= second_stage_scores["weight"].sum() + + assert np.isclose( + second_stage_scores["weight"].sum(), 1.0, atol=1e-6 + ), f"Second-stage weights do not sum to 1. Got {second_stage_scores['weight'].sum()}" + + return second_stage_scores[["weight"]].reset_index() + + def _compute_two_frame_comfort(self, current_token: str, previous_token: str) -> float: + try: + current_row = self.score_df.loc[current_token] + prev_row = self.score_df.loc[previous_token] + except KeyError as e: + raise ValueError(f"Missing token in score_df: {e}") + + current_states = current_row.get("ego_simulated_states") + prev_states = prev_row.get("ego_simulated_states") + + interval_length = self.proposal_sampling.interval_length + observation_interval = current_row["start_time"] - prev_row["start_time"] + + assert 0 < observation_interval < 0.55, f"Invalid interval {observation_interval}" + + overlap_start = round(observation_interval / interval_length) + current_states_overlap = current_states[:-overlap_start] + prev_states_overlap = prev_states[overlap_start:] + + n_overlap = current_states_overlap.shape[0] + time_point_s = np.arange(n_overlap) * interval_length + + two_frame_comfort = ego_is_two_frame_extended_comfort( + current_states_overlap[None, :], + prev_states_overlap[None, :], + time_point_s, + )[0].astype(np.float64) + + return two_frame_comfort + + def aggregate_scores(self, one_stage_only=False) -> pd.DataFrame: + updates = [] + + if one_stage_only: + main_comfort = self._compute_two_frame_comfort(self.now_frame, self.previous_frame) + updates.append({"token": self.now_frame, "two_frame_extended_comfort": main_comfort}) + + else: + # =====First stage===== + main_comfort = self._compute_two_frame_comfort(self.now_frame, self.previous_frame) + updates.append({"token": self.now_frame, "two_frame_extended_comfort": main_comfort, "weight": 1.0}) + updates.append({"token": self.previous_frame, "two_frame_extended_comfort": main_comfort, "weight": 1.0}) + + # =====Second stage===== + # t = 0s and t = 4s + second_stage_now_tokens = [pair[0] for pair in self.second_stage] + second_stage_now_scores = self.score_df.loc[second_stage_now_tokens] + + first_stage_now_row = self.score_df.loc[self.now_frame] + weights_now = 
self.calculate_pseudo_closed_loop_weights(first_stage_now_row, second_stage_now_scores) + + # t = -0.5s and t = 3.5s + second_stage_prev_tokens = [pair[1] for pair in self.second_stage] + second_stage_prev_scores = self.score_df.loc[second_stage_prev_tokens] + + first_stage_prev_row = self.score_df.loc[self.previous_frame] + weights_prev = self.calculate_pseudo_closed_loop_weights(first_stage_prev_row, second_stage_prev_scores) + + weights = pd.concat([weights_now, weights_prev], ignore_index=True) + + weight_map = dict(zip(weights["token"], weights["weight"])) + + for (now_token, prev_token) in self.second_stage: + + two_frame_comfort = self._compute_two_frame_comfort(now_token, prev_token) + weight_now = weight_map[now_token] + weight_prev = weight_map[prev_token] + + updates.append( + {"token": now_token, "two_frame_extended_comfort": two_frame_comfort, "weight": weight_now} + ) + updates.append( + {"token": prev_token, "two_frame_extended_comfort": two_frame_comfort, "weight": weight_prev} + ) + + return pd.DataFrame(updates) diff --git a/navsim/planning/simulation/planner/pdm_planner/utils/pdm_geometry_utils.py b/navsim/planning/simulation/planner/pdm_planner/utils/pdm_geometry_utils.py index 91da6dd..8c294cf 100644 --- a/navsim/planning/simulation/planner/pdm_planner/utils/pdm_geometry_utils.py +++ b/navsim/planning/simulation/planner/pdm_planner/utils/pdm_geometry_utils.py @@ -83,6 +83,9 @@ def convert_absolute_to_relative_se2_array( :param state_se2_array: array of SE2 states with (x,y,θ) in last dim :return: SE2 coords array in relative coordinates """ + # print(len(SE2Index)) + # print(state_se2_array.shape[-1]) + # print(">>>>") assert len(SE2Index) == state_se2_array.shape[-1] theta = -origin.heading diff --git a/navsim/planning/training/abstract_feature_target_builder.py b/navsim/planning/training/abstract_feature_target_builder.py index 2e73b6d..2f23be2 100644 --- a/navsim/planning/training/abstract_feature_target_builder.py +++ b/navsim/planning/training/abstract_feature_target_builder.py @@ -44,3 +44,4 @@ def compute_targets(self, scene: Scene) -> Dict[str, Tensor]: Outputs a dictionary where each item has a unique identifier and maps to a single target tensor. One TargetBuilder can return a dict with multiple TargetTensors. 
""" + pass diff --git a/navsim/planning/training/agent_lightning_module.py b/navsim/planning/training/agent_lightning_module.py index 2ba872a..8a17ae0 100644 --- a/navsim/planning/training/agent_lightning_module.py +++ b/navsim/planning/training/agent_lightning_module.py @@ -3,7 +3,7 @@ import pytorch_lightning as pl from torch import Tensor -from navsim.agents.abstract_agent import AbstractAgent +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent class AgentLightningModule(pl.LightningModule): @@ -25,10 +25,21 @@ def _step(self, batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]], logging_pref :return: scalar loss """ features, targets = batch - prediction = self.agent.forward(features) + # prediction = self.agent.forward(features) + prediction = self.agent.forward(features,targets=targets) loss = self.agent.compute_loss(features, targets, prediction) - self.log(f"{logging_prefix}/loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True) - return loss + + + # # transfuser + # self.log(f"{logging_prefix}/loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True) + # return loss + + # diffusion + for k, v in loss.items(): + if v is not None: + self.log(f"{logging_prefix}/{k}", v, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True, batch_size=len(batch[0])) + return loss['loss'] + def training_step(self, batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]], batch_idx: int) -> Tensor: """ diff --git a/navsim/planning/training/dataset.py b/navsim/planning/training/dataset.py index d0d002b..3977818 100644 --- a/navsim/planning/training/dataset.py +++ b/navsim/planning/training/dataset.py @@ -14,6 +14,784 @@ logger = logging.getLogger(__name__) + +import torch +from torchvision import transforms + +import skimage as sk +from skimage.filters import gaussian +from io import BytesIO +from wand.image import Image as WandImage +from wand.api import library as wandlibrary +import wand.color as WandColor +import ctypes +from PIL import Image as PILImage +import cv2 +from scipy.ndimage import zoom as scizoom +from scipy.ndimage import map_coordinates +import warnings +import numpy as np + +# -*- coding: utf-8 -*- + +import os +from PIL import Image +import os.path +import time +import torch +import torchvision.datasets as dset +import torchvision.transforms as trn +import torch.utils.data as data +import numpy as np +from torchvision import transforms + +from PIL import Image + +# /////////////// Data Loader /////////////// + + +IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] + + +def is_image_file(filename): + """Checks if a file is an image. 
+ Args: + filename (string): path to a file + Returns: + bool: True if the filename ends with a known image extension + """ + filename_lower = filename.lower() + return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) + + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + +def make_dataset(dir, class_to_idx): + images = [] + dir = os.path.expanduser(dir) + for target in sorted(os.listdir(dir)): + d = os.path.join(dir, target) + if not os.path.isdir(d): + continue + + for root, _, fnames in sorted(os.walk(d)): + for fname in sorted(fnames): + if is_image_file(fname): + path = os.path.join(root, fname) + item = (path, class_to_idx[target]) + images.append(item) + + return images + + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +def accimage_loader(path): + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path): + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class DistortImageFolder(data.Dataset): + def __init__(self, root, method, severity, transform=None, target_transform=None, + loader=default_loader): + classes, class_to_idx = find_classes(root) + imgs = make_dataset(root, class_to_idx) + if len(imgs) == 0: + raise (RuntimeError("Found 0 images in subfolders of: " + root + "\n" + "Supported image extensions are: " + ",".join( + IMG_EXTENSIONS))) + + self.root = root + self.method = method + self.severity = severity + self.imgs = imgs + self.classes = classes + self.class_to_idx = class_to_idx + self.idx_to_class = {v: k for k, v in class_to_idx.items()} + self.transform = transform + self.target_transform = target_transform + self.loader = loader + + def __getitem__(self, index): + path, target = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + img = self.method(img, self.severity) + if self.target_transform is not None: + target = self.target_transform(target) + + save_path = '/share/data/vision-greg/DistortedImageNet/JPEG/' + self.method.__name__ + \ + '/' + str(self.severity) + '/' + self.idx_to_class[target] + + if not os.path.exists(save_path): + os.makedirs(save_path) + + save_path += path[path.rindex('/'):] + + Image.fromarray(np.uint8(img)).save(save_path, quality=85, optimize=True) + + return 0 # we do not care about returning the data + + def __len__(self): + return len(self.imgs) + + +warnings.simplefilter("ignore", UserWarning) + + +def auc(errs): # area under the alteration error curve + area = 0 + for i in range(1, len(errs)): + area += (errs[i] + errs[i - 1]) / 2 + area /= len(errs) - 1 + return area + + +def disk(radius, alias_blur=0.1, dtype=np.float32): + if radius <= 8: + L = np.arange(-8, 8 + 1) + ksize = (3, 3) + else: + L = np.arange(-radius, radius + 1) + ksize = (5, 5) + X, Y = np.meshgrid(L, L) + aliased_disk = np.array((X ** 2 + Y ** 2) <= radius ** 2, dtype=dtype) + aliased_disk /= np.sum(aliased_disk) + + # supersample disk to antialias + return cv2.GaussianBlur(aliased_disk, ksize=ksize, sigmaX=alias_blur) + + +# Tell Python about the C 
method +wandlibrary.MagickMotionBlurImage.argtypes = (ctypes.c_void_p, # wand + ctypes.c_double, # radius + ctypes.c_double, # sigma + ctypes.c_double) # angle + + +# Extend wand.image.Image class to include method signature +class MotionImage(WandImage): + def motion_blur(self, radius=0.0, sigma=0.0, angle=0.0): + wandlibrary.MagickMotionBlurImage(self.wand, radius, sigma, angle) + + +# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py +def plasma_fractal(mapsize=256, wibbledecay=3): + """ + Generate a heightmap using diamond-square algorithm. + Return square 2d array, side length 'mapsize', of floats in range 0-255. + 'mapsize' must be a power of two. + """ + assert (mapsize & (mapsize - 1) == 0) + maparray = np.empty((mapsize, mapsize), dtype=np.float_) + maparray[0, 0] = 0 + stepsize = mapsize + wibble = 100 + + def wibbledmean(array): + return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape) + + def fillsquares(): + """For each square of points stepsize apart, + calculate middle value as mean of points + wibble""" + cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize] + squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0) + squareaccum += np.roll(squareaccum, shift=-1, axis=1) + maparray[stepsize // 2:mapsize:stepsize, + stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum) + + def filldiamonds(): + """For each diamond of points stepsize apart, + calculate middle value as mean of points + wibble""" + mapsize = maparray.shape[0] + drgrid = maparray[stepsize // 2:mapsize:stepsize, stepsize // 2:mapsize:stepsize] + ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize] + ldrsum = drgrid + np.roll(drgrid, 1, axis=0) + lulsum = ulgrid + np.roll(ulgrid, -1, axis=1) + ltsum = ldrsum + lulsum + maparray[0:mapsize:stepsize, stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum) + tdrsum = drgrid + np.roll(drgrid, 1, axis=1) + tulsum = ulgrid + np.roll(ulgrid, -1, axis=0) + ttsum = tdrsum + tulsum + maparray[stepsize // 2:mapsize:stepsize, 0:mapsize:stepsize] = wibbledmean(ttsum) + + while stepsize >= 2: + fillsquares() + filldiamonds() + stepsize //= 2 + wibble /= wibbledecay + + maparray -= maparray.min() + return maparray / maparray.max() + + +def clipped_zoom(img, zoom_factor): + + h, w = img.shape[:2] + + new_h = int(np.round(h / zoom_factor)) + new_w = int(np.round(w / zoom_factor)) + + top = (h - new_h) // 2 + left = (w - new_w) // 2 + + img = scizoom(img[top:top+new_h, left:left+new_w], + (zoom_factor, zoom_factor, 1), + order=1) + + return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR) + + +# /////////////// End Distortion Helpers /////////////// + + +# /////////////// Distortions /////////////// + +def gaussian_noise(x, severity=1): + c = [.08, .12, 0.18, 0.26, 0.38][severity - 1] + + x = np.array(x) / 255. + return np.clip(x + np.random.normal(size=x.shape, scale=c), 0, 1) * 255 + + +def shot_noise(x, severity=1): + c = [60, 25, 12, 5, 3][severity - 1] + + x = np.array(x) / 255. + return np.clip(np.random.poisson(x * c) / c, 0, 1) * 255 + + +def impulse_noise(x, severity=1): + c = [.03, .06, .09, 0.17, 0.27][severity - 1] + + x = sk.util.random_noise(np.array(x) / 255., mode='s&p', amount=c) + return np.clip(x, 0, 1) * 255 + + +def speckle_noise(x, severity=1): + c = [.15, .2, 0.35, 0.45, 0.6][severity - 1] + + x = np.array(x) / 255. 
+ return np.clip(x + x * np.random.normal(size=x.shape, scale=c), 0, 1) * 255 + + +def fgsm(x, source_net, severity=1): + c = [8, 16, 32, 64, 128][severity - 1] + + x = V(x, requires_grad=True) + logits = source_net(x) + source_net.zero_grad() + loss = F.cross_entropy(logits, V(logits.data.max(1)[1].squeeze_()), size_average=False) + loss.backward() + + return standardize(torch.clamp(unstandardize(x.data) + c / 255. * unstandardize(torch.sign(x.grad.data)), 0, 1)) + + +def gaussian_blur(x, severity=1): + c = [1, 2, 3, 4, 6][severity - 1] + + x = gaussian(np.array(x) / 255., sigma=c, multichannel=True) + return np.clip(x, 0, 1) * 255 + + +def glass_blur(x, severity=1): + # sigma, max_delta, iterations + c = [(0.7, 1, 2), (0.9, 2, 1), (1, 2, 3), (1.1, 3, 2), (1.5, 4, 2)][severity - 1] + + x = np.uint8(gaussian(np.array(x) / 255., sigma=c[0], multichannel=True) * 255) + + # locally shuffle pixels + for i in range(c[2]): + for h in range(224 - c[1], c[1], -1): + for w in range(224 - c[1], c[1], -1): + dx, dy = np.random.randint(-c[1], c[1], size=(2,)) + h_prime, w_prime = h + dy, w + dx + # swap + x[h, w], x[h_prime, w_prime] = x[h_prime, w_prime], x[h, w] + + return np.clip(gaussian(x / 255., sigma=c[0], multichannel=True), 0, 1) * 255 + + +def defocus_blur(x, severity=1): + c = [(3, 0.1), (4, 0.5), (6, 0.5), (8, 0.5), (10, 0.5)][severity - 1] + + x = np.array(x) / 255. + kernel = disk(radius=c[0], alias_blur=c[1]) + + channels = [] + for d in range(3): + channels.append(cv2.filter2D(x[:, :, d], -1, kernel)) + channels = np.array(channels).transpose((1, 2, 0)) # 3x224x224 -> 224x224x3 + + return np.clip(channels, 0, 1) * 255 + + +def motion_blur(x, severity=1): + c = [(10, 3), (15, 5), (15, 8), (15, 12), (20, 15)][severity - 1] + + output = BytesIO() + x.save(output, format='PNG') + x = MotionImage(blob=output.getvalue()) + + x.motion_blur(radius=c[0], sigma=c[1], angle=np.random.uniform(-45, 45)) + + x = cv2.imdecode(np.frombuffer(x.make_blob(), np.uint8), + cv2.IMREAD_UNCHANGED) + + if x.shape != (224, 224): + return np.clip(x[..., [2, 1, 0]], 0, 255) # BGR to RGB + else: # greyscale to RGB + return np.clip(np.array([x, x, x]).transpose((1, 2, 0)), 0, 255) + + +def clipped_zoom(img, zoom_factor): + h, w = img.shape[:2] + + new_h = int(np.round(h / zoom_factor)) + new_w = int(np.round(w / zoom_factor)) + + top = (h - new_h) // 2 + left = (w - new_w) // 2 + + img = scizoom(img[top:top+new_h, left:left+new_w], + (zoom_factor, zoom_factor, 1), + order=1) + + return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR) + +def zoom_blur(x, severity=1): + c = [np.arange(1, 1.11, 0.01), + np.arange(1, 1.16, 0.01), + np.arange(1, 1.21, 0.02), + np.arange(1, 1.26, 0.02), + np.arange(1, 1.31, 0.03)][severity - 1] + + x = (np.array(x) / 255.).astype(np.float32) + h, w = x.shape[:2] + + out = np.zeros_like(x) + + for zoom_factor in c: + zoomed = clipped_zoom(x, zoom_factor) + out += zoomed + + x = (x + out) / (len(c) + 1) + return np.clip(x, 0, 1) * 255 + + +# def barrel(x, severity=1): +# c = [(0,0.03,0.03), (0.05,0.05,0.05), (0.1,0.1,0.1), +# (0.2,0.2,0.2), (0.1,0.3,0.6)][severity - 1] +# +# output = BytesIO() +# x.save(output, format='PNG') +# +# x = WandImage(blob=output.getvalue()) +# x.distort('barrel', c) +# +# x = cv2.imdecode(np.fromstring(x.make_blob(), np.uint8), +# cv2.IMREAD_UNCHANGED) +# +# if x.shape != (224, 224): +# return np.clip(x[..., [2, 1, 0]], 0, 255) # BGR to RGB +# else: # greyscale to RGB +# return np.clip(np.array([x, x, x]).transpose((1, 2, 0)), 0, 255) + + +def fog(x, 
severity=1): + c = [(1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)][severity - 1] + + x = np.array(x) / 255. + max_val = x.max() + x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis] + return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255 + + +def frost(x, severity=1): + c = [(1, 0.4), + (0.8, 0.6), + (0.7, 0.7), + (0.65, 0.7), + (0.6, 0.75)][severity - 1] + idx = np.random.randint(5) + filename = ['./frost1.png', './frost2.png', './frost3.png', './frost4.jpg', './frost5.jpg', './frost6.jpg'][idx] + frost = cv2.imread(filename) + # randomly crop and convert to rgb + x_start, y_start = np.random.randint(0, frost.shape[0] - 224), np.random.randint(0, frost.shape[1] - 224) + frost = frost[x_start:x_start + 224, y_start:y_start + 224][..., [2, 1, 0]] + + return np.clip(c[0] * np.array(x) + c[1] * frost, 0, 255) + + +def snow(x, severity=1): + c = [(0.1, 0.3, 3, 0.5, 10, 4, 0.8), + (0.2, 0.3, 2, 0.5, 12, 4, 0.7), + (0.55, 0.3, 4, 0.9, 12, 8, 0.7), + (0.55, 0.3, 4.5, 0.85, 12, 8, 0.65), + (0.55, 0.3, 2.5, 0.85, 12, 12, 0.55)][severity - 1] + + # get the original image size + if isinstance(x, Image.Image): + w, h = x.size + x = np.array(x).astype(np.float32) / 255. + else: + h, w = x.shape[:2] + x = x.astype(np.float32) / 255. + + # generate the snow layer (works for arbitrary image sizes) + snow_layer = np.random.normal(size=(h, w), loc=c[0], scale=c[1]) + + # zoom the snow layer (keeping the aspect ratio) + zoom_factor = c[2] + snow_layer_zoomed = clipped_zoom(snow_layer[..., np.newaxis], zoom_factor) + snow_layer = snow_layer_zoomed.squeeze() + + # apply the threshold + snow_layer[snow_layer < c[3]] = 0 + + # build the motion-blurred snow layer + snow_pil = Image.fromarray((np.clip(snow_layer, 0, 1) * 255).astype(np.uint8)) + with WandImage() as wand_img: + wand_img.read(blob=snow_pil.tobytes()) + wand_img.motion_blur(radius=c[4], sigma=c[5], angle=np.random.uniform(-135, -45)) + snow_layer = np.array(wand_img).astype(np.float32) / 255. + + # handle different channel layouts + if snow_layer.ndim == 2: + snow_layer = snow_layer[..., np.newaxis] + + # grayscale conversion adapted to the input size + gray = cv2.cvtColor(x, cv2.COLOR_RGB2GRAY) + gray_reshaped = gray.reshape(h, w, 1) # use the dynamic dimensions + + # composite the final image + blended = c[6] * x + (1 - c[6]) * np.maximum(x, gray_reshaped * 1.5 + 0.5) + result = np.clip(blended + snow_layer + np.rot90(snow_layer, k=2), 0, 1) + + return (result * 255).astype(np.uint8) + + +def spatter(x, severity=1): + c = [(0.65, 0.3, 4, 0.69, 0.6, 0), + (0.65, 0.3, 3, 0.68, 0.6, 0), + (0.65, 0.3, 2, 0.68, 0.5, 0), + (0.65, 0.3, 1, 0.65, 1.5, 1), + (0.67, 0.4, 1, 0.65, 1.5, 1)][severity - 1] + x = np.array(x, dtype=np.float32) / 255. + + liquid_layer = np.random.normal(size=x.shape[:2], loc=c[0], scale=c[1]) + + liquid_layer = gaussian(liquid_layer, sigma=c[2]) + liquid_layer[liquid_layer < c[3]] = 0 + if c[5] == 0: + liquid_layer = (liquid_layer * 255).astype(np.uint8) + dist = 255 - cv2.Canny(liquid_layer, 50, 150) + dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5) + _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC) + dist = cv2.blur(dist, (3, 3)).astype(np.uint8) + dist = cv2.equalizeHist(dist) + # ker = np.array([[-1,-2,-3],[-2,0,0],[-3,0,1]], dtype=np.float32) + # ker -= np.mean(ker) + ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]]) + dist = cv2.filter2D(dist, cv2.CV_8U, ker) + dist = cv2.blur(dist, (3, 3)).astype(np.float32) + + m = cv2.cvtColor(liquid_layer * dist, cv2.COLOR_GRAY2BGRA) + m /= np.max(m, axis=(0, 1)) + m *= c[4] + + # water is pale turquoise + color = np.concatenate((175 / 255. * np.ones_like(m[..., :1]), + 238 / 255. * np.ones_like(m[..., :1]), + 238 / 255.
* np.ones_like(m[..., :1])), axis=2) + + color = cv2.cvtColor(color, cv2.COLOR_BGR2BGRA) + x = cv2.cvtColor(x, cv2.COLOR_BGR2BGRA) + + return cv2.cvtColor(np.clip(x + m * color, 0, 1), cv2.COLOR_BGRA2BGR) * 255 + else: + m = np.where(liquid_layer > c[3], 1, 0) + m = gaussian(m.astype(np.float32), sigma=c[4]) + m[m < 0.8] = 0 + # m = np.abs(m) ** (1/c[4]) + + # mud brown + color = np.concatenate((63 / 255. * np.ones_like(x[..., :1]), + 42 / 255. * np.ones_like(x[..., :1]), + 20 / 255. * np.ones_like(x[..., :1])), axis=2) + + color *= m[..., np.newaxis] + x *= (1 - m[..., np.newaxis]) + + return np.clip(x + color, 0, 1) * 255 + + +def contrast(x, severity=1): + c = [0.4, .3, .2, .1, .05][severity - 1] + + x = np.array(x) / 255. + means = np.mean(x, axis=(0, 1), keepdims=True) + return np.clip((x - means) * c + means, 0, 1) * 255 + + +def brightness(x, severity=1): + c = [.1, .2, .3, .4, .5][severity - 1] + + x = np.array(x) / 255. + x = sk.color.rgb2hsv(x) + x[:, :, 2] = np.clip(x[:, :, 2] + c, 0, 1) + x = sk.color.hsv2rgb(x) + + return np.clip(x, 0, 1) * 255 + + +def saturate(x, severity=1): + c = [(0.3, 0), (0.1, 0), (2, 0), (5, 0.1), (20, 0.2)][severity - 1] + + x = np.array(x) / 255. + x = sk.color.rgb2hsv(x) + x[:, :, 1] = np.clip(x[:, :, 1] * c[0] + c[1], 0, 1) + x = sk.color.hsv2rgb(x) + + return np.clip(x, 0, 1) * 255 + + +def jpeg_compression(x, severity=1): + c = [25, 18, 15, 10, 7][severity - 1] + + output = BytesIO() + x.save(output, 'JPEG', quality=c) + x = PILImage.open(output) + + return x + + +def pixelate(x, severity=1): + c = [0.6, 0.5, 0.4, 0.3, 0.25][severity - 1] + + x = x.resize((int(224 * c), int(224 * c)), PILImage.BOX) + x = x.resize((224, 224), PILImage.BOX) + + return x + + +# mod of https://gist.github.com/erniejunior/601cdf56d2b424757de5 +def elastic_transform(image, severity=1): + c = [(244 * 2, 244 * 0.7, 244 * 0.1), # 244 should have been 224, but ultimately nothing is incorrect + (244 * 2, 244 * 0.08, 244 * 0.2), + (244 * 0.05, 244 * 0.01, 244 * 0.02), + (244 * 0.07, 244 * 0.01, 244 * 0.02), + (244 * 0.12, 244 * 0.01, 244 * 0.02)][severity - 1] + + image = np.array(image, dtype=np.float32) / 255. 
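+ # overview (added comment): elastic_transform first applies a small random affine warp, then displaces every pixel by a Gaussian-smoothed random flow field (dx, dy) resampled with map_coordinates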
+ shape = image.shape + shape_size = shape[:2] + + # random affine + center_square = np.float32(shape_size) // 2 + square_size = min(shape_size) // 3 + pts1 = np.float32([center_square + square_size, + [center_square[0] + square_size, center_square[1] - square_size], + center_square - square_size]) + pts2 = pts1 + np.random.uniform(-c[2], c[2], size=pts1.shape).astype(np.float32) + M = cv2.getAffineTransform(pts1, pts2) + image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101) + + dx = (gaussian(np.random.uniform(-1, 1, size=shape[:2]), + c[1], mode='reflect', truncate=3) * c[0]).astype(np.float32) + dy = (gaussian(np.random.uniform(-1, 1, size=shape[:2]), + c[1], mode='reflect', truncate=3) * c[0]).astype(np.float32) + dx, dy = dx[..., np.newaxis], dy[..., np.newaxis] + + x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2])) + indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1)) + return np.clip(map_coordinates(image, indices, order=1, mode='reflect').reshape(shape), 0, 1) * 255 + + +# /////////////// End Distortions /////////////// + + +# /////////////// Further Setup /////////////// + + +def save_distorted(method=gaussian_noise): + for severity in range(1, 6): + print(method.__name__, severity) + distorted_dataset = DistortImageFolder( + root="/share/data/vision-greg/ImageNet/clsloc/images/val", + method=method, severity=severity, + transform=trn.Compose([trn.Resize(256), trn.CenterCrop(224)])) + distorted_dataset_loader = torch.utils.data.DataLoader( + distorted_dataset, batch_size=100, shuffle=False, num_workers=4) + + for _ in distorted_dataset_loader: continue + + +# /////////////// End Further Setup /////////////// + + +# /////////////// Display Results /////////////// +import collections + +# print('\nUsing ImageNet data') + +d = collections.OrderedDict() + # d['Zoom Blur'] = zoom_blur + # d['JPEG'] = jpeg_compression + # d['Pixelate'] = pixelate + # d['Motion Blur'] = motion_blur + # d['Defocus Blur'] = defocus_blur + # d['Elastic'] = elastic_transform + + + + +# d['Gaussian Noise'] = gaussian_noise +# d['Shot Noise'] = shot_noise +# d['Impulse Noise'] = impulse_noise +# d['Brightness'] = brightness +# d['Contrast'] = contrast +# d['Speckle Noise'] = speckle_noise +# d['Spatter'] = spatter +# d['Saturate'] = saturate +import os +from pathlib import Path +from PIL import Image +import numpy as np +import torchvision.transforms as trn + +import os +import numpy as np +import torch +from PIL import Image +from torchvision import transforms +from pathlib import Path + +def save_distorted_images_from_numpy( + input_image: np.ndarray, # h w c + methods: dict, + severity: int = 4, +): + + img_pil = Image.fromarray(input_image.astype(np.uint8)).convert('RGB') + for method_name, method in methods.items(): + distorted_img = method(img_pil, severity) + to_pil = transforms.ToPILImage() + if not isinstance(distorted_img, Image.Image): + if isinstance(distorted_img, np.ndarray): + distorted_img = Image.fromarray(distorted_img.astype(np.uint8)) + else: + distorted_img = to_pil(distorted_img) + + if distorted_img.size != img_pil.size: + distorted_img = distorted_img.resize(img_pil.size, Image.LANCZOS) + + distorted_tensor = transforms.ToTensor()(distorted_img) + + return distorted_tensor + +def camera_feature_improve(camera_feature): + original_device = camera_feature.device + tensor_image = camera_feature.detach().cpu() + + denormalized_image = tensor_image * 255.0 + denormalized_image = 
denormalized_image.byte() + # h w c + image_np = denormalized_image.numpy().transpose(1, 2, 0) + + # noise + dd = collections.OrderedDict() + # import random + # idx=random.randint(1,60) + # dd['Motion Blur'] = motion_blur + # if idx<11: + # dd['Motion Blur'] = motion_blur + # elif idx<21: + # dd['Zoom Blur'] = zoom_blur + # elif idx<31: + # dd['JPEG'] = jpeg_compression + # elif idx<41: + # dd['Pixelate'] = pixelate + # elif idx<51: + # dd['Defocus Blur'] = defocus_blur + # else: + # dd['Elastic'] = elastic_transform + dd['Zoom Blur'] = zoom_blur + # dzx + camera_feature = save_distorted_images_from_numpy(image_np, dd) + camera_feature = camera_feature.to(original_device) + return camera_feature + + +import numpy as np +from PIL import Image +import cv2 +import math +import os +from torchvision import transforms +def rotate_with_nearest_fill(img_np, angle_range=(-8, 8)): + h, w = img_np.shape[:2] + angle = np.random.uniform(angle_range[0], angle_range[1]) + center = (w//2, h//2) + rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0) + rotated_np = cv2.warpAffine( + img_np, + rot_mat, + (w, h), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_REPLICATE + ) + + distorted_tensor = transforms.ToTensor()(rotated_np) + return distorted_tensor + +def camera_feature_rotate(camera_feature): + original_device = camera_feature.device + tensor_image = camera_feature.detach().cpu() + + denormalized_image = tensor_image * 255.0 + denormalized_image = denormalized_image.byte() + # h w c + image_np = denormalized_image.numpy().transpose(1, 2, 0) + + camera_feature=rotate_with_nearest_fill(image_np, angle_range=(-8, 8)) + + camera_feature = camera_feature.to(original_device) + return camera_feature + +def tensor_to_np_tensor_1024(tensor): + original_device = tensor.device + tensor=tensor.detach().cpu() + denormalized=tensor * 255.0 + image_np=denormalized.permute(1, 2, 0).to(torch.uint8).numpy() + resized_np=cv2.resize(image_np, (1024, 256), interpolation=cv2.INTER_LINEAR) + result=torch.from_numpy(resized_np).permute(2, 0, 1).to(original_device) + result=result.to(torch.float32) + return result + def load_feature_target_from_pickle(path: Path) -> Dict[str, torch.Tensor]: """Helper function to load pickled feature/target from path.""" with gzip.open(path, "rb") as f: @@ -122,13 +900,55 @@ def _load_scene_with_token(self, token: str) -> Tuple[Dict[str, torch.Tensor], D data_dict_path = token_path / (builder.get_unique_name() + ".gz") data_dict = load_feature_target_from_pickle(data_dict_path) features.update(data_dict) - + targets: Dict[str, torch.Tensor] = {} for builder in self._target_builders: data_dict_path = token_path / (builder.get_unique_name() + ".gz") data_dict = load_feature_target_from_pickle(data_dict_path) targets.update(data_dict) - + # rotate + # import random + # random_int = random.randint(1, 100) + # random_int1 = random.randint(1, 100) + # random_int2 = random.randint(1, 100) + + # # 微调不用 + # # camera_feature = torch.rand(3, 512, 2048) + # # features["camera_feature"]=tensor_to_np_tensor_1024(features["camera_feature"]) + + # if random_int1<61: + # features["camera_feature"]=camera_feature_rotate(features["camera_feature"]) + # if random_int<61: + # # noise_image + # features["camera_feature"] = camera_feature_improve(features["camera_feature"]) + # # noise_status_feature + # if random_int2<61: + # velo = features["status_feature"][4:] + # sigma = 0.2 * torch.abs(velo) + # noise = torch.normal(mean=0.0, std=sigma) + # velo += noise + # command=features["status_feature"][:4] + # 
features["status_feature"]=torch.cat([command,velo],dim=0) + # # feature + + # # 修改变成一维 + # import math + # vle=math.sqrt(features["status_feature"][4]**2 + features["status_feature"][5]**2) + # acc=math.sqrt(features["status_feature"][6]**2 + features["status_feature"][7]**2) + # tag_vle=1 + # tag_acc=1 + # if features["status_feature"][4]<0: + # tag_vle=-1 + + # dot_product = np.dot(features["status_feature"][4:6], features["status_feature"][6:8]) + # if dot_product>0: + # tag_acc=tag_vle + # else: + # tag_acc=-1*tag_vle + + # vle_acc=torch.tensor([tag_vle*vle,tag_acc*acc],dtype=torch.float32) + # command=features["status_feature"][:4] + # features["status_feature"]=torch.cat([command,vle_acc],dim=0) return (features, targets) diff --git a/navsim/visualization/bev.py b/navsim/visualization/bev.py index a5eed72..293dc2f 100644 --- a/navsim/visualization/bev.py +++ b/navsim/visualization/bev.py @@ -34,8 +34,8 @@ def add_configured_bev_on_ax(ax: plt.Axes, map_api: AbstractMap, frame: Frame) - if "annotations" in BEV_PLOT_CONFIG["layers"]: add_annotations_to_bev_ax(ax, frame.annotations) - if "lidar" in BEV_PLOT_CONFIG["layers"]: - add_lidar_to_bev_ax(ax, frame.lidar) + # if "lidar" in BEV_PLOT_CONFIG["layers"]: + # add_lidar_to_bev_ax(ax, frame.lidar) return ax @@ -167,11 +167,21 @@ def add_trajectory_to_bev_ax(ax: plt.Axes, trajectory: Trajectory, config: Dict[ alpha=config["line_color_alpha"], linewidth=config["line_width"], linestyle=config["line_style"], - marker=config["marker"], + # marker=config["marker"], markersize=config["marker_size"], markeredgecolor=config["marker_edge_color"], zorder=config["zorder"], ) + + if "fill_color" in config: + ax.fill_between( + poses[:, 1], + poses[:, 0] - 0.5, + poses[:, 0] + 0.5, + color=config["fill_color"], + alpha=config.get("fill_alpha", 0.3), + zorder=config["zorder"] - 1 + ) return ax diff --git a/navsim/visualization/camera.py b/navsim/visualization/camera.py index cd49233..93ffd14 100644 --- a/navsim/visualization/camera.py +++ b/navsim/visualization/camera.py @@ -33,7 +33,11 @@ def add_lidar_to_camera_ax(ax: plt.Axes, camera: Camera, lidar: Lidar) -> plt.Ax :param lidar: navsim lidar dataclass :return: ax object with image """ - + image = camera.image.copy() + # ax.imshow(image) # 直接显示图像 + # return + # import pdb;pdb.set_trace(); + # image, lidar_pc = camera.image.copy(), camera.image.copy() image, lidar_pc = camera.image.copy(), lidar.lidar_pc.copy() image_height, image_width = image.shape[:2] diff --git a/navsim/visualization/plots.py b/navsim/visualization/plots.py index deb5af1..cac1c26 100644 --- a/navsim/visualization/plots.py +++ b/navsim/visualization/plots.py @@ -5,12 +5,12 @@ from PIL import Image from tqdm import tqdm -from navsim.agents.abstract_agent import AbstractAgent -from navsim.common.dataclasses import Scene +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent +from navsim.common.dataclasses import Scene,Trajectory from navsim.visualization.bev import add_configured_bev_on_ax, add_trajectory_to_bev_ax from navsim.visualization.camera import add_annotations_to_camera_ax, add_camera_ax, add_lidar_to_camera_ax from navsim.visualization.config import BEV_PLOT_CONFIG, CAMERAS_PLOT_CONFIG, TRAJECTORY_CONFIG - +import numpy as np; def configure_bev_ax(ax: plt.Axes) -> plt.Axes: """ @@ -71,26 +71,71 @@ def plot_bev_frame(scene: Scene, frame_idx: int) -> Tuple[plt.Figure, plt.Axes]: return fig, ax -def plot_bev_with_agent(scene: Scene, agent: AbstractAgent) -> Tuple[plt.Figure, plt.Axes]: +def 
plot_bev_with_agent( + scene: Scene, + agent: AbstractAgent, + poses: np.ndarray, + anchor_poses: np.ndarray, + anchor_colors: List[str] = None +) -> Tuple[plt.Figure, plt.Axes]: """ - Plots agent and human trajectory in birds-eye-view visualization - :param scene: navsim scene dataclass - :param agent: navsim agent - :return: figure and ax object of matplotlib + Plots the ground-truth, predicted, and anchor trajectories in the BEV view + :param anchor_poses: anchor trajectory array (num_anchors, N, 3) + :param anchor_colors: list of colors, one per anchor """ + # fetch the trajectory data + # human_trajectory = scene.get_future_trajectory() - human_trajectory = scene.get_future_trajectory() - agent_trajectory = agent.compute_trajectory(scene.get_agent_input()) - + # create the canvas frame_idx = scene.scene_metadata.num_history_frames - 1 fig, ax = plt.subplots(1, 1, figsize=BEV_PLOT_CONFIG["figure_size"]) + + # draw the base map add_configured_bev_on_ax(ax, scene.map_api, scene.frames[frame_idx]) - add_trajectory_to_bev_ax(ax, human_trajectory, TRAJECTORY_CONFIG["human"]) - add_trajectory_to_bev_ax(ax, agent_trajectory, TRAJECTORY_CONFIG["agent"]) + # draw the ground-truth trajectory + # add_trajectory_to_bev_ax(ax, human_trajectory, TRAJECTORY_CONFIG["human"]) + + num_anchors = 20 + if anchor_colors is None: + cmap = plt.cm.get_cmap('tab10', num_anchors) + anchor_colors = [cmap(i) for i in range(num_anchors)] + # draw all anchor trajectories (20 anchors x 64 each) + anchor_config = TRAJECTORY_CONFIG["agent"].copy() + for i in range(num_anchors): + for j in range(64): + trajectory = Trajectory(anchor_poses[i][j]) + anchor_config.update({ + "line_color": anchor_colors[i], + "line_color_alpha": 0.4, + "label": f"Anchor {i+1}" + }) + add_trajectory_to_bev_ax(ax, trajectory, anchor_config) + + # configure axes and legend configure_bev_ax(ax) configure_ax(ax) - + # ax.legend(loc="upper right") return fig, ax +# def plot_bev_with_agent(scene: Scene, agent: AbstractAgent) -> Tuple[plt.Figure, plt.Axes]: +# """ +# Plots agent and human trajectory in birds-eye-view visualization +# :param scene: navsim scene dataclass +# :param agent: navsim agent +# :return: figure and ax object of matplotlib +# """ + +# human_trajectory = scene.get_future_trajectory() +# agent_trajectory = agent.compute_trajectory(scene.get_agent_input()) + +# frame_idx = scene.scene_metadata.num_history_frames - 1 +# fig, ax = plt.subplots(1, 1, figsize=BEV_PLOT_CONFIG["figure_size"]) +# add_configured_bev_on_ax(ax, scene.map_api, scene.frames[frame_idx]) +# add_trajectory_to_bev_ax(ax, human_trajectory, TRAJECTORY_CONFIG["human"]) +# add_trajectory_to_bev_ax(ax, agent_trajectory, TRAJECTORY_CONFIG["agent"]) +# configure_bev_ax(ax) +# configure_ax(ax) + +# return fig, ax def plot_cameras_frame(scene: Scene, frame_idx: int) -> Tuple[plt.Figure, Any]: diff --git a/requirements.txt b/requirements.txt index 7c4e795..e6d64c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,7 +39,7 @@ tornado # Used in nuboard.py tqdm # Used widely ujson # Used in serialiation_callback.py -torch==2.0.1 +torch==2.0.1 torchvision==0.15.2 pytorch-lightning==2.2.1 tensorboard==2.16.2 diff --git a/scripts/evaluation/run_human_agent_pdm_score_evaluation.sh b/scripts/evaluation/run_human_agent_pdm_score_evaluation.sh index b005ce8..620810b 100755 --- a/scripts/evaluation/run_human_agent_pdm_score_evaluation.sh +++ b/scripts/evaluation/run_human_agent_pdm_score_evaluation.sh @@ -3,4 +3,4 @@ TRAIN_TEST_SPLIT=navtest python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_pdm_score.py \ train_test_split=$TRAIN_TEST_SPLIT \ agent=human_agent \ -experiment_name=human_agent +experiment_name=human_agent \ No newline at end of file diff
--git a/scripts/evaluation/run_metric_caching.sh b/scripts/evaluation/run_metric_caching.sh index b8d92d8..0fd0f1e 100755 --- a/scripts/evaluation/run_metric_caching.sh +++ b/scripts/evaluation/run_metric_caching.sh @@ -1,6 +1,36 @@ -TRAIN_TEST_SPLIT=navtest -CACHE_PATH=$NAVSIM_EXP_ROOT/metric_cache +# # navtest +# export HYDRA_FULL_ERROR=1 +# TRAIN_TEST_SPLIT=navtest +# CACHE_PATH=$NAVSIM_EXP_ROOT/metric_cache_navtest -python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_metric_caching.py \ -train_test_split=$TRAIN_TEST_SPLIT \ -metric_cache_path=$CACHE_PATH +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_metric_caching.py \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# metric_cache_path=$CACHE_PATH + + +# export HYDRA_FULL_ERROR=1 +# TRAIN_TEST_SPLIT=warmup_two_stage +# CACHE_PATH=$NAVSIM_EXP_ROOT/metric_warmup_two_stage + +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_metric_caching.py \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# metric_cache_path=$CACHE_PATH + + +#!/bin/bash + +# cd into the checkpoints directory (change to your actual path) +cd /data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.05.18.22.59.47/lightning_logs/version_0/checkpoints/ + +# loop over all ckpt files that match the pattern +for file in epoch=*-step=*.ckpt; do + # use sed to extract the epoch number and build the new filename + newname=$(echo "$file" | sed -E 's/epoch=([0-9]+)-step=.*\.ckpt/\1.ckpt/') + echo "New filename: $newname" + echo "$file" + + # perform the rename (for a dry run, prefix the next line with echo) + mv -- "$file" "$newname" +done \ No newline at end of file diff --git a/scripts/evaluation/run_transfuser.sh b/scripts/evaluation/run_transfuser.sh index 96762da..c8f13ef 100755 --- a/scripts/evaluation/run_transfuser.sh +++ b/scripts/evaluation/run_transfuser.sh @@ -1,9 +1,28 @@ -TRAIN_TEST_SPLIT=navtest -CHECKPOINT=/path/to/transfuser.ckpt +export HYDRA_FULL_ERROR=1 +TRAIN_TEST_SPLIT=warmup_navsafe_two_stage_extended + +# CHECKPOINT="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.03.28.23.58.29/lightning_logs/version_0/checkpoints/99.ckpt" +# plotting enabled, no token limit, motion_blur data augmentation +# 6 of them, same score +# CHECKPOINT="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.03.22.48.51/lightning_logs/version_0/checkpoints/99.ckpt" +# 1, motion +# CHECKPOINT="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.02.16.07.18/lightning_logs/version_0/checkpoints/99.ckpt" +# 0.6 +# CHECKPOINT='/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.09.22.54.54/lightning_logs/version_0/checkpoints/99.ckpt' +# 2 of them +# CHECKPOINT='/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.04.20.02.40/lightning_logs/version_0/checkpoints/99.ckpt' +# none +# CHECKPOINT='/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.03.28.08.55.12/lightning_logs/version_0/checkpoints/99.ckpt' +# rotate +CHECKPOINT='/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.15.22.26.33/lightning_logs/version_0/checkpoints/99.ckpt' +export CUDA_VISIBLE_DEVICES="" +export PYTHONPATH=/data/hdd01/dingzx/workspace/navsim +cd /data/hdd01/dingzx/workspace/navsim python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_pdm_score.py \ train_test_split=$TRAIN_TEST_SPLIT \ -agent=transfuser_agent \ +agent=diffusiondrive_agent \ worker=single_machine_thread_pool \ -agent.checkpoint_path=$CHECKPOINT \ -experiment_name=transfuser_agent_eval +agent.checkpoint_path="$CHECKPOINT" \ +experiment_name=diffusiondrive_agent_eval \ +agent.config.latent=True \ No newline at end of file diff --git a/scripts/submission/run_cv_create_submission_pickle.sh
b/scripts/submission/run_cv_create_submission_pickle.sh index c5a0033..5506971 100755 --- a/scripts/submission/run_cv_create_submission_pickle.sh +++ b/scripts/submission/run_cv_create_submission_pickle.sh @@ -1,17 +1,23 @@ -TEAM_NAME="MUST_SET" -AUTHORS="MUST_SET" -EMAIL="MUST_SET" -INSTITUTION="MUST_SET" -COUNTRY="MUST_SET" - -TRAIN_TEST_SPLIT=navtest +TEAM_NAME="111" +AUTHORS="ddd" +EMAIL="11@qq.com" +INSTITUTION="1" +COUNTRY="CHINA" +export HYDRA_FULL_ERROR=1 +export CUDA_VISIBLE_DEVICES="" +TRAIN_TEST_SPLIT=warmup_two_stage +# CHECKPOINT="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.03.28.23.58.29/lightning_logs/version_0/checkpoints/99.ckpt" +# CHECKPOINT="/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.02.16.07.18/lightning_logs/version_0/checkpoints/99.ckpt" +CHECKPOINT='/data/hdd01/dingzx/navsim_exp/training_diffusiondrive_agent/2025.04.09.22.54.54/lightning_logs/version_0/checkpoints/99.ckpt' python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_create_submission_pickle.py \ train_test_split=$TRAIN_TEST_SPLIT \ -agent=constant_velocity_agent \ +agent=diffusiondrive_agent \ experiment_name=submission_cv_agent \ team_name=$TEAM_NAME \ authors=$AUTHORS \ email=$EMAIL \ +agent.checkpoint_path="$CHECKPOINT" \ +agent.config.latent=True \ institution=$INSTITUTION \ -country=$COUNTRY \ +country=$COUNTRY diff --git a/scripts/training/run_ego_mlp_agent_training.sh b/scripts/training/run_ego_mlp_agent_training.sh index 040f27b..9b9acec 100755 --- a/scripts/training/run_ego_mlp_agent_training.sh +++ b/scripts/training/run_ego_mlp_agent_training.sh @@ -1,6 +1,16 @@ TRAIN_TEST_SPLIT=navtrain - -python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training.py \ -experiment_name=training_ego_mlp_agent \ -trainer.params.max_epochs=50 \ +export CUDA_VISIBLE_DEVICES=8,9 +export HYDRA_FULL_ERROR=1 +FEATURE_CACHE='/data/hdd01/xingzb/navsim_exp/training_cache' +export NAVSIM_DEVKIT_ROOT="/data/hdd01/dingzx/workspace/navsim" +cd /data/hdd01/dingzx/workspace/navsim +# /data/hdd01/dingzx/workspace/navsim/navsim/planning/script/run_training.py +python /data/hdd01/dingzx/workspace/navsim/navsim/planning/script/run_training.py \ +experiment_name=debug_diffusiondriv_agent \ train_test_split=$TRAIN_TEST_SPLIT \ +agent=ego_status_mlp_agent \ +cache_path=$FEATURE_CACHE \ +force_cache_computation=False \ +use_cache_without_dataset=True + +# ego_status_mlp_agent \ No newline at end of file diff --git a/scripts/training/run_training_cache.sh b/scripts/training/run_training_cache.sh new file mode 100644 index 0000000..735a86f --- /dev/null +++ b/scripts/training/run_training_cache.sh @@ -0,0 +1,9 @@ +export HYDRA_FULL_ERROR=1 +export CUDA_VISIBLE_DEVICES=8,9 +TRAIN_TEST_SPLIT=navtrain +python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training_caching.py \ +agent=diffusiondrive_agent \ +experiment_name=cache_agent \ +train_test_split=$TRAIN_TEST_SPLIT \ ++trainer.params.devices=2 \ +agent.config.latent=True diff --git a/scripts/training/run_transfuser_training.sh b/scripts/training/run_transfuser_training.sh index 9476eec..b7016ae 100755 --- a/scripts/training/run_transfuser_training.sh +++ b/scripts/training/run_transfuser_training.sh @@ -1,6 +1,101 @@ -TRAIN_TEST_SPLIT=navtrain +export HYDRA_FULL_ERROR=1 +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +FEATURE_CACHE='/data/hdd01/dingzx/dzx_navsim_exp/training_cache_noise_6_zoom_vleacc_1_rotate_vleaccnoise' +# 5.20训的noise_6_zoom_vleacc_1_rotate_vleaccnoise -python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training.py 
\ -agent=transfuser_agent \ -experiment_name=training_transfuser_agent \ +TRAIN_TEST_SPLIT=navtrain +export NAVSIM_DEVKIT_ROOT="/data/hdd01/dingzx/workspace/navsim" +# export PYTHONPATH="${PYTHONPATH}:/data/hdd01/dingzx/workspace1/navsim" +cd /data/hdd01/dingzx/workspace/navsim/ +export PYTHONPATH=/data/hdd01/dingzx/workspace/navsim +python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training_diffusiondrive.py \ +agent=diffusiondrive_agent \ +experiment_name=training_diffusiondrive_agent \ train_test_split=$TRAIN_TEST_SPLIT \ +cache_path=$FEATURE_CACHE \ +force_cache_computation=False \ +use_cache_without_dataset=True \ ++trainer.params.devices=8 \ +agent.config.latent=True + + +# export HYDRA_FULL_ERROR=1 +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +# FEATURE_CACHE='/data/hdd01/xingzb/navsim_exp/training_cache' +# TRAIN_TEST_SPLIT=navtrain +# export NAVSIM_DEVKIT_ROOT=/data/hdd01/dingzx/workspace/navsim +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training.py \ +# agent=transfuser_agent \ +# experiment_name=dzx_training_transfuser_agent \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# cache_path=$FEATURE_CACHE \ +# force_cache_computation=False \ +# use_cache_without_dataset=True \ +# +trainer.params.devices=8 \ +# agent.config.latent=True + + + + + + + + + + + + + + + +# agent.lr=6e-5 \ +# agent.checkpoint_path="$CHECKPOINT" \ + + +# # you lidar +# export HYDRA_FULL_ERROR=1 +# TRAIN_TEST_SPLIT=navtrain +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7,8,9 +# FEATURE_CACHE='/data/hdd01/xingzb/navsim_exp/training_cache' +# # # FREEZE_PERCEPTION=False +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training_diffusiondrive.py \ +# agent=diffusiondrive_agent \ +# experiment_name=training_diffusiondrive_agent \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# cache_path=$FEATURE_CACHE \ +# force_cache_computation=False \ +# use_cache_without_dataset=True \ +# +trainer.params.devices=10 \ + + +# export HYDRA_FULL_ERROR=1 +# TRAIN_TEST_SPLIT=navtrain +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7,8,9 +# FEATURE_CACHE='/data/hdd01/xingzb/navsim_exp/training_cache' +# # # FREEZE_PERCEPTION=False +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training_diffusiondrive.py \ +# agent=diffusiondrive_agent \ +# experiment_name=training_diffusiondrive_agent \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# cache_path=$FEATURE_CACHE \ +# force_cache_computation=False \ +# use_cache_without_dataset=True \ +# +trainer.params.devices=10 \ +# agent.config.latent=True + + +## transfuser +# export HYDRA_FULL_ERROR=1 +# TRAIN_TEST_SPLIT=navtrain +# export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7,8,9 +# FEATURE_CACHE='/data/hdd01/xingzb/navsim_exp/training_cache' +# # # FREEZE_PERCEPTION=False +# python $NAVSIM_DEVKIT_ROOT/navsim/planning/script/run_training.py \ +# agent=transfuser_agent \ +# experiment_name=training_transfuser_agent \ +# train_test_split=$TRAIN_TEST_SPLIT \ +# cache_path=$FEATURE_CACHE \ +# force_cache_computation=False \ +# use_cache_without_dataset=True \ +# +trainer.params.devices=10 \ +# agent.config.latent=True diff --git a/tutorial/1.py b/tutorial/1.py new file mode 100644 index 0000000..31336fa --- /dev/null +++ b/tutorial/1.py @@ -0,0 +1,78 @@ +import os +from pathlib import Path + +import hydra +from hydra.utils import instantiate +import numpy as np +import matplotlib.pyplot as plt + +from navsim.common.dataloader import SceneLoader +from navsim.common.dataclasses import SceneFilter, SensorConfig + +SPLIT = "test" # ["mini", "test", "trainval"] +FILTER = 
"warmup_navsafe_two_stage_extended" + +hydra.initialize(config_path="/data/hdd01/dingzx/workspace/navsim/planning/script/config/common/train_test_split/scene_filter") +cfg = hydra.compose(config_name=FILTER) +scene_filter: SceneFilter = instantiate(cfg) +openscene_data_root = Path(os.getenv("OPENSCENE_DATA_ROOT")) +scene_loader = SceneLoader( + "${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/${SPLIT}", # data_path + "/data/hdd01/dingzx/dataset/synthetic_scenes/synthetic_sensor", # sensor_blobs_path + "${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/sensor_blobs/${SPLIT}", # navsim_blobs_path + "/data/hdd01/dingzx/dataset/synthetic_scenes/scene_pickles", # synthetic_scenes_path + scene_filter, + sensor_config=SensorConfig.build_all_sensors(), +) +token = np.random.choice(scene_loader.tokens) +scene = scene_loader.get_scene_from_token(token) + + +from navsim.visualization.plots import plot_bev_frame + +frame_idx = scene.scene_metadata.num_history_frames - 1 # current frame +fig, ax = plot_bev_frame(scene, frame_idx) +plt.show() +from navsim.visualization.plots import plot_bev_with_agent +from navsim.agents.diffusiondrive.transfuser_agent import TransfuserAgent +from navsim.agents.abstract_agent_diffusiondrive import AbstractAgent + +agent: AbstractAgent = instantiate() +# agent = ConstantVelocityAgent() +fig, ax = plot_bev_with_agent(scene, agent) +plt.show() +from navsim.visualization.plots import plot_cameras_frame + +fig, ax = plot_cameras_frame(scene, frame_idx) +plt.show() +from navsim.visualization.plots import plot_cameras_frame_with_annotations + +fig, ax = plot_cameras_frame_with_annotations(scene, frame_idx) +plt.show() + +from navsim.visualization.plots import plot_cameras_frame_with_lidar + +fig, ax = plot_cameras_frame_with_lidar(scene, frame_idx) +plt.show() + +from navsim.visualization.plots import configure_bev_ax +from navsim.visualization.bev import add_annotations_to_bev_ax, add_lidar_to_bev_ax + + +fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + +ax.set_title("Custom plot") + +add_annotations_to_bev_ax(ax, scene.frames[frame_idx].annotations) +add_lidar_to_bev_ax(ax, scene.frames[frame_idx].lidar) + +# configures frame to BEV view +configure_bev_ax(ax) + +plt.show() + +from navsim.visualization.plots import frame_plot_to_gif + +frame_indices = [idx for idx in range(len(scene.frames))] # all frames in scene +file_name = f"./{token}.gif" +images = frame_plot_to_gif(file_name, plot_cameras_frame_with_annotations, scene, frame_indices) \ No newline at end of file diff --git a/tutorial/tutorial_visualization.ipynb b/tutorial/tutorial_visualization.ipynb index b286495..3c8d583 100644 --- a/tutorial/tutorial_visualization.ipynb +++ b/tutorial/tutorial_visualization.ipynb @@ -53,19 +53,18 @@ "from navsim.common.dataloader import SceneLoader\n", "from navsim.common.dataclasses import SceneFilter, SensorConfig\n", "\n", - "SPLIT = \"mini\" # [\"mini\", \"test\", \"trainval\"]\n", - "FILTER = \"all_scenes\"\n", + "SPLIT = \"test\" # [\"mini\", \"test\", \"trainval\"]\n", + "FILTER = \"warmup_navsafe_two_stage_extended\"\n", "\n", "hydra.initialize(config_path=\"./navsim/planning/script/config/common/train_test_split/scene_filter\")\n", "cfg = hydra.compose(config_name=FILTER)\n", "scene_filter: SceneFilter = instantiate(cfg)\n", "openscene_data_root = Path(os.getenv(\"OPENSCENE_DATA_ROOT\"))\n", - "\n", "scene_loader = SceneLoader(\n", - " openscene_data_root / f\"navsim_logs/{SPLIT}\", # data_path\n", - " openscene_data_root / \"synthetic_scenes/synthetic_sensor\", # 
sensor_blobs_path\n", - " openscene_data_root / f\"sensor_blobs/{SPLIT}\", # navsim_blobs_path\n", - " openscene_data_root / \"synthetic_scenes/scene_pickles\", # synthetic_scenes_path\n", + " \"${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/meta_datas/${SPLIT}\", # data_path\n", + " \"/data/hdd01/dingzx/dataset/synthetic_scenes/synthetic_sensor\", # sensor_blobs_path\n", + " \"${oc.env:OPENSCENE_DATA_ROOT}/openscene-v1.1/sensor_blobs/${SPLIT}\", # navsim_blobs_path\n", + " \"/data/hdd01/dingzx/dataset/synthetic_scenes/scene_pickles\", # synthetic_scenes_path\n", " scene_filter,\n", " sensor_config=SensorConfig.build_all_sensors(),\n", ")"
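For reference, a minimal end-to-end sketch of how the corruption helpers added in this diff are intended to be driven on a camera tensor, mirroring camera_feature_improve and save_distorted_images_from_numpy above. The module path navsim_corruptions is a hypothetical placeholder (in this diff the helpers live alongside the dataloader code); the rest uses only functions and signatures defined in the patch.

import collections
import torch
# hypothetical import path; adjust to wherever the corruption helpers are placed
from navsim_corruptions import zoom_blur, save_distorted_images_from_numpy

def corrupt_camera_tensor(camera_feature: torch.Tensor, severity: int = 4) -> torch.Tensor:
    """Apply one image corruption to a (3, H, W) float tensor with values in [0, 1]."""
    device = camera_feature.device
    # tensor -> uint8 HWC numpy array, same round-trip as camera_feature_improve
    image_np = (camera_feature.detach().cpu() * 255.0).byte().numpy().transpose(1, 2, 0)
    methods = collections.OrderedDict()
    methods['Zoom Blur'] = zoom_blur
    # returns a (3, H, W) float tensor in [0, 1], resized back to the input resolution
    corrupted = save_distorted_images_from_numpy(image_np, methods, severity=severity)
    return corrupted.to(device)

# usage in the feature-loading path, analogous to the commented-out augmentation block:
# features["camera_feature"] = corrupt_camera_tensor(features["camera_feature"])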