diff --git a/behavior_metrics/brains/f1rl/trainContinuous.py b/behavior_metrics/brains/f1rl/trainContinuous.py new file mode 100755 index 00000000..c8edc868 --- /dev/null +++ b/behavior_metrics/brains/f1rl/trainContinuous.py @@ -0,0 +1,182 @@ +import time +from datetime import datetime +import pickle +import torch +from torch.utils.tensorboard import SummaryWriter +import gym +from brains.f1rl.utils import liveplot +import gym_gazebo +import numpy as np +from gym import logger, wrappers +from brains.f1rl.utils.ddpg import DDPG +import brains.f1rl.utils.ddpg_utils.settingsDDPG as settings +from PIL import Image + +def render(): + render_skip = 0 # Skip first X episodes. + render_interval = 50 # Show render Every Y episodes. + render_episodes = 10 # Show Z episodes every rendering. + + if (episode % render_interval == 0) and (episode != 0) and (episode > render_skip): + env.render() + elif ((episode - render_episodes) % render_interval == 0) and (episode != 0) and (episode > render_skip) and \ + (render_episodes < episode): + env.render(close=True) + +# if __name__ == '__main__': +print(settings.title) +print(settings.description) + +current_env = settings.current_env +if current_env == "laser": + env_id = "GazeboF1LaserEnvDDPG-v0" + env = gym.make('GazeboF1LaserEnvDDPG-v0') +elif current_env == "camera": + env_id = "GazeboF1CameraEnvDDPG-v0" + env = gym.make('GazeboF1CameraEnvDDPG-v0') +else: + print("NO correct env selected") + +outdir = './logs/f1_ddpg_gym_experiments/' + +if not os.path.exists(outdir): + os.makedirs(outdir+'images/') + +env = gym.wrappers.Monitor(env, outdir, force=True) +plotter = liveplot.LivePlot(outdir) +last_time_steps = np.ndarray(0) +stimate_step_per_lap = 4000 +lap_completed = False + +if settings.load_model: + save_path = outdir+'model/' + model_path = save_path +else: + save_path = outdir+'model/' + model_path = None + +highest_reward = 0 + +total_episodes = settings.total_episodes + +start_time = time.time() + +seed = 123 +save_iter = int(total_episodes/20) +render = False +writer = SummaryWriter(outdir) +env.seed(seed) + +ddpg = DDPG(env_id, + 32, + 2, + render=False, + num_process=1, + memory_size=1000000, + lr_p=1e-3, + lr_v=1e-3, + gamma=0.99, + polyak=0.995, + explore_size=2000, + step_per_iter=1000, + batch_size=256, + min_update_step=1000, + update_step=50, + action_noise=0.1, + seed=seed) + +print(settings.lets_go) + +max_action = [12., 2] +min_action = [2., -2] + +for episode in range(total_episodes): + + global_steps = (episode - 1) * ddpg.step_per_iter + log = dict() + num_steps = 0 + num_episodes = 0 + total_reward = 0 + min_episode_reward = float('inf') + max_episode_reward = float('-inf') + lap_completed = False + cumulated_reward = 0 # Should going forward give more reward then L/R z? 
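The trainer drives a 2-D continuous action (linear and angular velocity) bounded by `max_action = [12., 2]` and `min_action = [2., -2]`, and the loop that follows maps the policy output, which lives in [-1, 1] per dimension, onto those bounds with an affine rescale. A minimal standalone sketch of that mapping (the function name and NumPy vectorisation are illustrative, not part of the diff):

```python
import numpy as np

# Bounds from trainContinuous.py: [linear velocity, angular velocity]
MAX_ACTION = np.array([12.0, 2.0])
MIN_ACTION = np.array([2.0, -2.0])

def rescale_action(action):
    """Map a policy output in [-1, 1] per dimension onto [MIN_ACTION, MAX_ACTION].

    Element-wise version of the transform applied in the training loop:
        a_env = a_min + 0.5 * (a_max - a_min) * (a + 1)
    """
    action = np.clip(np.asarray(action, dtype=np.float64), -1.0, 1.0)
    return MIN_ACTION + 0.5 * (MAX_ACTION - MIN_ACTION) * (action + 1.0)

print(rescale_action([-1.0, -1.0]))  # [ 2. -2.]  (slowest, hardest turn one way)
print(rescale_action([0.0, 0.0]))    # [ 7.  0.]  (mid-range speed, straight)
print(rescale_action([1.0, 1.0]))    # [12.  2.]  (fastest, hardest turn the other way)
```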
+ + while num_steps < ddpg.step_per_iter: + state = env.reset() + # state = self.running_state(state) + episode_reward = 0 + + for t in range(1000): + + if global_steps < ddpg.explore_size: # explore + action = env.action_space.sample() + else: # action with noise + action = ddpg.choose_action(state, ddpg.action_noise) + + mod_action = action + + for itr in range(len(action)): + mod_action[itr] = min_action[itr] + 0.5*(max_action[itr] - min_action[itr])*(action[itr]+1) + + next_state, reward, done, info = env.step(action) + # next_state = self.running_state(next_state) + mask = 0 if done else 1 + # ('state', 'action', 'reward', 'next_state', 'mask', 'log_prob') + ddpg.memory.push(state, action, reward, next_state, mask, None) + + # print("Points:", info['points']) + # print("Errors:", info['errors']) + # observation_image = Image.fromarray(info['image'].reshape(32,32)) + # observation_image.save(outdir+'/images/obs'+str(episode)+str(t)+'.jpg') + + episode_reward += reward + cumulated_reward += reward + global_steps += 1 + num_steps += 1 + + if global_steps >= ddpg.min_update_step and global_steps % ddpg.update_step == 0: + for _ in range(ddpg.update_step): + batch = ddpg.memory.sample( + ddpg.batch_size) # random sample batch + ddpg.update(batch) + + if done or num_steps >= ddpg.step_per_iter: + if highest_reward < cumulated_reward: + highest_reward = cumulated_reward + break + + state = next_state + + if num_steps > 4000 and not lap_completed: + print("LAP COMPLETED!!") + lap_completed = True + + num_episodes += 1 + total_reward += episode_reward + min_episode_reward = min(episode_reward, min_episode_reward) + max_episode_reward = max(episode_reward, max_episode_reward) + + log['num_steps'] = num_steps + log['num_episodes'] = num_episodes + log['total_reward'] = total_reward + log['avg_reward'] = total_reward / num_episodes + log['max_episode_reward'] = max_episode_reward + log['min_episode_reward'] = min_episode_reward + + print(f"Iter: {episode}, num steps: {log['num_steps']}, total reward: {log['total_reward']: .4f}, " + f"min reward: {log['min_episode_reward']: .4f}, max reward: {log['max_episode_reward']: .4f}, " + f"average reward: {log['avg_reward']: .4f}") + + # record reward information + writer.add_scalar("total reward", log['total_reward'], episode) + writer.add_scalar("average reward", log['avg_reward'], episode) + writer.add_scalar("min reward", log['min_episode_reward'], episode) + writer.add_scalar("max reward", log['max_episode_reward'], episode) + writer.add_scalar("num steps", log['num_steps'], episode) + + if episode % save_iter == 0: + ddpg.save(save_path) + + torch.cuda.empty_cache() +env.close() diff --git a/behavior_metrics/brains/f1rl/utils/ddpg.py b/behavior_metrics/brains/f1rl/utils/ddpg.py new file mode 100644 index 00000000..4fad5c53 --- /dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +import pickle + +import numpy as np +import torch +import torch.optim as optim + +from brains.f1rl.utils.ddpg_utils.ddpg_step import ddpg_step +from brains.f1rl.utils.ddpg_utils.Policy_ddpg import Policy +from brains.f1rl.utils.ddpg_utils.Value_ddpg import Value +from brains.f1rl.utils.ddpg_utils.ddpg_utils import Memory, get_env_info, check_path, device, FLOAT, ZFilter + + +class DDPG: + def __init__(self, + env_id, + num_states, + num_actions, + render=False, + num_process=1, + memory_size=1000000, + lr_p=1e-3, + lr_v=1e-3, + gamma=0.99, + polyak=0.995, + explore_size=10000, + step_per_iter=3000, + batch_size=100, + 
min_update_step=1000, + update_step=50, + action_noise=0.1, + seed=1, + model_path=None + ): + self.env_id = env_id + self.num_states = num_states + self.num_actions = num_actions + self.gamma = gamma + self.polyak = polyak + self.memory = Memory(memory_size) + self.explore_size = explore_size + self.step_per_iter = step_per_iter + self.render = render + self.num_process = num_process + self.lr_p = lr_p + self.lr_v = lr_v + self.batch_size = batch_size + self.min_update_step = min_update_step + self.update_step = update_step + self.action_noise = action_noise + self.model_path = model_path + self.seed = seed + + self._init_model() + + def _init_model(self): + """init model from parameters""" + + self.action_low = -1 + self.action_high = 1 + + # seeding + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + self.policy_net = Policy( + self.num_actions, self.action_high).to(device) + self.policy_net_target = Policy( + self.num_actions, self.action_high).to(device) + + self.value_net = Value(64, self.num_actions).to(device) + self.value_net_target = Value(64, self.num_actions).to(device) + + self.running_state = ZFilter((self.num_states,), clip=5) + + if self.model_path: + print("Loading Saved Model {}_ddpg.p".format(self.env_id)) + self.policy_net, self.value_net, self.running_state = pickle.load( + open('{}/{}_ddpg.p'.format(self.model_path, self.env_id), "rb")) + + self.policy_net_target.load_state_dict(self.policy_net.state_dict()) + self.value_net_target.load_state_dict(self.value_net.state_dict()) + + self.optimizer_p = optim.Adam( + self.policy_net.parameters(), lr=self.lr_p) + self.optimizer_v = optim.Adam( + self.value_net.parameters(), lr=self.lr_v) + + def choose_action(self, state, noise_scale): + """select action""" + state = np.random.rand(1,32,32) + state = FLOAT(state).unsqueeze(0).to(device) + with torch.no_grad(): + action, log_prob = self.policy_net.get_action_log_prob(state) + action = action.cpu().numpy()[0] + # add noise + noise = noise_scale * np.random.randn(self.num_actions) + action += noise + action = np.clip(action, -self.action_high, self.action_high) + return action + + def eval(self, i_iter, render=False): + """evaluate model""" + state = self.env.reset() + state = np.random.rand(1,32,32) + test_reward = 0 + while True: + if render: + self.env.render() + # state = self.running_state(state) + action = self.choose_action(state, 0) + state, reward, done, _ = self.env.step(action) + state = np.random.rand(1,32,32) + test_reward += reward + if done: + break + print(f"Iter: {i_iter}, test Reward: {test_reward}") + self.env.close() + + def update(self, batch): + """learn model""" + batch_state = FLOAT(batch.state).to(device) + batch_action = FLOAT(batch.action).to(device) + batch_reward = FLOAT(batch.reward).to(device) + batch_next_state = FLOAT(batch.next_state).to(device) + batch_mask = FLOAT(batch.mask).to(device) + + # update by DDPG + alg_step_stats = ddpg_step(self.policy_net, self.policy_net_target, self.value_net, self.value_net_target, self.optimizer_p, + self.optimizer_v, batch_state, batch_action, batch_reward, batch_next_state, batch_mask, + self.gamma, self.polyak) + + def save(self, save_path): + """save model""" + check_path(save_path) + pickle.dump((self.policy_net, self.value_net, self.running_state), + open('{}/{}_ddpg.p'.format(save_path, self.env_id), 'wb')) diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/Policy_ddpg.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/Policy_ddpg.py new file mode 100644 index 00000000..b6feb84a --- 
/dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg_utils/Policy_ddpg.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F + +# if gpu is to be used +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def init_weight(m): + if isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + nn.init.constant_(m.bias, 0.0) + + +class Policy(nn.Module): + def __init__( + self, + dim_action, + max_action=None, + activation=nn.LeakyReLU + ): + super(Policy, self).__init__() + self.dim_action = dim_action + self.max_action = max_action + + self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2) + self.bn1 = nn.BatchNorm2d(16) + self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2) + self.bn2 = nn.BatchNorm2d(32) + self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2) + self.bn3 = nn.BatchNorm2d(32) + + # Number of Linear input connections depends on output of conv2d layers + # and therefore the input image size, so compute it. + def conv2d_size_out(size, kernel_size = 5, stride = 2): + return (size - (kernel_size - 1) - 1) // stride + 1 + + convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(32))) + convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(32))) + linear_input_size = convw * convh * 32 + self.head = nn.Linear(linear_input_size, self.dim_action) + + self.apply(init_weight) + + def forward(self, x): + x = x.to(device) + x = F.relu(self.bn1(self.conv1(x))) + x = F.relu(self.bn2(self.conv2(x))) + x = F.relu(self.bn3(self.conv3(x))) + action = self.head(x.view(x.size(0), -1)) + return action * self.max_action + + def get_action_log_prob(self, states): + action = self.forward(states) + return action, None + diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/Value_ddpg.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/Value_ddpg.py new file mode 100644 index 00000000..4106e331 --- /dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg_utils/Value_ddpg.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F + +# if gpu is to be used +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def init_weight(m): + if isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight) + nn.init.constant_(m.bias, 0.0) + + +class Value(nn.Module): + def __init__(self, dim_state, dim_action, dim_hidden=32, activation=nn.LeakyReLU): + super(Value, self).__init__() + + self.dim_state = dim_state + self.dim_action = dim_action + self.dim_hidden = dim_hidden + + self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2) + self.bn1 = nn.BatchNorm2d(16) + self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2) + self.bn2 = nn.BatchNorm2d(32) + self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2) + self.bn3 = nn.BatchNorm2d(32) + + # Number of Linear input connections depends on output of conv2d layers + # and therefore the input image size, so compute it. 
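Both the policy and (below) the value network compute the size of the flattened convolutional features instead of hard-coding it. For the 32×32 grayscale observations this brain uses, the three kernel-5, stride-2 convolutions shrink each spatial dimension 32 → 14 → 5 → 1, so the linear head receives 1 × 1 × 32 = 32 features. A quick standalone check of that arithmetic, mirroring `conv2d_size_out` from the diff:

```python
def conv2d_size_out(size, kernel_size=5, stride=2):
    # Valid (unpadded) convolution output size: floor((size - kernel) / stride) + 1
    return (size - (kernel_size - 1) - 1) // stride + 1

size = 32
for layer in (1, 2, 3):
    size = conv2d_size_out(size)
    print(f"after conv{layer}: {size}x{size}")   # 14x14, 5x5, 1x1

linear_input_size = size * size * 32             # 32 channels out of conv3
print(linear_input_size)                         # 32
```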
+ def conv2d_size_out(size, kernel_size = 5, stride = 2): + return (size - (kernel_size - 1) - 1) // stride + 1 + + convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(32))) + convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(32))) + linear_input_size = convw * convh * 32 + self.head = nn.Linear(linear_input_size, self.dim_state) + + self.apply(init_weight) + + self.value = nn.Sequential( + nn.Linear(self.dim_state + self.dim_action, self.dim_hidden), + activation(), + nn.Linear(self.dim_hidden, 1) + ) + + self.value.apply(init_weight) + + def forward(self, states, actions): + + x = states.to(device) + x = F.relu(self.bn1(self.conv1(x))) + x = F.relu(self.bn2(self.conv2(x))) + x = F.relu(self.bn3(self.conv3(x))) + encoded_state = self.head(x.view(x.size(0), -1)) + state_actions = torch.cat([encoded_state, actions], dim=1) + value = self.value(state_actions) + return value + diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/__init__.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_step.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_step.py new file mode 100644 index 00000000..2404aad6 --- /dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_step.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +import torch +import torch.nn as nn + +from brains.f1rl.utils.ddpg_utils.ddpg_utils import get_flat_params, set_flat_params + + +def ddpg_step(policy_net, policy_net_target, value_net, value_net_target, optimizer_policy, optimizer_value, + states, actions, rewards, next_states, masks, gamma, polyak): + masks = masks.unsqueeze(-1) + rewards = rewards.unsqueeze(-1) + """update critic""" + + values = value_net(states, actions) + + with torch.no_grad(): + target_next_values = value_net_target( + next_states, policy_net_target(next_states)) + target_values = rewards + gamma * masks * target_next_values + value_loss = nn.MSELoss()(values, target_values) + + optimizer_value.zero_grad() + value_loss.backward() + optimizer_value.step() + + """update actor""" + + policy_loss = - value_net(states, policy_net(states)).mean() + optimizer_policy.zero_grad() + policy_loss.backward() + optimizer_policy.step() + + """soft update target nets""" + policy_net_flat_params = get_flat_params(policy_net) + policy_net_target_flat_params = get_flat_params(policy_net_target) + set_flat_params(policy_net_target, polyak * + policy_net_target_flat_params + (1 - polyak) * policy_net_flat_params) + + value_net_flat_params = get_flat_params(value_net) + value_net_target_flat_params = get_flat_params(value_net_target) + set_flat_params(value_net_target, polyak * + value_net_target_flat_params + (1 - polyak) * value_net_flat_params) + + return {"critic_loss": value_loss, + "policy_loss": policy_loss + } + diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_utils.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_utils.py new file mode 100644 index 00000000..dfc87c65 --- /dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg_utils/ddpg_utils.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python + +import random +from collections import namedtuple, deque +import gym +from gym.spaces import Discrete +import os +import numpy as np +import torch +import torch.nn as nn + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +FLOAT = torch.FloatTensor +DOUBLE = torch.DoubleTensor +LONG = torch.LongTensor + + +def to_device(*args): + return [arg.to(device) 
for arg in args] + + +def get_flat_params(model: nn.Module): + """ + get tensor flatted parameters from model + :param model: + :return: tensor + """ + return torch.cat([param.view(-1) for param in model.parameters()]) + + +def get_flat_grad_params(model: nn.Module): + """ + get flatted grad of parameters from the model + :param model: + :return: tensor + """ + return torch.cat( + [param.grad.view(-1) if param.grad is not None else torch.zeros(param.view(-1).shape) for param in + model.parameters()]) + + +def set_flat_params(model, flat_params): + """ + set tensor flatted parameters to model + :param model: + :param flat_params: tensor + :return: + """ + prev_ind = 0 + for param in model.parameters(): + flat_size = param.numel() + param.data.copy_( + flat_params[prev_ind:prev_ind + flat_size].view(param.size())) + prev_ind += flat_size + + +def resolve_activate_function(name): + if name.lower() == "relu": + return nn.ReLU + if name.lower() == "sigmoid": + return nn.Sigmoid + if name.lower() == "leakyrelu": + return nn.LeakyReLU + if name.lower() == "prelu": + return nn.PReLU + if name.lower() == "softmax": + return nn.Softmax + if name.lower() == "tanh": + return nn.Tanh + + +Transition = namedtuple( + 'Transition', ('state', 'action', 'reward', 'next_state', 'mask', 'log_prob')) + + +def check_path(path): + if not os.path.exists(path): + print(f"{path} not exist") + os.makedirs(path) + print(f"Create {path} success") + +def get_env_space(env_id): + env = gym.make(env_id) + num_states = env.observation_space.shape[0] + if type(env.action_space) == Discrete: + num_actions = env.action_space.n + else: + num_actions = env.action_space.shape[0] + + return env, num_states, num_actions + + +def get_env_info(env_id, unwrap=False): + env = gym.make(env_id) + if unwrap: + env = env.unwrapped + num_states = env.observation_space.shape[0] + env_continuous = False + if type(env.action_space) == Discrete: + num_actions = env.action_space.n + else: + num_actions = env.action_space.shape[0] + env_continuous = True + + return env, env_continuous, num_states, num_actions + + +class Memory(object): + def __init__(self, size=None): + self.memory = deque(maxlen=size) + + # save item + def push(self, *args): + self.memory.append(Transition(*args)) + + def clear(self): + self.memory.clear() + + def append(self, other): + self.memory += other.memory + + # sample a mini_batch + def sample(self, batch_size=None): + # sample all transitions + if batch_size is None: + return Transition(*zip(*self.memory)) + else: # sample with size: batch_size + random_batch = random.sample(self.memory, batch_size) + return Transition(*zip(*random_batch)) + + def __len__(self): + return len(self.memory) + +# from https://github.com/joschu/modular_rl +# http://www.johndcook.com/blog/standard_deviation/ + + +class RunningStat(object): + def __init__(self, shape): + self._n = 0 + self._M = np.zeros(shape) + self._S = np.zeros(shape) + + def push(self, x): + x = np.asarray(x) + assert x.shape == self._M.shape + self._n += 1 + if self._n == 1: + self._M[...] = x + else: + oldM = self._M.copy() + self._M[...] = oldM + (x - oldM) / self._n + self._S[...] 
= self._S + (x - oldM) * (x - self._M) + + @property + def n(self): + return self._n + + @property + def mean(self): + return self._M + + @property + def var(self): + return self._S / (self._n - 1) if self._n > 1 else np.square(self._M) + + @property + def std(self): + return np.sqrt(self.var) + + @property + def shape(self): + return self._M.shape + +class ZFilter: + """ + y = (x-mean)/std + using running estimates of mean,std + """ + + def __init__(self, shape, demean=True, destd=True, clip=10.0): + self.demean = demean + self.destd = destd + self.clip = clip + + self.rs = RunningStat(shape) + self.fix = False + + def __call__(self, x, update=True): + if update and not self.fix: + self.rs.push(x) + if self.demean: + x = x - self.rs.mean + if self.destd: + x = x / (self.rs.std + 1e-8) + if self.clip: + x = np.clip(x, -self.clip, self.clip) + return x diff --git a/behavior_metrics/brains/f1rl/utils/ddpg_utils/settingsDDPG.py b/behavior_metrics/brains/f1rl/utils/ddpg_utils/settingsDDPG.py new file mode 100644 index 00000000..2e426dc4 --- /dev/null +++ b/behavior_metrics/brains/f1rl/utils/ddpg_utils/settingsDDPG.py @@ -0,0 +1,88 @@ +# Global variables + +import numpy as np +from utils.configuration import Config + +app_configuration = Config('default-rlddpg.yml') + +debug_level = 0 +telemetry = False +telemetry_mask = False +plotter_graphic = True +my_board = True +save_model = False +load_model = False + +tau = app_configuration.tau +gamma = app_configuration.gamma +epsdecay = app_configuration.epsilon_decay +hidden_size = app_configuration.hidden_size +batch_size = 64 +total_episodes = app_configuration.total_episodes + +gazebo_positions_set = app_configuration.gazebo_positions_set + +current_env = app_configuration.env + +# === POSES === +if gazebo_positions_set == "pista_simple": + gazebo_positions = [(0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57), + (1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57), + (2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56), + (3, -6.861, -36.481, 0.004, 0, 0.01, -0.858, 0.613), + (4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383)] + +elif gazebo_positions_set == "nurburgring": + gazebo_positions = [(0, -23.0937, -2.9703, 0, 0.0050, 0.0013, -0.9628, 0.2699), + (1, -32.3188, 12.2921, 0, 0.0014, 0.0049, -0.2727, 0.9620), + (2, -17.4155, -24.1243, 0, 0.0001, 0.0051, -0.0192, 1), + (3, 31.3967, -4.6166, 0, 0.0030, 0.0041, 0.6011, 0.7991), + (4, -56.1261, 4.1047, 0, 0.0043, -0.0027, -0.8517, -0.5240)] + +# === CAMERA === +# Images size +width = 640 +height = 480 +center_image = width/2 + +# Coord X ROW +# x_row = [250, 300, 350, 400, 450] +# x_row = [250, 450] +x_row = [350] + +# Maximum distance from the line +ranges = [300, 280, 250] # Line 1, 2 and 3 +reset_range = [-40, 40] +last_center_line = 0 + +lets_go = ''' + ______ __ + / ____/___ / / + / / __/ __ \/ / +/ /_/ / /_/ /_/ +\____/\____(_) +''' + +description = ''' + ___ ___ ____ ____ +| _ \| _ \| _ \ / ___| +| | \ | | \ | (_| | / _ +| |_/ | |_/ | __/| \_| | +|____/|____/|_| \____/ + ____ + / ___|__ _ _ __ ___ ___ _ __ __ _ + _____ | | / _` | '_ ` _ \ / _ \ '__/ _` | +|_____| | |__| (_| | | | | | | __/ | | (_| | + \____\__,_|_| |_| |_|\___|_| \__,_| + +''' + +title = ''' + ___ _ ______ _ _ + |_ | | | | ___ \ | | | | + | | __| | ___| |_/ /___ | |__ ___ | |_ + | |/ _` |/ _ \ // _ \| '_ \ / _ \| __| +/\__/ / (_| | __/ |\ \ (_) | |_) | (_) | |_ +\____/ \__,_|\___\_| \_\___/|_.__/ \___/ \__| + +''' diff --git a/behavior_metrics/brains/f1rl/utils/memory.py b/behavior_metrics/brains/f1rl/utils/memory.py index 
3bf5b306..ab4f11f2 100644 --- a/behavior_metrics/brains/f1rl/utils/memory.py +++ b/behavior_metrics/brains/f1rl/utils/memory.py @@ -46,4 +46,4 @@ def addMemory(self, state, action, reward, newState, isFinal) : self.newStates.append(newState) self.finals.append(isFinal) - self.currentPosition += 1 \ No newline at end of file + self.currentPosition += 1 diff --git a/behavior_metrics/default-rl.yml b/behavior_metrics/default-rl.yml index 02f813ab..42d1d306 100644 --- a/behavior_metrics/default-rl.yml +++ b/behavior_metrics/default-rl.yml @@ -17,8 +17,9 @@ Behaviors: MaxV: 3 MaxW: 0.3 RL: True - BrainPath: 'brains/f1rl/train.py' + BrainPath: 'brains/f1rl/f1_follow_line_camera_dqn.py' Type: 'f1rl' + Action: 'discrete' Parameters: action_set: 'hard' gazebo_positions_set: 'pista_simple' diff --git a/behavior_metrics/default-rlddpg.yml b/behavior_metrics/default-rlddpg.yml new file mode 100644 index 00000000..868b940e --- /dev/null +++ b/behavior_metrics/default-rlddpg.yml @@ -0,0 +1,55 @@ +Behaviors: + Robot: + Sensors: + Cameras: + Camera_0: + Name: 'camera_0' + Topic: '/F1ROS/cameraL/image_raw' + Pose3D: + Pose3D_0: + Name: 'pose3d_0' + Topic: '/F1ROS/odom' + Actuators: + Motors: + Motors_0: + Name: 'motors_0' + Topic: '/F1ROS/cmd_vel' + MaxV: 3 + MaxW: 0.3 + RL: True + BrainPath: 'brains/f1rl/trainContinuous.py' + Type: 'f1rl' + Action: 'continuous' + Parameters: + gazebo_positions_set: 'pista_simple' + tau: 0.2 + gamma: 0.9 + epsilon_decay: 0.98 + hidden_size: 0.99 + total_episodes: 2000000 + env: 'camera' + Simulation: + World: /opt/jderobot/share/jderobot/gazebo/launch/simple_circuit.launch + Dataset: + In: '/tmp/my_bag.bag' + Out: '' + Stats: + Out: './' + PerfectLap: 'lap-simple-circuit.bag' + Layout: + Frame_0: + Name: frame_0 + Geometry: [1, 1, 2, 2] + Data: rgbimage + Frame_1: + Name: frame_1 + Geometry: [0, 1, 1, 1] + Data: rgbimage + Frame_2: + Name: frame_2 + Geometry: [0, 2, 1, 1] + Data: rgbimage + Frame_3: + Name: frame_3 + Geometry: [0, 3, 3, 1] + Data: rgbimage diff --git a/behavior_metrics/utils/configuration.py b/behavior_metrics/utils/configuration.py index 1ccb4244..ef433ca2 100644 --- a/behavior_metrics/utils/configuration.py +++ b/behavior_metrics/utils/configuration.py @@ -116,14 +116,26 @@ def initialize_configuration(self, config_data): self.experiment_repetitions = config_data['Behaviors']['Experiment']['Repetitions'] if self.robot_type == 'f1rl': - self.action_set = robot['Parameters']['action_set'] - self.gazebo_positions_set = robot['Parameters']['gazebo_positions_set'] - self.alpha = robot['Parameters']['alpha'] - self.gamma = robot['Parameters']['gamma'] - self.epsilon = robot['Parameters']['epsilon'] - self.total_episodes = robot['Parameters']['total_episodes'] - self.epsilon_discount = robot['Parameters']['epsilon_discount'] - self.env = robot['Parameters']['env'] + self.action_type = robot['Action'] + + if self.action_type == 'discrete': + self.action_set = robot['Parameters']['action_set'] + self.gazebo_positions_set = robot['Parameters']['gazebo_positions_set'] + self.alpha = robot['Parameters']['alpha'] + self.gamma = robot['Parameters']['gamma'] + self.epsilon = robot['Parameters']['epsilon'] + self.total_episodes = robot['Parameters']['total_episodes'] + self.epsilon_discount = robot['Parameters']['epsilon_discount'] + self.env = robot['Parameters']['env'] + + elif self.action_type == 'continuous': + self.gazebo_positions_set = robot['Parameters']['gazebo_positions_set'] + self.tau = robot['Parameters']['tau'] + self.gamma = robot['Parameters']['gamma'] + 
self.epsilon_decay = robot['Parameters']['epsilon_decay'] + self.hidden_size = robot['Parameters']['hidden_size'] + self.total_episodes = robot['Parameters']['total_episodes'] + self.env = robot['Parameters']['env'] def create_layout_from_cfg(self, cfg): """Creates the configuration for the layout of the sensors view panels specified from configuration file diff --git a/gym-gazebo/gym_gazebo/__init__.py b/gym-gazebo/gym_gazebo/__init__.py index 77560a27..4ce5dd25 100755 --- a/gym-gazebo/gym_gazebo/__init__.py +++ b/gym-gazebo/gym_gazebo/__init__.py @@ -26,6 +26,20 @@ # More arguments here ) +# F1 DDPG CAMERA +register( + id='GazeboF1CameraEnvDDPG-v0', + entry_point='gym_gazebo.envs.f1:GazeboF1CameraEnvDDPG', + # More arguments here +) + +# F1 DDPG LASER +register( + id='GazeboF1LaserEnvDDPG-v0', + entry_point='gym_gazebo.envs.f1:GazeboF1LaserEnvDDPG', + # More arguments here +) + # Turtlebot envs register( id='GazeboMazeTurtlebotLidar-v0', diff --git a/gym-gazebo/gym_gazebo/envs/f1/__init__.py b/gym-gazebo/gym_gazebo/envs/f1/__init__.py index 2bfa8a5a..9fa3d036 100755 --- a/gym-gazebo/gym_gazebo/envs/f1/__init__.py +++ b/gym-gazebo/gym_gazebo/envs/f1/__init__.py @@ -3,3 +3,7 @@ # DQN from gym_gazebo.envs.f1.env_gazebo_f1_dqn_camera import GazeboF1CameraEnvDQN + +# DDPG +from gym_gazebo.envs.f1.env_gazebo_f1_ddpg_camera import GazeboF1CameraEnvDDPG +from gym_gazebo.envs.f1.env_gazebo_f1_ddpg_laser import GazeboF1LaserEnvDDPG diff --git a/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_camera.py b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_camera.py new file mode 100755 index 00000000..fb0aab4a --- /dev/null +++ b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_camera.py @@ -0,0 +1,452 @@ +import os +import random +import sys +import time + +import cv2 +import gym +from gym import spaces +import numpy as np +import roslaunch +import rospkg +import rospy +import skimage as skimage + +from cv_bridge import CvBridge, CvBridgeError +from gazebo_msgs.msg import ModelState +from gazebo_msgs.srv import GetModelState, SetModelState +from geometry_msgs.msg import Twist +from gym import spaces, utils +from gym.utils import seeding +from sensor_msgs.msg import Image, LaserScan +from skimage import color, exposure, transform +from skimage.transform import rotate +from skimage.viewer import ImageViewer +from std_srvs.srv import Empty + +from gym_gazebo.envs import gazebo_env +from agents.f1.settings import telemetry + +# Images size +witdh = 640 +center_image = int(witdh/2) + +# Coord X ROW +x_row = [260, 360, 450] +# Maximum distance from the line +RANGES = [300, 280, 250] # Line 1, 2 and 3 + +RESET_RANGE = [-40, 40] + +# Deprecated? +space_reward = np.flip(np.linspace(0, 1, 300)) + +last_center_line = 0 + +font = cv2.FONT_HERSHEY_COMPLEX + +# OUTPUTS +v_lineal = [3, 8, 15] +w_angular = [-1, -0.6, 0, 1, 0.6] + +# POSES +positions = [(0, 53.462, -41.988, 0.004, 0, 0, 1.57, -1.57), + (1, 53.462, -8.734, 0.004, 0, 0, 1.57, -1.57), + (2, 39.712, -30.741, 0.004, 0, 0, 1.56, 1.56), + (3, -7.894, -39.051, 0.004, 0, 0.01, -2.021, 2.021), + (4, 20.043, 37.130, 0.003, 0, 0.103, -1.4383, -1.4383)] + + +class ImageF1: + def __init__(self): + self.height = 3 # Image height [pixels] + self.width = 3 # Image width [pixels] + self.width = 3 # Image width [pixels] + self.timeStamp = 0 # Time stamp [s] */ + self.format = "" # Image format string (RGB8, BGR,...) 
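The new `default-rlddpg.yml` marks the brain as `Action: 'continuous'`, and `utils/configuration.py` now branches on that field to read the DDPG parameters (tau, gamma, epsilon_decay, hidden_size, total_episodes, env) instead of the Q-learning ones. A minimal, illustrative sketch of that lookup using plain PyYAML and an abbreviated inline snippet (the real file has more nesting and is parsed by the project's `Config` class):

```python
import yaml

# Abbreviated from default-rlddpg.yml: only the fields the 'continuous' branch reads.
config_text = """
Behaviors:
  Robot:
    Action: 'continuous'
    Parameters:
      gazebo_positions_set: 'pista_simple'
      tau: 0.2
      gamma: 0.9
      epsilon_decay: 0.98
      hidden_size: 0.99
      total_episodes: 2000000
      env: 'camera'
"""

robot = yaml.safe_load(config_text)['Behaviors']['Robot']
if robot['Action'] == 'continuous':
    params = robot['Parameters']
    print(params['tau'], params['gamma'], params['epsilon_decay'])
    print(params['hidden_size'], params['total_episodes'], params['env'])
```

`settingsDDPG.py` then re-exports these values as module-level attributes for the brain to import.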
+ self.data = np.zeros((self.height, self.width, 3), np.uint8) # The image data itself + self.data.shape = self.height, self.width, 3 + + def __str__(self): + s = "Image: {\n height: " + str(self.height) + "\n width: " + str(self.width) + s = s + "\n format: " + self.format + "\n timeStamp: " + str(self.timeStamp) + return s + "\n data: " + str(self.data) + "\n}" + + +class GazeboF1CameraEnvDDPG(gazebo_env.GazeboEnv): + """ + Description: + A Formula 1 car has to complete one lap of a circuit following a red line painted on the ground. Initially it + will not use the complete information of the image but some coordinates that refer to the error made with + respect to the center of the line. + Source: + Master's final project at Universidad Rey Juan Carlos. RoboticsLab Urjc. JdeRobot. Author: Ignacio Arranz + Observation: + Type: Array + Num Observation Min Max + ---------------------------------------- + 0 Vel. Lineal (m/s) 1 10 + 1 Vel. Angular (rad/seg) -2 2 + 2 Error 1 -300 300 + 3 Error 2 -280 280 + 4 Error 3 -250 250 + Actions: + Continuous with Max and Min Linear velocity of 12 and 2 m/s respectively, and, Max Absolute Angular Velocity + of 1.5 rad/s + Reward: + The reward depends on the set of the 3 errors. As long as the lowest point is within range, there will still + be steps. If errors 1 and 2 fall outside the range, a joint but weighted reward will be posted, always giving + more importance to the lower one. + Starting State: + The observations will start from different points in order to prevent you from performing the same actions + initially. This variability will enrich the set of state/actions. + Episode Termination: + The episode ends when the lower error is outside the established range (see table in the observation space). + """ + + def __init__(self): + # Launch the simulation with the given launchfile name + gazebo_env.GazeboEnv.__init__(self, "F1Cameracircuit_v0.launch") + self.vel_pub = rospy.Publisher('/F1ROS/cmd_vel', Twist, queue_size=5) + self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) + self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) + self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) + # self.state_msg = ModelState() + # self.set_state = rospy.ServiceProxy("/gazebo/set_model_state", SetModelState) + self.position = None + self.reward_range = (-np.inf, np.inf) + self._seed() + self.img_rows = 32 + self.img_cols = 32 + self.img_channels = 1 + # self.bridge = CvBridge() + # self.image_sub = rospy.Subscriber("/F1ROS/cameraL/image_raw", Image, self.callback) + self.action_space = self._generate_continuous_action_space() + + def render(self, mode='human'): + pass + + def _gazebo_pause(self): + rospy.wait_for_service('/gazebo/pause_physics') + try: + # resp_pause = pause.call() + self.pause() + except rospy.ServiceException as e: + print("/gazebo/pause_physics service call failed: {}".format(e)) + + def _gazebo_unpause(self): + rospy.wait_for_service('/gazebo/unpause_physics') + try: + self.unpause() + except rospy.ServiceException as e: + print(e) + print("/gazebo/unpause_physics service call failed") + + def _gazebo_reset(self): + # Resets the state of the environment and returns an initial observation. 
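The camera env exposes a two-dimensional continuous action space, linear velocity in [2, 12] m/s and angular velocity in [-1.5, 1.5] rad/s, built with `gym.spaces.Box` in `_generate_continuous_action_space` a few lines below; during the warm-up phase the trainer samples uniformly from it via `env.action_space.sample()`. A standalone sketch with the same bounds:

```python
import numpy as np
from gym import spaces

# Same bounds the camera env advertises: [linear velocity, angular velocity].
action_space = spaces.Box(
    low=np.array([2.0, -1.5], dtype=np.float32),
    high=np.array([12.0, 1.5], dtype=np.float32),
    dtype=np.float32,
)

print(action_space.shape)              # (2,)
sample = action_space.sample()         # uniform draw inside the box (explore phase)
print(sample, action_space.contains(sample))  # e.g. [ 7.3 -0.2 ] True
```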
+ rospy.wait_for_service('/gazebo/reset_simulation') + try: + # reset_proxy.call() + self.reset_proxy() + self.unpause() + except rospy.ServiceException as e: + print("/gazebo/reset_simulation service call failed: {}".format(e)) + + @staticmethod + def _generate_continuous_action_space(): + + min_ang_speed = -1.5 + max_ang_speed = 1.5 + min_lin_speed = 2 + max_lin_speed = 12 + + action_space = spaces.Box(np.array([min_lin_speed, min_ang_speed]),np.array([max_lin_speed, max_ang_speed])) + + return action_space + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def show_telemetry(self, img, vel_cmd, point_1, point_2, point_3, action, reward): + # Puntos centrales de la imagen (verde) + cv2.line(img, (320, x_row[0]), (320, x_row[0]), (255, 255, 0), thickness=5) + cv2.line(img, (320, x_row[1]), (320, x_row[1]), (255, 255, 0), thickness=5) + cv2.line(img, (320, x_row[2]), (320, x_row[2]), (255, 255, 0), thickness=5) + # Linea diferencia entre punto central - error (blanco) + cv2.line(img, (center_image, x_row[0]), (int(point_1), x_row[0]), (255, 255, 255), thickness=2) + cv2.line(img, (center_image, x_row[1]), (int(point_2), x_row[1]), (255, 255, 255), thickness=2) + cv2.line(img, (center_image, x_row[2]), (int(point_3), x_row[2]), (255, 255, 255), thickness=2) + # Telemetry + cv2.putText(img, str("action1: {}, action2: {}".format(action[0], action[1])), (18, 280), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("l_vel: {}, w_ang: {}".format(vel_cmd.linear.x, vel_cmd.angular.z)), (18, 300), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("reward: {}".format(reward)), (18, 320), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("err1: {}".format(center_image - point_1)), (18, 340), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("err2: {}".format(center_image - point_2)), (18, 360), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("err3: {}".format(center_image - point_3)), (18, 380), font, 0.4, (255, 255, 255), 1) + cv2.putText(img, str("pose: {}".format(self.position)), (18, 400), font, 0.4, (255, 255, 255), 1) + + cv2.imshow("Image window", img) + cv2.waitKey(3) + + @staticmethod + def set_new_pose(new_pos): + """ + (pos_number, pose_x, pose_y, pose_z, or_x, or_y, or_z, or_z) + """ + pos_number = positions[0] + + state = ModelState() + state.model_name = "f1_renault" + state.pose.position.x = positions[new_pos][1] + state.pose.position.y = positions[new_pos][2] + state.pose.position.z = positions[new_pos][3] + state.pose.orientation.x = positions[new_pos][4] + state.pose.orientation.y = positions[new_pos][5] + state.pose.orientation.z = positions[new_pos][6] + state.pose.orientation.w = positions[new_pos][7] + + rospy.wait_for_service('/gazebo/set_model_state') + + try: + set_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) + set_state(state) + except rospy.ServiceException as e: + print("Service call failed: {}".format(e)) + + return pos_number + + @staticmethod + def image_msg_to_image(img, cv_image): + + image = ImageF1() + image.width = img.width + image.height = img.height + image.format = "RGB8" + image.timeStamp = img.header.stamp.secs + (img.header.stamp.nsecs * 1e-9) + image.data = cv_image + + return image + + @staticmethod + def get_center(image_line): + try: + coords = np.divide(np.max(np.nonzero(image_line)) - np.min(np.nonzero(image_line)), 2) + coords = np.min(np.nonzero(image_line)) + coords + except: + coords = -1 + + return coords + + def processed_image(self, img): + """ + 
Conver img to HSV. Get the image processed. Get 3 lines from the image. + + :parameters: input image 640x480 + :return: x, y, z: 3 coordinates + """ + + img_proc = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + line_pre_proc = cv2.inRange(img_proc, (0, 30, 30), (0, 255, 200)) + + # gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + _, mask = cv2.threshold(line_pre_proc, 240, 255, cv2.THRESH_BINARY) + + line_1 = mask[x_row[0], :] + line_2 = mask[x_row[1], :] + line_3 = mask[x_row[2], :] + + central_1 = self.get_center(line_1) + central_2 = self.get_center(line_2) + central_3 = self.get_center(line_3) + + # print(central_1, central_2, central_3) + + return central_1, central_2, central_3 + + def callback(self, data): + + # print("CALLBACK!!!!: ", ros_data.height, ros_data.width) + # np_arr = np.fromstring(ros_data.data, np.uint8) + # image_np = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) + + # self.my_image = image_np + # rospy.loginfo(rospy.get_caller_id() + "I see %s", data.data) + + try: + cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") + except CvBridgeError as e: + print(e) + + (rows, cols,channels) = cv_image.shape + if cols > 60 and rows > 60: + cv2.circle(cv_image, (50, 50), 10, 255) + + cv2.imshow("Image window", cv_image) + cv2.waitKey(3) + + # try: + # self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8")) + # except CvBridgeError as e: + # print(e) + + @staticmethod + def calculate_error(point_1, point_2, point_3): + + error_1 = abs(center_image - point_1) + error_2 = abs(center_image - point_2) + error_3 = abs(center_image - point_3) + + return error_1, error_2, error_3 + + @staticmethod + def calculate_reward(error_1, error_2, error_3): + + global center_image + alpha = 0 + beta = 0 + gamma = 1 + + # if error_1 > RANGES[0] and error_2 > RANGES[1]: + # ALPHA = 0.1 + # BETA = 0.2 + # GAMMA = 0.7 + # elif error_1 > RANGES[0]: + # ALPHA = 0.1 + # BETA = 0 + # GAMMA = 0.9 + # elif error_2 > RANGES[1]: + # ALPHA = 0 + # BETA = 0.1 + # GAMMA = 0.9 + + # d = ALPHA * np.true_divide(error_1, center_image) + \ + # beta * np.true_divide(error_2, center_image) + \ + # gamma * np.true_divide(error_3, center_image) + d = np.true_divide(error_3, center_image) + reward = np.round(np.exp(-d), 4) + + return reward + + @staticmethod + def is_game_over(point_1, point_2, point_3): + + done = False + + if center_image-RANGES[2] < point_3 < center_image+RANGES[2]: + if center_image-RANGES[0] < point_1 < center_image+RANGES[0] or \ + center_image-RANGES[1] < point_2 < center_image+RANGES[1]: + pass # In Line + else: + done = True + + return done + + def step(self, action): + + self._gazebo_unpause() + + # === ACTIONS === - 5 actions + vel_cmd = Twist() + vel_cmd.linear.x = action[0] + vel_cmd.angular.z = action[1] + self.vel_pub.publish(vel_cmd) + # print("Action: {} - V_Lineal: {} - W_Angular: {}".format(action, vel_cmd.linear.x, vel_cmd.angular.z)) + + # === IMAGE === + image_data = None + success = False + cv_image = None + f1_image_camera = None + while image_data is None or success is False: + image_data = rospy.wait_for_message('/F1ROS/cameraL/image_raw', Image, timeout=5) + # Transform the image data from ROS to CVMat + cv_image = CvBridge().imgmsg_to_cv2(image_data, "bgr8") + f1_image_camera = self.image_msg_to_image(image_data, cv_image) + + if f1_image_camera: + success = True + + point_1, point_2, point_3 = self.processed_image(f1_image_camera.data) + + # DONE + done = self.is_game_over(point_1, point_2, point_3) + + self._gazebo_pause() + + # action_sum = sum(self.last50actions) + + # == 
CALCULATE ERROR == + error_1, error_2, error_3 = self.calculate_error(point_1, point_2, point_3) + + # == REWARD == + #if not done: + # reward = self.calculate_reward(error_1, error_2, error_3) + #else: + # reward = -200 + + reward = self.calculate_reward(error_1, error_2, error_3) + + # == TELEMETRY == + if telemetry: + self.show_telemetry(f1_image_camera.data, vel_cmd, point_1, point_2, point_3, action, reward) + + cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + cv_image = cv2.resize(cv_image, (self.img_rows, self.img_cols)) + observation = cv_image.reshape(1, cv_image.shape[0], cv_image.shape[1]) + + vehicle_state = [vel_cmd.linear.x, vel_cmd.angular.z] + info = {'error_1': error_1, 'error_2': error_2, 'error_3': error_3} + + stacked_obs = {'image':observation, + 'state':vehicle_state, + 'points': [point_1, point_2, point_3], + 'errors': [error_1, error_2, error_3]} + + # OpenAI standard return: observation, reward, done, info + return observation, reward, done, stacked_obs + + # test STACK 4 + # cv_image = cv_image.reshape(1, 1, cv_image.shape[0], cv_image.shape[1]) + # self.s_t = np.append(cv_image, self.s_t[:, :3, :, :], axis=1) + # return self.s_t, reward, done, {} # observation, reward, done, info + + def reset(self): + + # === POSE === + pos = random.choice(list(enumerate(positions)))[0] + self.position = pos + self.set_new_pose(pos) + + # === RESET === + # Resets the state of the environment and returns an initial observation. + time.sleep(0.05) + # self._gazebo_reset() + self._gazebo_unpause() + + image_data = None + cv_image = None + success = False + while image_data is None or success is False: + image_data = rospy.wait_for_message('/F1ROS/cameraL/image_raw', Image, timeout=5) + # Transform the image data from ROS to CVMat + cv_image = CvBridge().imgmsg_to_cv2(image_data, "bgr8") + f1_image_camera = self.image_msg_to_image(image_data, cv_image) + if f1_image_camera: + success = True + + self._gazebo_pause() + + cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) + cv_image = cv2.resize(cv_image, (self.img_rows, self.img_cols)) + + state = cv_image.reshape(1, cv_image.shape[0], cv_image.shape[1]) + + stacked_obs = {'image': state, 'state':[0., 0.]} + + return state + + # test STACK 4 + # self.s_t = np.stack((cv_image, cv_image, cv_image, cv_image), axis=0) + # self.s_t = self.s_t.reshape(1, self.s_t.shape[0], self.s_t.shape[1], self.s_t.shape[2]) + # return self.s_t diff --git a/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_laser.py b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_laser.py new file mode 100644 index 00000000..1bd12069 --- /dev/null +++ b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_ddpg_laser.py @@ -0,0 +1,222 @@ +import rospy +import time +import random +import numpy as np + +from gym import spaces +from gym.utils import seeding + +from gym_gazebo.envs import gazebo_env +from gazebo_msgs.msg import ModelState +from gazebo_msgs.srv import SetModelState + +from geometry_msgs.msg import Twist +from std_srvs.srv import Empty +from sensor_msgs.msg import LaserScan + +from agents.f1.settings import gazebo_positions + + +class GazeboF1LaserEnvDDPG(gazebo_env.GazeboEnv): + + def __init__(self): + # Launch the simulation with the given launchfile name + gazebo_env.GazeboEnv.__init__(self, "F1Lasercircuit_v0.launch") + # gazebo_env.GazeboEnv.__init__(self, "f1_1_nurburgrinlineROS_laser.launch") + self.vel_pub = rospy.Publisher('/F1ROS/cmd_vel', Twist, queue_size=5) + self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) + self.pause = 
rospy.ServiceProxy('/gazebo/pause_physics', Empty) + self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) + self.action_space = self._generate_continuous_action_space() # spaces.Discrete(5) # F, L, R + self.reward_range = (-np.inf, np.inf) + self.position = None + self._seed() + + def render(self, mode='human'): + pass + + def _gazebo_pause(self): + rospy.wait_for_service('/gazebo/pause_physics') + try: + # resp_pause = pause.call() + self.pause() + except rospy.ServiceException as e: + print("/gazebo/pause_physics service call failed: {}".format(e)) + + def _gazebo_unpause(self): + rospy.wait_for_service('/gazebo/unpause_physics') + try: + self.unpause() + except rospy.ServiceException as e: + print(e) + print("/gazebo/unpause_physics service call failed") + + def _gazebo_reset(self): + # Resets the state of the environment and returns an initial observation. + rospy.wait_for_service('/gazebo/reset_simulation') + try: + # reset_proxy.call() + self.reset_proxy() + self.unpause() + except rospy.ServiceException as e: + print("/gazebo/reset_simulation service call failed: {}".format(e)) + + def set_new_pose(self): + """ + (pos_number, pose_x, pose_y, pose_z, or_x, or_y, or_z, or_z) + """ + pos = random.choice(list(enumerate(gazebo_positions)))[0] + self.position = pos + + pos_number = gazebo_positions[0] + + state = ModelState() + state.model_name = "f1_renault" + state.pose.position.x = gazebo_positions[pos][1] + state.pose.position.y = gazebo_positions[pos][2] + state.pose.position.z = gazebo_positions[pos][3] + state.pose.orientation.x = gazebo_positions[pos][4] + state.pose.orientation.y = gazebo_positions[pos][5] + state.pose.orientation.z = gazebo_positions[pos][6] + state.pose.orientation.w = gazebo_positions[pos][7] + + rospy.wait_for_service('/gazebo/set_model_state') + try: + set_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) + set_state(state) + except rospy.ServiceException as e: + print("Service call failed: {}".format(e)) + return pos_number + + @staticmethod + def _generate_continuous_action_space(): + + min_ang_speed = -2.0 + max_ang_speed = 2.0 + min_lin_speed = 2 + max_lin_speed = 12 + + action_space = spaces.Box(np.array([min_lin_speed, min_ang_speed]),np.array([max_lin_speed, max_ang_speed])) + + return action_space + + @staticmethod + def discrete_observation(data, new_ranges): + + discrete_ranges = [] + min_range = 0.05 + done = False + mod = len(data.ranges) / new_ranges + filter_data = data.ranges[10:-10] + for i, item in enumerate(filter_data): + if i % mod == 0: + if filter_data[i] == float('Inf') or np.isinf(filter_data[i]): + discrete_ranges.append(6) + elif np.isnan(filter_data[i]): + discrete_ranges.append(0) + else: + discrete_ranges.append(int(filter_data[i])) + if min_range > filter_data[i] > 0: + # print("Data ranges: {}".format(data.ranges[i])) + done = True + break + + return discrete_ranges, done + + @staticmethod + def calculate_observation(data): + min_range = 0.5 # Default: 0.21 + done = False + for i, item in enumerate(data.ranges): + if min_range > data.ranges[i] > 0: + done = True + return done + + @staticmethod + def get_center_of_laser(data): + + laser_len = len(data.ranges) + left_sum = sum(data.ranges[laser_len - (laser_len / 5):laser_len - (laser_len / 10)]) # 80-90 + right_sum = sum(data.ranges[(laser_len / 10):(laser_len / 5)]) # 10-20 + + center_detour = (right_sum - left_sum) / 5 + + return center_detour + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] 
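In the `step()` method that follows, the laser env steers by comparing two sectors of the scan: it sums roughly the 10-20% band of beams on one side and the 80-90% band on the other, divides each sector sum by 5 as the diff does, and uses the difference (the "center detour") both for termination (|detour| > 2 or either boundary < 2) and for reward shaping. A standalone sketch of that sector computation on a synthetic scan (helper name and data are illustrative):

```python
import numpy as np

def center_detour(ranges):
    """Right-minus-left sector average, as computed in the laser env's step()."""
    n = len(ranges)
    left_sum = sum(ranges[n - n // 5: n - n // 10])   # ~80-90% of the sweep
    right_sum = sum(ranges[n // 10: n // 5])          # ~10-20% of the sweep
    left_boundary = left_sum / 5
    right_boundary = right_sum / 5
    return right_boundary - left_boundary, left_boundary, right_boundary

# Synthetic 100-beam scan: one side of the track much closer than the other.
scan = np.concatenate([np.full(50, 3.0), np.full(50, 6.0)])
detour, left, right = center_detour(scan)
print(detour, left, right)   # -6.0 12.0 6.0 -> |detour| > 2, so the episode would end
```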
+ + def step(self, actions): + + self._gazebo_unpause() + + vel_cmd = Twist() + vel_cmd.linear.x = actions[0] + vel_cmd.angular.z = actions[1] + self.vel_pub.publish(vel_cmd) + + laser_data = None + success = False + while laser_data is None or not success: + try: + laser_data = rospy.wait_for_message('/F1ROS/laser/scan', LaserScan, timeout=5) + finally: + success = True + + if laser_data is None: + state = [0]*5 + return state, -200, True, {} + + self._gazebo_pause() + + state, _ = self.discrete_observation(laser_data, 5) + + laser_len = len(laser_data.ranges) + left_sum = sum(laser_data.ranges[laser_len - (laser_len // 5):laser_len - (laser_len // 10)]) # 80-90 + right_sum = sum(laser_data.ranges[(laser_len // 10):(laser_len // 5)]) # 10-20 + # center_detour = (right_sum - left_sum) / 5 + left_boundary = left_sum / 5 + right_boundary = right_sum / 5 + center_detour = right_boundary - left_boundary + # print("LEFT: {} - RIGHT: {}".format(left_boundary, right_boundary)) + + done = False + if abs(center_detour) > 2 or left_boundary < 2 or right_boundary < 2: + done = True + # print("center: {}".format(center_detour)) + + if not done: + if abs(center_detour) < 4: + reward = 5 + elif abs(center_detour < 2) and action == 1: + reward = 10 + else: # L or R no looping + reward = 2 + else: + reward = -200 + + return state, reward, done, {} + + def reset(self): + # === POSE === + self.set_new_pose() + time.sleep(0.1) + + # self._gazebo_reset() + self._gazebo_unpause() + + # Read laser data + laser_data = None + success = False + while laser_data is None or not success: + try: + laser_data = rospy.wait_for_message('/F1ROS/laser/scan', LaserScan, timeout=5) + finally: + success = True + + self._gazebo_pause() + + state = self.discrete_observation(laser_data, 5) + + return state diff --git a/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_qlearn_laser.py b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_qlearn_laser.py index a668935b..b3cb4a97 100644 --- a/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_qlearn_laser.py +++ b/gym-gazebo/gym_gazebo/envs/f1/env_gazebo_f1_qlearn_laser.py @@ -28,7 +28,7 @@ def __init__(self): self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) - self.action_space = actions # spaces.Discrete(5) # F, L, R + self.action_space = spaces.Discrete(len(actions.keys())) # F, L, R self.reward_range = (-np.inf, np.inf) self.position = None self._seed() diff --git a/requirements.txt b/requirements.txt index ea4ad79f..2227a79e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ pathlib==1.0.1 jderobot-jderobottypes==1.0.0 catkin_pkg==0.4.23 numpy==1.20.1 -matplotlib==3.3.0 +matplotlib==3.2.1 kiwisolver==1.2 defusedxml==0.6.0 netifaces==0.10.9 @@ -29,3 +29,4 @@ vcstool==0.2.14 scikit-image==0.17.2 bagpy==0.4.2 pycryptodomex==3.9.9 +torch==1.8.1
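Stepping back to the core update that `ddpg_utils/ddpg_step.py` implements earlier in this diff: the critic regresses onto the bootstrapped target r + γ·mask·Q_target(s', π_target(s')), the actor minimises −Q(s, π(s)), and both target networks are moved by Polyak averaging θ_target ← ρ·θ_target + (1 − ρ)·θ. A compact, self-contained PyTorch sketch of those three steps with toy fully-connected stand-ins (the diff's networks are CNNs over 32×32 images and its soft update goes through flattened-parameter helpers, but the math is the same):

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
state_dim, action_dim, gamma, polyak = 4, 2, 0.99, 0.995

# Toy stand-ins for Policy/Value; the diff's versions are convolutional.
policy = nn.Linear(state_dim, action_dim)
policy_target = nn.Linear(state_dim, action_dim)
critic = nn.Linear(state_dim + action_dim, 1)
critic_target = nn.Linear(state_dim + action_dim, 1)
policy_target.load_state_dict(policy.state_dict())
critic_target.load_state_dict(critic.state_dict())
opt_p = torch.optim.Adam(policy.parameters(), lr=1e-3)
opt_v = torch.optim.Adam(critic.parameters(), lr=1e-3)

def q(net, s, a):
    return net(torch.cat([s, a], dim=1))

# One fake minibatch standing in for Memory.sample(batch_size).
s = torch.randn(8, state_dim)
a = torch.randn(8, action_dim)
r = torch.randn(8, 1)
s2 = torch.randn(8, state_dim)
mask = torch.ones(8, 1)  # 0 where the episode ended

# Critic update: MSE against the bootstrapped target.
with torch.no_grad():
    target = r + gamma * mask * q(critic_target, s2, policy_target(s2))
value_loss = nn.MSELoss()(q(critic, s, a), target)
opt_v.zero_grad()
value_loss.backward()
opt_v.step()

# Actor update: maximise Q(s, pi(s)) by minimising its negative.
policy_loss = -q(critic, s, policy(s)).mean()
opt_p.zero_grad()
policy_loss.backward()
opt_p.step()

# Polyak-averaged target networks (ddpg_step does the same via flattened params).
with torch.no_grad():
    for net, tgt in ((policy, policy_target), (critic, critic_target)):
        for p, p_t in zip(net.parameters(), tgt.parameters()):
            p_t.mul_(polyak).add_((1 - polyak) * p)
```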