Autonomous Racing 1
f1tenth Project Group of Technical University Dortmund, Germany
training_node.py
from reinforcement_learning_node import ReinforcementLearningNode, device

from topics import TOPIC_CRASH, TOPIC_GAZEBO_MODEL_STATE, TOPIC_EPISODE_RESULT

import time
import random
import math
from collections import deque
import rospy
from std_msgs.msg import Empty
from reinforcement_learning.msg import EpisodeResult
from gazebo_msgs.msg import ModelStates

from simulation_tools.reset_car import Point
import simulation_tools.reset_car as reset_car
from simulation_tools.track import track

import torch.optim


class TrainingNode(ReinforcementLearningNode):
    ''' Abstract class for all methods that are common between
    Q-Learning training and Policy Gradient training
    '''

    def __init__(
            self,
            policy,
            actions,
            laser_sample_count,
            max_episode_length,
            learn_rate):
        ReinforcementLearningNode.__init__(self, actions, laser_sample_count)

        self.policy = policy
        self.max_episode_length = max_episode_length

        self.episode_count = 0
        self.episode_length = 0
        self.total_step_count = 0
        self.cumulative_reward = 0
        self.is_terminal_step = False

        self.episode_length_history = deque(maxlen=50)
        self.cumulative_reward_history = deque(maxlen=50)

        self.state = None
        self.action = None
        self.car_position = None
        self.car_orientation = None

        self.drive_forward = None
        self.steps_with_wrong_orientation = 0

        self.episode_start_time_real = time.time()
        self.episode_start_time_sim = rospy.Time.now().to_sec()

        self.optimizer = torch.optim.Adam(
            self.policy.parameters(), lr=learn_rate)

        reset_car.register_service()

        rospy.Subscriber(TOPIC_CRASH, Empty, self.on_crash)
        rospy.Subscriber(TOPIC_GAZEBO_MODEL_STATE, ModelStates, self.on_model_state_callback)  # nopep8
        self.episode_result_publisher = rospy.Publisher(
            TOPIC_EPISODE_RESULT, EpisodeResult, queue_size=1)

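    # Crash callback: marks the current step as terminal. Crashes during the
    # first five steps of an episode are ignored (e.g. while the car has just
    # been reset).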
    def on_crash(self, _):
        if self.episode_length > 5:
            self.is_terminal_step = True

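    # Formats a one-line log message with episode statistics, including
    # rolling averages over the last 50 episodes.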
    def get_episode_summary(self):
        average_episode_length = sum(
            self.episode_length_history) / len(self.episode_length_history)
        average_cumulative_reward = sum(
            self.cumulative_reward_history) / len(self.cumulative_reward_history)

        result = "Episode " + str(self.episode_count) + ": " \
            + str(self.episode_length).rjust(3) + " steps (" + str(int(average_episode_length)).rjust(3) + " avg), " \
            + "return: " + "{0:.1f}".format(self.cumulative_reward).rjust(5) \
            + " (" + "{0:.1f}".format(average_cumulative_reward).rjust(5) + " avg)"

        episode_duration_real = time.time() - self.episode_start_time_real
        episode_duration_sim = rospy.Time.now().to_sec() - self.episode_start_time_sim
        if episode_duration_real != 0:
            result += ", time: {0:.1f}x".format(episode_duration_sim / episode_duration_real)  # nopep8
        if episode_duration_sim != 0:
            result += ", laser: {0:.1f} Hz".format(float(self.episode_length) / episode_duration_sim)  # nopep8
        return result

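    # Records episode statistics, publishes the result, logs a summary and
    # resets the per-episode counters; saves the model every 50 episodes.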
    def on_complete_episode(self):
        self.episode_length_history.append(self.episode_length)
        self.cumulative_reward_history.append(self.cumulative_reward)
        self.episode_result_publisher.publish(
            reward=self.cumulative_reward, length=int(self.episode_length))

        rospy.loginfo(self.get_episode_summary())
        self.episode_start_time_real = time.time()
        self.episode_start_time_sim = rospy.Time.now().to_sec()

        self.episode_count += 1
        self.episode_length = 0
        self.cumulative_reward = 0

        if self.episode_count % 50 == 0:
            self.policy.save()
            rospy.loginfo("Model parameters saved.")

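    # Laser scan callback: performs one step of the training loop. Computes
    # the reward for the previous action, resets the car to a random pose
    # when the episode ends, and otherwise selects and performs the next
    # action.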
    def on_receive_laser_scan(self, message):
        new_state = self.convert_laser_message_to_tensor(message)

        if self.state is not None:
            self.check_car_orientation()
            reward = self.get_reward()
            self.cumulative_reward += reward
            self.on_complete_step(self.state, self.action, reward, new_state)

        if self.is_terminal_step or self.episode_length >= self.max_episode_length:
            self.drive_forward = random.random() > 0.5
            reset_car.reset_random(
                max_angle=math.pi / 180 * 20,
                max_offset_from_center=0.2,
                forward=self.drive_forward)
            self.is_terminal_step = False
            self.state = None
            if self.episode_length != 0:
                self.on_complete_episode()
        else:
            self.state = new_state
            self.action = self.select_action(new_state)
            self.perform_action(self.action)
            self.episode_length += 1
            self.total_step_count += 1

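    # Hook for subclasses: receives one (state, action, reward, next_state)
    # transition per step. The default implementation does nothing.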
    def on_complete_step(self, state, action, reward, next_state):
        pass

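    # Ends the episode if the car has faced against the intended driving
    # direction for more than two consecutive steps.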
    def check_car_orientation(self):
        if self.car_position is None:
            return

        track_position = track.localize(self.car_position)
        car_direction = track_position.faces_forward(self.car_orientation)
        if car_direction != self.drive_forward:
            self.steps_with_wrong_orientation += 1
        else:
            self.steps_with_wrong_orientation = 0

        if self.steps_with_wrong_orientation > 2:
            self.is_terminal_step = True

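    # Gazebo model state callback: caches the car's position and orientation.
    # The car is assumed to be the second model in the ModelStates message.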
    def on_model_state_callback(self, message):
        if len(message.pose) < 2:
            return
        self.car_position = Point(
            message.pose[1].position.x,
            message.pose[1].position.y)
        self.car_orientation = message.pose[1].orientation
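
TrainingNode leaves action selection and reward computation to its subclasses: on_receive_laser_scan calls self.select_action and self.get_reward, which are not defined in this file, and reports each transition through on_complete_step. The sketch below shows one way a concrete trainer might subclass it. It is illustrative only and not part of this file: MyPolicy, MyTrainingNode, ACTIONS, the epsilon value and the constant reward are hypothetical stand-ins, and perform_action and convert_laser_message_to_tensor are presumed to be inherited from ReinforcementLearningNode.

# Illustrative sketch, not part of training_node.py. All names not defined
# in this file (MyPolicy, MyTrainingNode, ACTIONS, the reward) are
# hypothetical.
import random

import torch
import torch.nn as nn

from training_node import TrainingNode

# hypothetical (steering, velocity) pairs
ACTIONS = [(-0.4, 0.2), (0.0, 0.2), (0.4, 0.2)]
LASER_SAMPLE_COUNT = 8


class MyPolicy(nn.Module):
    def __init__(self):
        super(MyPolicy, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(LASER_SAMPLE_COUNT, 64),
            nn.ReLU(),
            nn.Linear(64, len(ACTIONS)))

    def forward(self, state):
        return self.layers(state)

    def save(self):
        # TrainingNode calls policy.save() every 50 episodes
        torch.save(self.state_dict(), "my_policy.pt")


class MyTrainingNode(TrainingNode):
    def __init__(self):
        TrainingNode.__init__(
            self,
            policy=MyPolicy(),
            actions=ACTIONS,
            laser_sample_count=LASER_SAMPLE_COUNT,
            max_episode_length=500,
            learn_rate=0.0001)

    def select_action(self, state):
        # epsilon-greedy over the network output (illustrative)
        if random.random() < 0.1:
            return random.randrange(len(ACTIONS))
        with torch.no_grad():
            return self.policy(state).argmax().item()

    def get_reward(self):
        # constant survival bonus; a real trainer would shape this, e.g.
        # from the car's track position
        return 0.1

    def on_complete_step(self, state, action, reward, next_state):
        # store the transition and/or run an optimizer step here
        pass

Running such a node would additionally need the usual ROS setup (rospy.init_node and rospy.spin) and a running Gazebo simulation that publishes the laser scan and model state topics.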