1 from reinforcement_learning_node
import ReinforcementLearningNode, device
3 from topics
import TOPIC_CRASH, TOPIC_GAZEBO_MODEL_STATE, TOPIC_EPISODE_RESULT
8 from collections
import deque
10 from std_msgs.msg
import Empty
11 from reinforcement_learning.msg
import EpisodeResult
12 from gazebo_msgs.msg
import ModelStates
22 ''' Abstract class for all methods that are common between 23 Q-Learning training and Policy Gradient training 33 ReinforcementLearningNode.__init__(self, actions, laser_sample_count)
60 self.policy.parameters(), lr=learn_rate)
62 reset_car.register_service()
64 rospy.Subscriber(TOPIC_CRASH, Empty, self.
on_crash)
67 TOPIC_EPISODE_RESULT, EpisodeResult, queue_size=1)
74 average_episode_length = sum(
76 average_cumulative_reward = sum(
80 + str(self.
episode_length).rjust(3) +
" steps (" + str(int(average_episode_length)).rjust(3) +
" avg), " \
82 +
" (" +
"{0:.1f}".format(average_cumulative_reward).rjust(5) +
" avg)" 86 if episode_duration_real != 0:
87 result +=
", time: {0:.1f}x".format(episode_duration_sim / episode_duration_real)
88 if episode_duration_sim != 0:
89 result +=
", laser: {0:.1f} Hz".format(float(self.
episode_length) / episode_duration_sim)
95 self.episode_result_publisher.publish(
109 rospy.loginfo(
"Model parameters saved.")
114 if self.
state is not None:
116 reward = self.get_reward()
122 reset_car.reset_random(
123 max_angle=math.pi / 180 * 20,
124 max_offset_from_center=0.2,
131 self.
state = new_state
132 self.
action = self.select_action(new_state)
141 if self.car_position
is None:
144 track_position = track.localize(self.car_position)
145 car_direction = track_position.faces_forward(self.car_orientation)
146 if car_direction != self.drive_forward:
147 self.steps_with_wrong_orientation += 1
149 self.steps_with_wrong_orientation = 0
151 if self.steps_with_wrong_orientation > 2:
152 self.is_terminal_step =
True 155 if len(message.pose) < 2:
158 message.pose[1].position.x,
159 message.pose[1].position.y)
def on_complete_episode(self)
def get_episode_summary(self)
def convert_laser_message_to_tensor(self, message, use_device=True)
def __init__(self, policy, actions, laser_sample_count, max_episode_length, learn_rate)
def check_car_orientation(self)
steps_with_wrong_orientation
def perform_action(self, action_index)
def on_receive_laser_scan(self, message)
def on_complete_step(self, state, action, reward, next_state)
cumulative_reward_history
def on_model_state_callback(self, message)