import numpy as np   # import not shown in the excerpt, needed for FLOAT_EPS
import torch
import rospy

from training_node import TrainingNode
from parameters_policy_gradient import *
from torch.distributions import Categorical

FLOAT_EPS = np.finfo(np.float32).eps

class PolicyGradientTrainingNode(TrainingNode):  # class statement elided in the excerpt; name assumed
    ''' ROS node to train the Policy Gradient model '''

    def __init__(self):
        TrainingNode.__init__(self, ...)  # constructor arguments elided in the excerpt

    def on_complete_episode(self):
        # Compute the discounted return of every step, working backwards from the last reward
        rewards = []
        R = 0
        for reward in reversed(self.current_episode_rewards):
            R = reward + DISCOUNT_FACTOR * R
            rewards.insert(0, R)
        # Normalize the returns to reduce the variance of the gradient estimate
        rewards = torch.tensor(rewards)
        if rewards.numel() > 1:
            rewards = (rewards - rewards.mean()) / (rewards.std() + FLOAT_EPS)
        # REINFORCE loss: the recorded action log-probabilities weighted by their
        # discounted returns, negated so that minimizing the loss maximizes the reward
        loss = torch.sum(self.policy_history[:len(rewards)] * rewards) * -1
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        TrainingNode.on_complete_episode(self)

    def select_action(self, state):
        # Sample an action from the categorical distribution produced by the policy network
        action_probabilities = self.policy(state.cpu())
        action_distribution = Categorical(action_probabilities)
        action = action_distribution.sample()
        ...  # remainder of the method elided in the excerpt

    def get_reward(self):  # method name assumed; the signature is elided in the excerpt
        # Reward shaping: staying close to the track center is rewarded, leaving it is
        # penalized; the result is scaled by the current speed
        distance = abs(track_position.distance_to_center)
        if ...:  # distance thresholds elided in the excerpt
            return 1 * scaleForSpeed
        elif ...:
            return 0.7 * scaleForSpeed
        else:
            return -0.4 * scaleForSpeed

    def on_complete_step(self, state, action, reward, next_state):
        self.current_episode_rewards.append(reward)

rospy.init_node('policy_gradient_training', anonymous=True)
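To make the return computation in on_complete_episode concrete: assuming a discount factor of 0.99 (the real value of DISCOUNT_FACTOR comes from parameters_policy_gradient and is not shown in this excerpt), an episode with the step rewards 1.0, 0.7 and -0.4 yields the discounted returns below, which are then normalized exactly as in the method:

import torch

DISCOUNT = 0.99                      # placeholder; the real DISCOUNT_FACTOR lives in parameters_policy_gradient
step_rewards = [1.0, 0.7, -0.4]

returns, R = [], 0.0
for r in reversed(step_rewards):     # work backwards through the episode
    R = r + DISCOUNT * R
    returns.insert(0, R)
# returns == [1.30096, 0.304, -0.4]

returns = torch.tensor(returns)
returns = (returns - returns.mean()) / (returns.std() + 1e-7)
# returns is now roughly [1.05, -0.11, -0.94]: above-average steps get positive weight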
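The excerpt does not show how self.policy_history, which the loss above multiplies with the returns, is filled. In a typical REINFORCE implementation the log-probability of every sampled action is recorded at selection time; the helper below is only a sketch of that pattern (the function name and the way the values are accumulated are assumptions, not the project's code):

import torch
from torch.distributions import Categorical

def sample_action_with_log_prob(policy, state):
    # Run the policy network, sample an action from the resulting categorical
    # distribution and return the log-probability needed for the REINFORCE loss.
    action_probabilities = policy(state)
    action_distribution = Categorical(action_probabilities)
    action = action_distribution.sample()
    return action.item(), action_distribution.log_prob(action)

# Stacking the log-probabilities collected over one episode gives a tensor that
# plays the role of self.policy_history in on_complete_episode above, e.g.:
#   policy_history = torch.stack(episode_log_probs)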
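The excerpt ends with the rospy.init_node call; how the node object is created and kept alive afterwards is not shown. A minimal sketch of the usual pattern, assuming the class name used in the reconstruction above, would be:

import rospy

rospy.init_node('policy_gradient_training', anonymous=True)
node = PolicyGradientTrainingNode()  # class name assumed, see the listing above
rospy.spin()                         # process ROS callbacks until the node is shut down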