5 #include "CuriosityModule.h"
13 struct PPOTrainerCInfo : RLTrainerCInfo
117 virtual float GetPolicyLoss() = 0;
140 SMARTENGINE_EXPORT ObjPtr PPOTrainer_CreateInstance(
const PPOTrainerCInfo& cinfo);
141 SMARTENGINE_EXPORT
float PPOTrainer_GetPolicyLoss(ObjPtr
object);
142 SMARTENGINE_EXPORT
float PPOTrainer_GetValueLoss(ObjPtr
object);
143 SMARTENGINE_EXPORT
float PPOTrainer_GetEntropyLoss(ObjPtr
object);
const char * valueNodeName
The name of the output of the critic node. This node should be a linear layer with one output neuron ...
Definition: PPOTrainer.h:33
int batchSize
How many rows of data we should train in a single batch.
Definition: PPOTrainer.h:65
float valueCoefficient
How much the value contributes to the loss.
Definition: PPOTrainer.h:38
float policyClipEpsilon
Range (percent) within which we allow the policy to change in one step.
Definition: PPOTrainer.h:50
int epochCount
How many times we should train over the trajectory.
Definition: PPOTrainer.h:70
float gaeLambda
Multiplier on top of gamma.
Definition: PPOTrainer.h:55
ICuriosityModule * curiosityModule
Optional curiosity module for additional exploration rewards.
Definition: PPOTrainer.h:27
Base class for all reinforcement learning trainers.
Definition: RLTrainer.h:69
float entropyCoefficient
How much entropy contributes to the loss. Entropy is a measure of how random our output is....
Definition: PPOTrainer.h:45
int trajectorySize
How many rows of data we should wait for before training.
Definition: PPOTrainer.h:60
Smart pointer to an IObject. Automatic ref counting.
Definition: ObjectPtr.h:16
The PPO Trainer is a reinforcement learning trainer that is composed of two parts: an actor sub graph...
Definition: PPOTrainer.h:112
Definition: A2CTrainer.h:10
Data used to construct an IPPOTrainer instance
Definition: PPOTrainer.h:17
virtual float GetEntropyLoss()=0
Returns the entropy loss - a measure of how random the network is.
A curiosity module is a way of rewarding an agent for behavior not yet seen. Rewards are given based ...
Definition: CuriosityModule.h:108
A graph is a collection of buffers and nodes that together form a neural network. The graph is create...
Definition: Graph.h:61
SMARTENGINE_EXPORT ObjectPtr< IPPOTrainer > CreatePPOTrainer(const PPOTrainerCInfo &cinfo)
Creates an instance of IPPOTrainer
virtual float GetValueLoss()=0
Returns the loss in the value sub-graph.
bool normalizeAdvantage
If true, the advantage (actual reward - expected reward) is normalized by subtracting the mean and di...
Definition: PPOTrainer.h:77
IGraph * graph
The graph we are training. This should contain the policy network and value network.
Definition: PPOTrainer.h:22