SmartEngine  1.6.0
D4PGTrainer.h
1 // Copyright (C) Entropy Software LLC - All Rights Reserved
2 
3 #pragma once
4 
5 #include "Object.h"
6 #include "RLTrainer.h"
7 
8 namespace SmartEngine
9 {
10 
// Members of the struct below are packed to at most 4-byte alignment; the
// previous packing is restored by the matching pack(pop) after the struct.
// NOTE(review): presumably this keeps the layout stable for the extern "C"
// D4PGTrainer_CreateInstance entry point - confirm.
11 #pragma pack(push, 4)
/// Data used to construct an ID4PGTrainer instance.
/// Extends RLTrainerCInfo; every field visible here has an in-class default.
12 struct D4PGTrainerCInfo : RLTrainerCInfo
16 {
    /// The actor graph to train.
20  IGraph* actorGraph = nullptr;
21 
    // NOTE(review): the documentation index lists `int criticNeuronCount`
    // ("How many neurons are in the critic hidden layer", D4PGTrainer.h:25),
    // but its declaration line is absent from this listing - verify against
    // the real header before relying on this layout.
26 
    /// How many neurons are in the critic output layer. This is also the
    /// granularity of the probability distribution.
32  int criticAtomCount = 32;
33 
    /// The minimum expected rewards we can track in our probability distribution.
38  float minValue = -10.0f;
39 
    /// The maximum expected rewards we can track in our probability distribution.
44  float maxValue = 10.0f;
45 
    /// How many samples we wait for before we start training.
49  int minSampleCount = 10000;
50 
    /// How many samples to train the critic before training the actor.
55  int criticPreTrainSteps = 10000;
56 
    /// How many actual experiences we should look at before using an estimate
    /// for total rewards this episode.
60  int lookAheadSteps = 2;
61 
    /// How many data samples we should try to train at a time.
65  int batchSize = 64;
66 
    // NOTE(review): the documentation index lists `int syncGenerationCount`
    // ("How often we should sync the training network with the actual
    // network", D4PGTrainer.h:70), but its declaration line is absent from
    // this listing - verify against the real header.
71 
    /// How much to sync the training network with the actual network.
75  float syncLerpPercent = 1e-3f;
76 };
77 #pragma pack(pop)
78 
/// Interface for the D4PG reinforcement learning trainer. Per the generated
/// documentation, the trainer is composed of two parts, one of which is an
/// actor sub graph (the remainder of that description is truncated in the
/// index). Derives from IRLTrainer and adds two pure-virtual loss accessors.
103 class SMARTENGINE_EXPORT ID4PGTrainer : public IRLTrainer
104 {
105 public:
    // Project macro; its expansion is not visible in this file.
106  SMARTENGINE_DECLARE_CLASS(ID4PGTrainer)
107 
108 
    /// Returns a float loss value for the actor.
    /// NOTE(review): presumably the actor-graph counterpart of
    /// GetCriticLoss(); the index has no description for it - confirm.
109  virtual float GetActorLoss() = 0;
112 
    /// Returns the loss in the critic graph.
116  virtual float GetCriticLoss() = 0;
117 };
118 
123 
// C-linkage exports for the D4PG trainer. Each function after the first takes
// an ObjPtr handle as its only argument.
// NOTE(review): presumably these forward to the ID4PGTrainer methods of the
// same name for non-C++ callers - confirm against the implementation.
// NOTE(review): the documentation index also lists a C++ factory,
// `ObjectPtr<ID4PGTrainer> CreateD4PGTrainer(const D4PGTrainerCInfo&)`, whose
// declaration does not appear in this listing.
125 extern "C"
126 {
    // Builds a trainer from the given construction info and returns its handle.
127  SMARTENGINE_EXPORT ObjPtr D4PGTrainer_CreateInstance(const D4PGTrainerCInfo& cinfo);
    // Float loss accessors keyed by the handle passed in `object`.
128  SMARTENGINE_EXPORT float D4PGTrainer_GetActorLoss(ObjPtr object);
129  SMARTENGINE_EXPORT float D4PGTrainer_GetCriticLoss(ObjPtr object);
130 }
132 
133 } // namespace SmartEngine
SmartEngine::D4PGTrainerCInfo::minSampleCount
int minSampleCount
How many samples we wait for before we start training.
Definition: D4PGTrainer.h:49
SmartEngine::ID4PGTrainer::GetCriticLoss
virtual float GetCriticLoss()=0
Returns the loss in the critic graph.
SmartEngine::D4PGTrainerCInfo::criticPreTrainSteps
int criticPreTrainSteps
How many samples to train the critic before training the actor.
Definition: D4PGTrainer.h:55
SmartEngine::D4PGTrainerCInfo::minValue
float minValue
The minimum expected rewards we can track in our probability distribution.
Definition: D4PGTrainer.h:38
SmartEngine::D4PGTrainerCInfo::criticNeuronCount
int criticNeuronCount
How many neurons are in the critic hidden layer.
Definition: D4PGTrainer.h:25
SmartEngine::D4PGTrainerCInfo::syncLerpPercent
float syncLerpPercent
How much to sync the training network with the actual network.
Definition: D4PGTrainer.h:75
SmartEngine::D4PGTrainerCInfo::maxValue
float maxValue
The maximum expected rewards we can track in our probability distribution.
Definition: D4PGTrainer.h:44
SmartEngine::IRLTrainer
Base class for all reinforcement learning trainers.
Definition: RLTrainer.h:69
SmartEngine::D4PGTrainerCInfo
Data used to construct an ID4PGTrainer instance.
Definition: D4PGTrainer.h:16
SmartEngine::D4PGTrainerCInfo::actorGraph
IGraph * actorGraph
The actor graph to train.
Definition: D4PGTrainer.h:20
SmartEngine::ObjectPtr
Smart pointer to an IObject. Automatic ref counting.
Definition: ObjectPtr.h:16
SmartEngine
Definition: A2CTrainer.h:10
SmartEngine::CreateD4PGTrainer
SMARTENGINE_EXPORT ObjectPtr< ID4PGTrainer > CreateD4PGTrainer(const D4PGTrainerCInfo &cinfo)
Creates an instance of ID4PGTrainer.
SmartEngine::D4PGTrainerCInfo::batchSize
int batchSize
How many data samples we should try to train at a time.
Definition: D4PGTrainer.h:65
SmartEngine::D4PGTrainerCInfo::criticAtomCount
int criticAtomCount
How many neurons are in the critic output layer. This is also the granularity of the probability dist...
Definition: D4PGTrainer.h:32
SmartEngine::IGraph
A graph is a collection of buffers and nodes that together form a neural network. The graph is create...
Definition: Graph.h:61
SmartEngine::D4PGTrainerCInfo::lookAheadSteps
int lookAheadSteps
How many actual experiences we should look at before using an estimate for total rewards this episode...
Definition: D4PGTrainer.h:60
SmartEngine::ID4PGTrainer
The D4PGTrainer is a reinforcement learning trainer that is composed of two parts: an actor sub graph...
Definition: D4PGTrainer.h:104
SmartEngine::D4PGTrainerCInfo::syncGenerationCount
int syncGenerationCount
How often we should sync the training network with the actual network.
Definition: D4PGTrainer.h:70