SmartEngine  1.6.0
A2CTrainer.h
1 // Copyright (C) Entropy Software LLC - All Rights Reserved
2 
3 #pragma once
4 
5 #include "CuriosityModule.h"
6 #include "Object.h"
7 #include "RLTrainer.h"
8 
9 namespace SmartEngine
10 {
11 
// Members below are laid out with 4-byte packing; the previous packing is restored by the matching pop.
12 #pragma pack(push, 4)
// Data used to construct an IA2CTrainer instance.
// NOTE(review): the struct's declaration line is not present in this listing; only its body is shown.
17 {
 // The graph we are training. This should contain the policy network and value network.
22  IGraph* graph = nullptr;
23 
 // Optional curiosity module for additional exploration rewards.
27  ICuriosityModule* curiosityModule = nullptr; // Optional
28 
 // The name of the output of the critic node. This node should be a linear layer with one output neuron.
33  const char* valueNodeName = "";
34 
 // How much weight the value contributes to the loss.
38  float valueCoefficient = 1.0f;
39 
 // How much weight the entropy contributes to the loss.
 // Entropy is a measure of how random our output is.
45  float entropyCoefficient = 0.01f;
46 
 // How many actual experiences we should look at before using an estimate
 // for total rewards this episode.
50  int lookAheadSteps = 2;
51 
 // How many data samples we should try to train at a time.
55  int minBatchSize = 32;
56 };
57 #pragma pack(pop)
58 
// The A2C Trainer is a reinforcement learning trainer that is composed of two parts,
// one of which is an actor sub graph (the remainder of the description is truncated in this listing).
// Derives from IRLTrainer, the base class for all reinforcement learning trainers.
84 class SMARTENGINE_EXPORT IA2CTrainer : public IRLTrainer
85 {
86 public:
87  SMARTENGINE_DECLARE_CLASS(IA2CTrainer)
88 
89 
 // NOTE(review): presumably returns the loss of the policy (actor) sub-graph,
 // by analogy with GetValueLoss() - confirm against the implementation.
90  virtual float GetPolicyLoss() = 0;
93 
 // Returns the loss of the value sub-graph.
97  virtual float GetValueLoss() = 0;
98 
 // Returns the entropy loss - a measure of how random the network is.
102  virtual float GetEntropyLoss() = 0;
103 };
104 
// Creates an instance of IA2CTrainer from the given construction info.
// The result is returned as an ObjectPtr, a ref-counting smart pointer to an IObject.
108 SMARTENGINE_EXPORT ObjectPtr<IA2CTrainer> CreateA2CTrainer(const A2CTrainerCInfo& cinfo);
109 
// Exported functions with C linkage.
// NOTE(review): judging by their names these mirror CreateA2CTrainer() and the IA2CTrainer
// loss getters, with `object` presumably being a handle to an IA2CTrainer - confirm against
// the implementation.
111 extern "C"
112 {
113  SMARTENGINE_EXPORT ObjPtr A2CTrainer_CreateInstance(const A2CTrainerCInfo& cinfo);
114  SMARTENGINE_EXPORT float A2CTrainer_GetPolicyLoss(ObjPtr object);
115  SMARTENGINE_EXPORT float A2CTrainer_GetValueLoss(ObjPtr object);
116  SMARTENGINE_EXPORT float A2CTrainer_GetEntropyLoss(ObjPtr object);
117 }
119 
120 } // namespace SmartEngine
SmartEngine::A2CTrainerCInfo::graph
IGraph * graph
The graph we are training. This should contain the policy network and value network.
Definition: A2CTrainer.h:22
SmartEngine::IA2CTrainer
The A2C Trainer is a reinforcement learning trainer that is composed of two parts: an actor sub graph...
Definition: A2CTrainer.h:85
SmartEngine::A2CTrainerCInfo::valueCoefficient
float valueCoefficient
How much weight the value contributes to the loss
Definition: A2CTrainer.h:38
SmartEngine::A2CTrainerCInfo
Data used to construct an IA2CTrainer instance
Definition: A2CTrainer.h:17
SmartEngine::A2CTrainerCInfo::valueNodeName
const char * valueNodeName
The name of the output of the critic node. This node should be a linear layer with one output neuron ...
Definition: A2CTrainer.h:33
SmartEngine::A2CTrainerCInfo::minBatchSize
int minBatchSize
How many data samples we should try to train at a time.
Definition: A2CTrainer.h:55
SmartEngine::IRLTrainer
Base class for all reinforcement learning trainers.
Definition: RLTrainer.h:69
SmartEngine::A2CTrainerCInfo::entropyCoefficient
float entropyCoefficient
How much weight the entropy contributes to the loss. Entropy is a measure of how random our output is...
Definition: A2CTrainer.h:45
SmartEngine::ObjectPtr
Smart pointer to an IObject. Automatic ref counting.
Definition: ObjectPtr.h:16
SmartEngine
Definition: A2CTrainer.h:10
SmartEngine::A2CTrainerCInfo::lookAheadSteps
int lookAheadSteps
How many actual experiences we should look at before using an estimate for total rewards this episode...
Definition: A2CTrainer.h:50
SmartEngine::ICuriosityModule
A curiosity module is a way of rewarding an agent for behavior not yet seen. Rewards are given based ...
Definition: CuriosityModule.h:108
SmartEngine::CreateA2CTrainer
SMARTENGINE_EXPORT ObjectPtr< IA2CTrainer > CreateA2CTrainer(const A2CTrainerCInfo &cinfo)
Creates an instance of IA2CTrainer
SmartEngine::IGraph
A graph is a collection of buffers and nodes that together form a neural network. The graph is create...
Definition: Graph.h:61
SmartEngine::A2CTrainerCInfo::curiosityModule
ICuriosityModule * curiosityModule
Optional curiosity module for additional exploration rewards
Definition: A2CTrainer.h:27
SmartEngine::IA2CTrainer::GetValueLoss
virtual float GetValueLoss()=0
Returns the loss of the value sub-graph.
SmartEngine::IA2CTrainer::GetEntropyLoss
virtual float GetEntropyLoss()=0
Returns the entropy loss - a measure of how random the network is.
SmartEngine::RLTrainerCInfo
Data used to construct an IRLTrainer instance
Definition: RLTrainer.h:20