Hey Dmitriy,
I discovered a new NN sequence: enjoy <3
bool CreateDescriptions(CArrayObj *actor, CArrayObj *critic)
{
//---
CLayerDescription *descr;
//---
if(!actor)
{
actor = new CArrayObj();
if(!actor)
return false;
}
if(!critic)
{
critic = new CArrayObj();
if(!critic)
return false;
}
//--- Actor
actor.Clear();
//--- Input layer
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
int prev_count = descr.count = (HistoryBars * BarDescr);
descr.window = 0;
descr.activation = None;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 1
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBatchNormOCL;
descr.count = prev_count;
descr.batch = 1000;
descr.activation = None;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 2
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronConvOCL;
prev_count = descr.count = BarDescr;
descr.window = HistoryBars;
descr.step = HistoryBars;
descr.window_out = 8;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 3
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronConvOCL;
descr.count = prev_count;
descr.window = 8;
descr.step = 8;
descr.window_out = 4;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 4
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
prev_count = descr.count = 1024;
descr.optimisation = ADAM;
descr.activation = LReLU;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 5
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
prev_count = descr.count = 1024;
descr.optimisation = ADAM;
descr.activation = LReLU;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- Softmax layer
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronSoftMaxOCL;
descr.count = 1024;
descr.optimisation = ADAM;
descr.activation = LReLU;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- Multilayer Multi-Head Attention layer
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronMLMHAttentionOCL;
descr.count = 1024;
descr.optimisation = ADAM;
descr.activation = LReLU;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 6
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronConcatenate;
descr.count = LatentCount;
descr.window = prev_count;
descr.step = AccountDescr + 6;
descr.optimisation = ADAM;
descr.activation = SIGMOID;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 7
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 1024;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 8
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 1024;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- layer 9
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 6;
descr.optimisation = ADAM;
descr.activation = SIGMOID;
if(!actor.Add(descr))
{
delete descr;
return false;
}
//--- Critic
critic.Clear();
//--- Input layer
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
prev_count = descr.count = LatentCount;
descr.window = 0;
descr.activation = None;
descr.optimisation = ADAM;
if(!critic.Add(descr))
{
delete descr;
return false;
}
//--- layer 1
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronConcatenate;
descr.count = 512;
descr.window = prev_count;
descr.step = 6;
descr.optimisation = ADAM;
descr.activation = LReLU;
if(!critic.Add(descr))
{
delete descr;
return false;
}
//--- layer 2
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 512;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!critic.Add(descr))
{
delete descr;
return false;
}
//--- layer 3
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 512;
descr.activation = LReLU;
descr.optimisation = ADAM;
if(!critic.Add(descr))
{
delete descr;
return false;
}
//--- layer 4
if(!(descr = new CLayerDescription()))
return false;
descr.type = defNeuronBaseOCL;
descr.count = 1;
descr.optimisation = ADAM;
descr.activation = None;
if(!critic.Add(descr))
{
delete descr;
return false;
}
//---
return true;
}
I can't connect to my MT4 trading account and can't place orders. What is the problem?
Check out the new article: Neural networks made easy (Part 49): Soft Actor-Critic.
We continue our discussion of reinforcement learning algorithms for solving continuous action space problems. In this article, I will present the Soft Actor-Critic (SAC) algorithm. The main advantage of SAC is its ability to find optimal policies that not only maximize the expected reward, but also have maximum entropy (diversity) of actions.
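For reference, the maximum-entropy objective that SAC optimizes can be sketched as follows (this is the standard formulation from the SAC literature, not a formula quoted from the article; alpha is the temperature coefficient weighting the entropy bonus):

\[
J(\pi) = \sum_{t} \mathbb{E}_{(s_t,a_t)\sim\rho_\pi}\Big[\, r(s_t,a_t) + \alpha\,\mathcal{H}\big(\pi(\cdot \mid s_t)\big) \Big],
\qquad
\mathcal{H}\big(\pi(\cdot \mid s)\big) = -\mathbb{E}_{a\sim\pi}\big[\log \pi(a \mid s)\big]
\]

Setting alpha = 0 recovers the usual expected-reward objective; a larger alpha rewards more diverse (higher-entropy) action choices.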
Soft Actor-Critic uses an Actor with a stochastic policy. This means that in state S the Actor can choose action A' from the entire action space with a certain probability Pa'. In other words, in each specific state the Actor's policy allows it to choose not one specific optimal action, but any of the possible actions, each with a certain probability. During training, the Actor learns the probability distribution of actions that yields the maximum reward.
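To make the "certain probability Pa'" concrete for a continuous action space: the Actor typically outputs the parameters of a distribution (for example, a mean and log standard deviation per action component), and the action is sampled from that distribution. Below is a minimal MQL5 sketch of such sampling; the mean/log_std inputs are hypothetical Actor outputs, and this is not code from the article:

//--- Illustrative sketch: sampling one action component from a Gaussian policy head
double RandomNormal()
  {
//--- Box-Muller transform built on MathRand() (0..32767)
   double u1 = (MathRand() + 1.0) / 32768.0;   // shift to (0,1] to avoid log(0)
   double u2 = (MathRand() + 1.0) / 32768.0;
   return MathSqrt(-2.0 * MathLog(u1)) * MathCos(2.0 * M_PI * u2);
  }
//--- returns a sampled action and fills log_prob with its log-probability
double SampleAction(const double mean, const double log_std, double &log_prob)
  {
   double std = MathExp(log_std);
   double z   = RandomNormal();                // noise ~ N(0, 1)
   double a   = mean + std * z;                // reparameterized sample
//--- log N(a | mean, std^2)
   log_prob   = -0.5 * z * z - log_std - 0.5 * MathLog(2.0 * M_PI);
   return a;
  }

The log-probability of the sampled action is exactly the quantity that feeds the entropy term of the SAC objective shown above.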
This property of a stochastic Actor policy allows us to explore different strategies and discover optimal solutions that may remain hidden under a deterministic policy. In addition, a stochastic Actor policy takes into account the uncertainty of the environment: in the presence of noise or random factors, such a policy can be more resilient and adaptive, since it can generate a variety of actions to interact with the environment effectively.
Author: Dmitriy Gizlyk