Skip to content

Commit

Permalink
using a linear layer for output instead of a tanh which clamps the act…
Browse files Browse the repository at this point in the history
…ion output.
  • Loading branch information
JulioJerez committed Sep 27, 2024
1 parent 5f7f768 commit 42118f0
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1112,8 +1112,8 @@ namespace ndAdvancedRobot

ndInt32 countX = 22;
ndInt32 countZ = 23;
//countX = 10;
//countZ = 11;
countX = 10;
countZ = 11;

// add a hidden battery of models to generate trajectories in parallel
for (ndInt32 i = 0; i < countZ; ++i)
Expand Down
4 changes: 2 additions & 2 deletions newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
//#define DEFAULT_SCENE 6 // basic Trigger
//#define DEFAULT_SCENE 7 // object Placement
//#define DEFAULT_SCENE 8 // particle fluid
#define DEFAULT_SCENE 9 // static mesh collision
//#define DEFAULT_SCENE 9 // static mesh collision
//#define DEFAULT_SCENE 10 // static user mesh collision
//#define DEFAULT_SCENE 11 // basic joints
//#define DEFAULT_SCENE 12 // basic vehicle
Expand All @@ -58,7 +58,7 @@
//#define DEFAULT_SCENE 18 // cart pole continue controller
//#define DEFAULT_SCENE 19 // unit cycle controller
//#define DEFAULT_SCENE 20 // simple industrial robot
//#define DEFAULT_SCENE 21 // advanced industrial robot
#define DEFAULT_SCENE 21 // advanced industrial robot
//#define DEFAULT_SCENE 22 // quadruped test 1
//#define DEFAULT_SCENE 23 // quadruped test 2
//#define DEFAULT_SCENE 24 // quadruped test 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,18 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::HyperParameters::HyperParamete
//*********************************************************************************************
//
//*********************************************************************************************
class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivationTanh
//class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivationTanh
class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : public ndBrainLayerActivation
{
public:
LastActivationLayer(ndInt32 neurons)
:ndBrainLayerActivationTanh(neurons * 2)
:ndBrainLayerActivation(neurons * 2)
,m_minimumSigma(ND_CONTINUE_POLICY_GRADIENT_MIN_VARIANCE)
{
}

LastActivationLayer(const LastActivationLayer& src)
:ndBrainLayerActivationTanh(src)
:ndBrainLayerActivation(src)
,m_minimumSigma(src.m_minimumSigma)
{
}
Expand All @@ -82,9 +83,10 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : pu
return new LastActivationLayer(*this);
}

#pragma optimize( "", off )
void MakePrediction(const ndBrainVector& input, ndBrainVector& output) const
{
ndBrainLayerActivationTanh::MakePrediction(input, output);
ndBrainLayerActivation::MakePrediction(input, output);
#ifdef ND_USE_LOG_DEVIATION
for (ndInt32 i = m_neurons / 2 - 1; i >= 0; --i)
{
Expand All @@ -98,9 +100,11 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster::LastActivationLayer : pu
#endif
}

#pragma optimize( "", off )
void InputDerivative(const ndBrainVector& input, const ndBrainVector& output, const ndBrainVector& outputDerivative, ndBrainVector& inputDerivative) const
{
ndBrainLayerActivationTanh::InputDerivative(input, output, outputDerivative, inputDerivative);
//ndBrainLayerActivationTanh::InputDerivative(input, output, outputDerivative, inputDerivative);
ndBrainLayerActivation::InputDerivative(input, output, outputDerivative, inputDerivative);
#ifdef ND_USE_LOG_DEVIATION
for (ndInt32 i = m_neurons / 2 - 1; i >= 0; --i)
{
Expand Down Expand Up @@ -274,7 +278,8 @@ void ndBrainAgentContinuePolicyGradient_Trainer::SelectAction(ndBrainVector& act
for (ndInt32 i = numberOfActions - 1; i >= 0; --i)
{
ndBrainFloat sample = ndBrainFloat(actions[i] + generator.m_d(generator.m_gen) * actions[i + numberOfActions]);
ndBrainFloat squashedAction = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
//ndBrainFloat squashedAction = ndClamp(sample, ndBrainFloat(-1.0f), ndBrainFloat(1.0f));
ndBrainFloat squashedAction = sample;
actions[i] = squashedAction;
}
}
Expand Down Expand Up @@ -412,7 +417,7 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGrad
}

m_policy.InitWeights();
ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
//ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));

m_trainers.SetCount(0);
m_auxiliaryTrainers.SetCount(0);
Expand Down

0 comments on commit 42118f0

Please sign in to comment.