Skip to content

Commit

Permalink
It seems polynomial reward functions are better than Gaussian
Browse files Browse the repository at this point in the history
  • Loading branch information
JulioJerez committed Sep 18, 2024
1 parent b7318ed commit b229e7a
Showing 1 changed file with 15 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -539,33 +539,39 @@ namespace ndAdvancedRobot
return ND_DEAD_PENALTY;
}

ndFloat32 invRewardSigma2 = 500.0f;
ndFloat32 rewardWeigh = 1.0f / 6.0f;

const ndMatrix effectorMatrix(m_effectorLocalTarget * m_arm_4->GetBody0()->GetMatrix());
const ndMatrix baseMatrix(m_effectorLocalBase * m_base_rotator->GetBody1()->GetMatrix());
const ndMatrix currentEffectorMatrix(effectorMatrix * baseMatrix.OrthoInverse());
const ndVector positError(CalculateDeltaTargetPosit(currentEffectorMatrix));
const ndVector positError2 = positError * positError;

auto ScalarReward = [](ndFloat32 param2)
{
ndFloat32 rewardWeigh = 1.0f / 6.0f;
//ndFloat32 invRewardSigma2 = 500.0f;
//return rewardWeigh* ndExp(-invRewardSigma2 * positError2.m_z);
return rewardWeigh * ndClamp(ndFloat32(1.0f - param2), ndFloat32(0.0f), ndFloat32(1.0f));
};

//ndFloat32 azimuthReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_z);
//ndFloat32 azimuthReward = rewardWeigh * ndExp(-50.0f * positError2.m_z);
ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
//ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
ndFloat32 azimuthReward = ScalarReward(positError2.m_z);

ndFloat32 reward = azimuthReward;
if (azimuthReward > 0.5f)
{
const ndVector rotationError(CalculateDeltaTargetRotation(currentEffectorMatrix));
const ndVector rotationError2 = rotationError * rotationError;

ndFloat32 omega_xReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_x);
ndFloat32 omega_yReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_y);
ndFloat32 omega_zReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_z);
ndFloat32 omega_xReward = ScalarReward(rotationError2.m_x);
ndFloat32 omega_yReward = ScalarReward(rotationError2.m_y);
ndFloat32 omega_zReward = ScalarReward(rotationError2.m_z);
reward += (omega_xReward + omega_yReward + omega_zReward);
if ((omega_xReward > 1.0e-3f) || (omega_yReward > 1.0e-3f) || (omega_zReward > 1.0e-3f))
{
ndFloat32 posit_xReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_x);
ndFloat32 posit_yReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_y);
ndFloat32 posit_xReward = ScalarReward(positError2.m_x);
ndFloat32 posit_yReward = ScalarReward(positError2.m_y);
reward += (posit_xReward + posit_yReward);
}
}
Expand Down

0 comments on commit b229e7a

Please sign in to comment.