From b229e7a7c80b63d139acafc0c3f78b421a5ad060 Mon Sep 17 00:00:00 2001
From: JulioJerez <jerezjulio0@gmail.com>
Date: Wed, 18 Sep 2024 12:19:50 -0700
Subject: [PATCH] it seems polynomial reward functions are better than gaussian

---
 .../demos/ndAdvancedIndustrialRobot.cpp       | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
index e49db4d96..cbdbf06f9 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
@@ -539,18 +539,24 @@ namespace ndAdvancedRobot
 				return ND_DEAD_PENALTY;
 			}
 
-			ndFloat32 invRewardSigma2 = 500.0f;
-			ndFloat32 rewardWeigh = 1.0f / 6.0f;
-
 			const ndMatrix effectorMatrix(m_effectorLocalTarget * m_arm_4->GetBody0()->GetMatrix());
 			const ndMatrix baseMatrix(m_effectorLocalBase * m_base_rotator->GetBody1()->GetMatrix());
 			const ndMatrix currentEffectorMatrix(effectorMatrix * baseMatrix.OrthoInverse());
 			const ndVector positError(CalculateDeltaTargetPosit(currentEffectorMatrix));
 			const ndVector positError2 = positError * positError;
 
+			auto ScalarReward = [](ndFloat32 param2) // param2: one squared error component (callers pass members of positError2 / rotationError2)
+			{
+				ndFloat32 rewardWeigh = 1.0f / 6.0f; // weight applied to every term this helper produces
+				// Polynomial reward: rewardWeigh * (1 - param2), with (1 - param2) passed through ndClamp using bounds 0 and 1.
+				// Gaussian form this replaces, kept for reference: rewardWeigh * ndExp(-500.0f * param2)
+				return rewardWeigh * ndClamp(ndFloat32(1.0f - param2), ndFloat32(0.0f), ndFloat32(1.0f)); // NOTE(review): presumably ndClamp(v, lo, hi) bounds v to [lo, hi], so the result never exceeds rewardWeigh (1/6) - confirm thresholds compared against it
+			};
+
 			//ndFloat32 azimuthReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_z);
 			//ndFloat32 azimuthReward = rewardWeigh * ndExp(-50.0f * positError2.m_z);
-			ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
+			//ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
+			ndFloat32 azimuthReward = ScalarReward(positError2.m_z);
 
 			ndFloat32 reward = azimuthReward;
 			if (azimuthReward > 0.5f)
@@ -558,14 +564,14 @@ namespace ndAdvancedRobot
 				const ndVector rotationError(CalculateDeltaTargetRotation(currentEffectorMatrix));
 				const ndVector rotationError2 = rotationError * rotationError;
 
-				ndFloat32 omega_xReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_x);
-				ndFloat32 omega_yReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_y);
-				ndFloat32 omega_zReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_z);
+				ndFloat32 omega_xReward = ScalarReward(rotationError2.m_x);
+				ndFloat32 omega_yReward = ScalarReward(rotationError2.m_y);
+				ndFloat32 omega_zReward = ScalarReward(rotationError2.m_z);
 				reward += (omega_xReward + omega_yReward + omega_zReward);
 				if ((omega_xReward > 1.0e-3f) || (omega_yReward > 1.0e-3f) || (omega_zReward > 1.0e-3f))
 				{
-					ndFloat32 posit_xReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_x);
-					ndFloat32 posit_yReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_y);
+					ndFloat32 posit_xReward = ScalarReward(positError2.m_x);
+					ndFloat32 posit_yReward = ScalarReward(positError2.m_y);
 					reward += (posit_xReward + posit_yReward);
 				}
 			}