It seems polynomial reward functions are better than Gaussian ones

MADEAPPS · Sep 18, 2024 · b229e7a · b229e7a
1 parent b7318ed
commit b229e7a
Showing 1 changed file with 15 additions and 9 deletions.
diff --git a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
@@ -539,33 +539,39 @@ namespace ndAdvancedRobot
 				return ND_DEAD_PENALTY;
 			}
 
-			ndFloat32 invRewardSigma2 = 500.0f;
-			ndFloat32 rewardWeigh = 1.0f / 6.0f;
-
 			const ndMatrix effectorMatrix(m_effectorLocalTarget * m_arm_4->GetBody0()->GetMatrix());
 			const ndMatrix baseMatrix(m_effectorLocalBase * m_base_rotator->GetBody1()->GetMatrix());
 			const ndMatrix currentEffectorMatrix(effectorMatrix * baseMatrix.OrthoInverse());
 			const ndVector positError(CalculateDeltaTargetPosit(currentEffectorMatrix));
 			const ndVector positError2 = positError * positError;
 
+			auto ScalarReward = [](ndFloat32 param2)
+			{
+				ndFloat32 rewardWeigh = 1.0f / 6.0f;
+				//ndFloat32 invRewardSigma2 = 500.0f;
+				//return rewardWeigh* ndExp(-invRewardSigma2 * positError2.m_z);
+				return rewardWeigh * ndClamp(ndFloat32(1.0f - param2), ndFloat32(0.0f), ndFloat32(1.0f));
+			};
+
 			//ndFloat32 azimuthReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_z);
 			//ndFloat32 azimuthReward = rewardWeigh * ndExp(-50.0f * positError2.m_z);
-			ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
+			//ndFloat32 azimuthReward = ndClamp (ndFloat32 (1.0f - positError2.m_z), 0.0f, 1.0f);
+			ndFloat32 azimuthReward = ScalarReward(positError2.m_z);
 
 			ndFloat32 reward = azimuthReward;
 			if (azimuthReward > 0.5f)
 			{
 				const ndVector rotationError(CalculateDeltaTargetRotation(currentEffectorMatrix));
 				const ndVector rotationError2 = rotationError * rotationError;
 
-				ndFloat32 omega_xReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_x);
-				ndFloat32 omega_yReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_y);
-				ndFloat32 omega_zReward = rewardWeigh * ndExp(-invRewardSigma2 * rotationError2.m_z);
+				ndFloat32 omega_xReward = ScalarReward(rotationError2.m_x);
+				ndFloat32 omega_yReward = ScalarReward(rotationError2.m_y);
+				ndFloat32 omega_zReward = ScalarReward(rotationError2.m_z);
 				reward += (omega_xReward + omega_yReward + omega_zReward);
 				if ((omega_xReward > 1.0e-3f) || (omega_yReward > 1.0e-3f) || (omega_zReward > 1.0e-3f))
 				{
-					ndFloat32 posit_xReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_x);
-					ndFloat32 posit_yReward = rewardWeigh * ndExp(-invRewardSigma2 * positError2.m_y);
+					ndFloat32 posit_xReward = ScalarReward(positError2.m_x);
+					ndFloat32 posit_yReward = ScalarReward(positError2.m_y);
 					reward += (posit_xReward + posit_yReward);
 				}
 			}