Migrating Actor Critic Method example to Keras 3 (TF-Only) (#1759)
* Migrated the example to the TF-only backend

* Added the .md and .ipynb files
sitamgithub-MSIT authored Feb 22, 2024
1 parent: ce463ed · commit: 77f512b
Showing 3 changed files with 35 additions and 24 deletions.
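
The change is the same in all three files: pin the Keras backend to TensorFlow before `keras` is imported, then route the tensor math in the training loop through the backend-agnostic `keras.ops` namespace instead of calling `tf.*` directly. A minimal sketch of that pattern under the TensorFlow backend (the `state` array below is an illustrative stand-in for a CartPole observation, not code taken from the example):

```python
import os

# The backend must be chosen before keras is imported,
# otherwise whichever backend is already configured is used.
os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
from keras import ops

# Illustrative stand-in for a 4-dimensional CartPole observation.
state = np.zeros(4, dtype="float32")

state = ops.convert_to_tensor(state)  # was: tf.convert_to_tensor(state)
state = ops.expand_dims(state, 0)     # was: tf.expand_dims(state, 0)

action_probs = ops.convert_to_tensor([[0.4, 0.6]])
log_prob = ops.log(action_probs[0, 1])  # was: tf.math.log(...)

print(state.shape, float(log_prob))  # (1, 4) and log(0.6)
```
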
19 changes: 12 additions & 7 deletions examples/rl/actor_critic_cartpole.py
@@ -2,9 +2,10 @@
 Title: Actor Critic Method
 Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)
 Date created: 2020/05/13
-Last modified: 2020/05/13
+Last modified: 2024/02/22
 Description: Implement Actor Critic Method in CartPole environment.
 Accelerator: NONE
+Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
 """
 
 """
@@ -40,11 +41,15 @@
 ## Setup
 """
 
+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
 import gym
 import numpy as np
+import keras
+from keras import ops
+from keras import layers
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
 
 # Configuration parameters for the whole setup
 seed = 42
@@ -98,8 +103,8 @@
             # env.render(); Adding this line would show the attempts
             # of the agent in a pop up window.
 
-            state = tf.convert_to_tensor(state)
-            state = tf.expand_dims(state, 0)
+            state = ops.convert_to_tensor(state)
+            state = ops.expand_dims(state, 0)
 
             # Predict action probabilities and estimated future rewards
             # from environment state
@@ -108,7 +113,7 @@
 
             # Sample action from action probability distribution
             action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-            action_probs_history.append(tf.math.log(action_probs[0, action]))
+            action_probs_history.append(ops.log(action_probs[0, action]))
 
             # Apply the sampled action in our environment
             state, reward, done, _ = env.step(action)
@@ -152,7 +157,7 @@
             # The critic must be updated so that it predicts a better estimate of
             # the future rewards.
             critic_losses.append(
-                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+                huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
             )
 
         # Backpropagation
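
Note that `import tensorflow as tf` survives as a context line: with the backend pinned to TensorFlow, the TF-specific parts of the training loop (e.g. the gradient tape behind the `# Backpropagation` step in this example) can stay as they are, while the plain tensor ops go through `keras.ops`. A quick sanity check, assuming Keras 3 with the TensorFlow backend active, that the new `ops` calls agree with the `tf` calls they replace:

```python
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import tensorflow as tf
from keras import ops

x = np.array([0.25, 0.75], dtype="float32")

# The keras.ops versions used after the migration...
converted = ops.expand_dims(ops.convert_to_tensor(x), 0)
log_vals = ops.log(converted)

# ...should match the tf.* calls they replaced.
np.testing.assert_allclose(converted, tf.expand_dims(tf.convert_to_tensor(x), 0))
np.testing.assert_allclose(log_vals, tf.math.log(tf.expand_dims(x, 0)))
print("keras.ops results match the replaced tf ops")
```
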
23 changes: 13 additions & 10 deletions examples/rl/ipynb/actor_critic_cartpole.ipynb
@@ -10,7 +10,7 @@
 "\n",
 "**Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>\n",
 "**Date created:** 2020/05/13<br>\n",
-"**Last modified:** 2020/05/13<br>\n",
+"**Last modified:** 2024/02/22<br>\n",
 "**Description:** Implement Actor Critic Method in CartPole environment."
 ]
 },
@@ -60,17 +60,20 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
 "outputs": [],
 "source": [
+"import os\n",
+"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
 "import gym\n",
 "import numpy as np\n",
+"import keras\n",
+"from keras import ops\n",
+"from keras import layers\n",
 "import tensorflow as tf\n",
-"from tensorflow import keras\n",
-"from tensorflow.keras import layers\n",
 "\n",
 "# Configuration parameters for the whole setup\n",
 "seed = 42\n",
@@ -101,7 +104,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
@@ -130,7 +133,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 0,
+"execution_count": null,
 "metadata": {
 "colab_type": "code"
 },
@@ -152,8 +155,8 @@
 "            # env.render(); Adding this line would show the attempts\n",
 "            # of the agent in a pop up window.\n",
 "\n",
-"            state = tf.convert_to_tensor(state)\n",
-"            state = tf.expand_dims(state, 0)\n",
+"            state = ops.convert_to_tensor(state)\n",
+"            state = ops.expand_dims(state, 0)\n",
 "\n",
 "            # Predict action probabilities and estimated future rewards\n",
 "            # from environment state\n",
@@ -162,7 +165,7 @@
 "\n",
 "            # Sample action from action probability distribution\n",
 "            action = np.random.choice(num_actions, p=np.squeeze(action_probs))\n",
-"            action_probs_history.append(tf.math.log(action_probs[0, action]))\n",
+"            action_probs_history.append(ops.log(action_probs[0, action]))\n",
 "\n",
 "            # Apply the sampled action in our environment\n",
 "            state, reward, done, _ = env.step(action)\n",
@@ -206,7 +209,7 @@
 "            # The critic must be updated so that it predicts a better estimate of\n",
 "            # the future rewards.\n",
 "            critic_losses.append(\n",
-"                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))\n",
+"                huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))\n",
 "            )\n",
 "\n",
 "        # Backpropagation\n",
17 changes: 10 additions & 7 deletions examples/rl/md/actor_critic_cartpole.md
@@ -2,7 +2,7 @@
 
 **Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>
 **Date created:** 2020/05/13<br>
-**Last modified:** 2020/05/13<br>
+**Last modified:** 2024/02/22<br>
 **Description:** Implement Actor Critic Method in CartPole environment.
 
 
@@ -46,11 +46,14 @@ remains upright. The agent, therefore, must learn to keep the pole from falling
 
 
 ```python
+import os
+os.environ["KERAS_BACKEND"] = "tensorflow"
 import gym
 import numpy as np
+import keras
+from keras import ops
+from keras import layers
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
 
 # Configuration parameters for the whole setup
 seed = 42
@@ -112,8 +115,8 @@ while True: # Run until solved
             # env.render(); Adding this line would show the attempts
             # of the agent in a pop up window.
 
-            state = tf.convert_to_tensor(state)
-            state = tf.expand_dims(state, 0)
+            state = ops.convert_to_tensor(state)
+            state = ops.expand_dims(state, 0)
 
             # Predict action probabilities and estimated future rewards
             # from environment state
@@ -122,7 +125,7 @@ while True: # Run until solved
 
             # Sample action from action probability distribution
             action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-            action_probs_history.append(tf.math.log(action_probs[0, action]))
+            action_probs_history.append(ops.log(action_probs[0, action]))
 
             # Apply the sampled action in our environment
             state, reward, done, _ = env.step(action)
@@ -166,7 +169,7 @@
             # The critic must be updated so that it predicts a better estimate of
             # the future rewards.
             critic_losses.append(
-                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+                huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
             )
 
         # Backpropagation
