Migrating Actor Critic Method example to Keras 3 (TF-Only) #1759

Merged: 2 commits, Feb 22, 2024
19 changes: 12 additions & 7 deletions examples/rl/actor_critic_cartpole.py
@@ -2,9 +2,10 @@
Title: Actor Critic Method
Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)
Date created: 2020/05/13
-Last modified: 2020/05/13
+Last modified: 2024/02/22
Description: Implement Actor Critic Method in CartPole environment.
Accelerator: NONE
+Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
"""
"""
## Introduction
@@ -39,11 +40,15 @@
## Setup
"""

+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
import gym
import numpy as np
+import keras
+from keras import ops
+from keras import layers
import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers

# Configuration parameters for the whole setup
seed = 42
@@ -97,8 +102,8 @@
# env.render(); Adding this line would show the attempts
# of the agent in a pop up window.

-state = tf.convert_to_tensor(state)
-state = tf.expand_dims(state, 0)
+state = ops.convert_to_tensor(state)
+state = ops.expand_dims(state, 0)

# Predict action probabilities and estimated future rewards
# from environment state
@@ -107,7 +112,7 @@

# Sample action from action probability distribution
action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-action_probs_history.append(tf.math.log(action_probs[0, action]))
+action_probs_history.append(ops.log(action_probs[0, action]))

# Apply the sampled action in our environment
state, reward, done, _ = env.step(action)
@@ -151,7 +156,7 @@
# The critic must be updated so that it predicts a better estimate of
# the future rewards.
critic_losses.append(
-huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
)

# Backpropagation
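The hunks above show the core of the migration: select the TensorFlow backend via `KERAS_BACKEND` before `keras` is imported, then replace the backend-specific `tf.*` calls with their `keras.ops` equivalents. Below is a minimal, self-contained sketch of that mapping; the dummy `state` array and the printed backend check are illustrative only and not part of the PR.

```python
import os

# Must be set before `import keras`, otherwise the default backend is used.
os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import keras
from keras import ops

# Dummy CartPole-like observation, purely for illustration.
state = np.array([0.01, -0.02, 0.03, 0.04], dtype="float32")

# Keras 3 replacements for the TF-specific calls removed in this diff:
state_t = ops.convert_to_tensor(state)   # was: tf.convert_to_tensor(state)
state_t = ops.expand_dims(state_t, 0)    # was: tf.expand_dims(state, 0)
log_val = ops.log(state_t[0, 0] + 1.0)   # was: tf.math.log(...)

print(keras.backend.backend())  # -> "tensorflow", the backend selected above
```

Note that `import tensorflow as tf` survives as a context line in the import block: consistent with the "TF-Only" label, parts of the training loop presumably still call into TensorFlow directly.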
23 changes: 13 additions & 10 deletions examples/rl/ipynb/actor_critic_cartpole.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>\n",
"**Date created:** 2020/05/13<br>\n",
"**Last modified:** 2020/05/13<br>\n",
"**Last modified:** 2024/02/22<br>\n",
"**Description:** Implement Actor Critic Method in CartPole environment."
]
},
@@ -60,17 +60,20 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
"import gym\n",
"import numpy as np\n",
"import keras\n",
"from keras import ops\n",
"from keras import layers\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"\n",
"# Configuration parameters for the whole setup\n",
"seed = 42\n",
@@ -101,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {
"colab_type": "code"
},
@@ -130,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {
"colab_type": "code"
},
@@ -152,8 +155,8 @@
" # env.render(); Adding this line would show the attempts\n",
" # of the agent in a pop up window.\n",
"\n",
" state = tf.convert_to_tensor(state)\n",
" state = tf.expand_dims(state, 0)\n",
" state = ops.convert_to_tensor(state)\n",
" state = ops.expand_dims(state, 0)\n",
"\n",
" # Predict action probabilities and estimated future rewards\n",
" # from environment state\n",
@@ -162,7 +165,7 @@
"\n",
" # Sample action from action probability distribution\n",
" action = np.random.choice(num_actions, p=np.squeeze(action_probs))\n",
" action_probs_history.append(tf.math.log(action_probs[0, action]))\n",
" action_probs_history.append(ops.log(action_probs[0, action]))\n",
"\n",
" # Apply the sampled action in our environment\n",
" state, reward, done, _ = env.step(action)\n",
@@ -206,7 +209,7 @@
" # The critic must be updated so that it predicts a better estimate of\n",
" # the future rewards.\n",
" critic_losses.append(\n",
" huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))\n",
" huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))\n",
" )\n",
"\n",
" # Backpropagation\n",
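For context, the code around these hunks calls the policy network on the converted `state` and expects both action probabilities and a value estimate. A sketch of the kind of two-headed actor-critic model such a script builds is shown below; the layer width and variable names are assumptions for illustration, not taken from this diff.

```python
import keras
from keras import layers

num_inputs = 4    # CartPole observation size
num_actions = 2   # push cart left / right
num_hidden = 128  # assumed hidden width, illustrative only

inputs = keras.Input(shape=(num_inputs,))
common = layers.Dense(num_hidden, activation="relu")(inputs)
action = layers.Dense(num_actions, activation="softmax")(common)  # actor head
critic = layers.Dense(1)(common)                                  # critic head

model = keras.Model(inputs=inputs, outputs=[action, critic])
model.summary()
```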
17 changes: 10 additions & 7 deletions examples/rl/md/actor_critic_cartpole.md
@@ -2,7 +2,7 @@

**Author:** [Apoorv Nandan](https://twitter.com/NandanApoorv)<br>
**Date created:** 2020/05/13<br>
-**Last modified:** 2020/05/13<br>
+**Last modified:** 2024/02/22<br>
**Description:** Implement Actor Critic Method in CartPole environment.


@@ -46,11 +46,14 @@ remains upright. The agent, therefore, must learn to keep the pole from falling


```python
+import os
+os.environ["KERAS_BACKEND"] = "tensorflow"
import gym
import numpy as np
+import keras
+from keras import ops
+from keras import layers
import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers

# Configuration parameters for the whole setup
seed = 42
@@ -112,8 +115,8 @@ while True:  # Run until solved
# env.render(); Adding this line would show the attempts
# of the agent in a pop up window.

-state = tf.convert_to_tensor(state)
-state = tf.expand_dims(state, 0)
+state = ops.convert_to_tensor(state)
+state = ops.expand_dims(state, 0)

# Predict action probabilities and estimated future rewards
# from environment state
@@ -122,7 +125,7 @@ while True:  # Run until solved

# Sample action from action probability distribution
action = np.random.choice(num_actions, p=np.squeeze(action_probs))
-action_probs_history.append(tf.math.log(action_probs[0, action]))
+action_probs_history.append(ops.log(action_probs[0, action]))

# Apply the sampled action in our environment
state, reward, done, _ = env.step(action)
@@ -166,7 +169,7 @@ while True:  # Run until solved
# The critic must be updated so that it predicts a better estimate of
# the future rewards.
critic_losses.append(
-huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
+huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
)

# Backpropagation
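The critic-loss hunk repeated in all three files wraps scalar predictions and returns into rank-1 tensors before handing them to Keras' built-in Huber loss. A small stand-alone sketch of that one line with dummy numbers follows; the values and variable names are illustrative only.

```python
import keras
from keras import ops

huber_loss = keras.losses.Huber()

value = ops.convert_to_tensor(0.7)  # critic's predicted return (dummy value)
ret = ops.convert_to_tensor(1.0)    # actual discounted return (dummy value)

# Mirroring the diff: wrap the scalars into shape-(1,) tensors before the call.
loss = huber_loss(ops.expand_dims(value, 0), ops.expand_dims(ret, 0))
print(float(loss))
```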