You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I replaced the second character with Gouki and defined its color as white: Gouki_WHITE = [255, 255, 255]. As long as the two characters do not switch positions, the game proceeds normally, but once they switch positions, they can no longer approach each other. Player 1 is KEN_RED = [248, 0, 0]; player 2 is Gouki_WHITE = [255, 255, 255].
Please point out my programming error.
robot.py:
import os
import random
import re
import time
from collections import defaultdict
from typing import Dict, List, Literal, Optional
import numpy as np
from gymnasium import spaces
from loguru import logger
from phospho.lab import get_provider_and_model, get_sync_client
from rich import print
from .config import (
INDEX_TO_MOVE,
META_INSTRUCTIONS,
META_INSTRUCTIONS_WITH_LOWER,
MOVES,
NB_FRAME_WAIT,
X_SIZE,
Y_SIZE,
)
from .observer import detect_position_from_color
class Robot:
# Game-playing agent: observe() records the latest game state, plan() asks
# the language model for moves and queues the matching button presses, and
# act() returns one queued button index per game frame.
observations: List[Optional[dict]] = None # memory: newest observation last, observe() keeps at most 10
next_steps: List[int] # action plan: queue of button indices, act() pops from the front
actions: dict # actions of the agents during a step of the game
# actions of the agents during the previous step of the game
# (observe() stores only non-zero actions, at most the last 10 per agent key)
previous_actions: Dict[str, List[int]]
reward: float # reward of the agent
action_space: spaces.Space
character: Optional[str] = None # character name
# NOTE(review): context_prompt() also uses `side` as the player index
# ("P" + str(side + 1) and "agent_" + str(side)), not only as a stage side.
side: int # side of the stage where playing: 0 = left, 1 = right
current_direction: Literal["Left", "Right"] # current direction facing
sleepy: Optional[bool] = False # if the robot is sleepy
only_punch: Optional[bool] = False # if the robot only punch
model: str # model of the robot
# NOTE(review): declared here, but the visible methods only use a local
# variable of the same name inside context_prompt() - confirm it is needed.
super_bar_own: int
player_nb: int # player number
def __init__(
    self,
    action_space: spaces.Space,
    character: str,
    side: int,
    character_color: list,
    ennemy_color: list,
    sleepy: bool = False,
    only_punch: bool = False,
    model: str = "mistral:mistral-large-latest",
    player_nb: int = 0,  # 0 means not specified
):
    """
    Build a robot for one player.

    Args:
        action_space: action space handed over by the environment.
        character: name of the character being played.
        side: starting side; 0 makes the robot face "Right", 1 makes it
            face "Left". Any other value leaves current_direction unset.
        character_color: color list used to locate the own character.
        ennemy_color: color list used to locate the opponent.
        sleepy: when True, act() always returns 0 (no move).
        only_punch: when True, act() keeps queueing a Hadouken.
        model: model identifier passed to the LLM provider lookup.
        player_nb: player number, 0 when not specified.
    """
    # Identity and configuration of the robot
    self.action_space = action_space
    self.character = character
    self.side = side
    self.model = model
    self.player_nb = player_nb
    self.sleepy = sleepy
    self.only_punch = only_punch

    # Colors used to find both characters on screen
    self.character_color = character_color  # Character color list
    self.ennemy_color = ennemy_color  # ennemy color list

    # Initial facing direction: a robot starting on the left looks right
    # and vice versa.
    for start_side, facing in ((0, "Right"), (1, "Left")):
        if side == start_side:
            self.current_direction = facing
            break

    # Mutable per-game state, all starting empty
    self.observations = []
    self.next_steps = []
    self.previous_actions = defaultdict(list)
    self.actions = {}
def act(self) -> int:
    """
    Return the button index to press on the current game frame.

    The first entry of the planned steps is consumed on every call.
    0 means "no action"; see the MOVES dictionary for the mapping of
    actions to moves. 0 is also returned when nothing is planned or when
    the robot is sleepy.
    """
    queue = self.next_steps
    if not queue or self.sleepy:
        return 0  # No move

    if self.only_punch:
        # Do a Hadouken: pick the motion that matches the facing direction
        if self.current_direction == "Right":
            motion = ("Down", "Right+Down", "Right", "High Punch")
        elif self.current_direction == "Left":
            motion = ("Down", "Down+Left", "Left", "High Punch")
        else:
            motion = ()
        queue.extend(MOVES[name] for name in motion)

    return queue.pop(0)
def plan(self) -> None:
    """
    Fill the action queue when it is empty.

    In SF3, moves are based on combos, which are lists of actions that must
    be executed in a sequence. The move names returned by the LLM are
    translated into button presses for the direction currently faced.

    Moves of Ken
    https://www.eventhubs.com/guides/2009/may/11/ken-street-fighter-3-third-strike-character-guide/
    Moves of Ryu
    https://www.eventhubs.com/guides/2008/may/09/ryu-street-fighter-3-third-strike-character-guide/
    """
    # Something is still queued: nothing to plan
    if len(self.next_steps) != 0:
        return

    # Call the LLM to get the next steps
    combos = self.get_moves_from_llm()

    queued_buttons = []
    for combo in combos:
        sequence = META_INSTRUCTIONS_WITH_LOWER[combo][
            self.current_direction.lower()
        ]
        # NB_FRAME_WAIT idle frames (0) are appended after each combo
        queued_buttons.extend(sequence + [0] * NB_FRAME_WAIT)

    self.next_steps.extend(queued_buttons)
def observe(self, observation: dict, actions: dict, reward: float):
    """
    Record one step of the game.

    The observation is enriched with the detected positions, stored at the
    end of the memory (latest observations last), the reward is saved, the
    action history is updated and the facing direction is refreshed.
    """
    # detect the position of characters and ennemy based on color
    own_position = detect_position_from_color(observation, self.character_color)
    foe_position = detect_position_from_color(observation, self.ennemy_color)

    # Positions are only kept when both characters were found;
    # otherwise both entries are set to None.
    both_found = own_position is not None and foe_position is not None
    observation["character_position"] = own_position if both_found else None
    observation["ennemy_position"] = foe_position if both_found else None

    # Memory is capped at 10 observations, the oldest one is dropped
    memory = self.observations
    memory.append(observation)
    if len(memory) > 10:
        memory.pop(0)

    self.reward = reward

    # Update previous actions: only non-zero actions are remembered,
    # at most the last 10 per agent
    for agent_key, action in actions.items():
        if action != 0:
            self.previous_actions[agent_key].append(action)
            if len(self.previous_actions[agent_key]) > 10:
                self.previous_actions[agent_key].pop(0)

    # Update current direction based on character and ennemy positions
    self.update_current_direction(observation)
def update_current_direction(self, observation):
    """
    Refresh the facing direction from the positions in the observation.

    The robot faces "Right" when its x coordinate is strictly smaller than
    the opponent's and "Left" otherwise. The direction is left untouched
    when either position is missing.
    """
    own = observation.get("character_position")
    foe = observation.get("ennemy_position")
    if own is None or foe is None:
        return
    self.current_direction = "Right" if own[0] < foe[0] else "Left"
def context_prompt(self) -> str:
    """
    Describe the current game situation as text for the LLM.

    The text is built from the latest observation and contains, one per
    line: where the opponent is, which powerful moves are available, the
    last action of each player, and the current score with a hint.
    """
    side = self.side
    obs_own = self.observations[-1]["character_position"]
    obs_opp = self.observations[-1]["ennemy_position"]
    super_bar_own = self.observations[-1]["P" + str(side + 1)]["super_bar"][0]

    # Position of the opponent relative to the robot (x axis only)
    if obs_own is None or obs_opp is None:
        position_prompt = "Unable to determine opponent's position."
    else:
        own_x, _own_y = obs_own
        opp_x, _opp_y = obs_opp
        gap = opp_x - own_x
        if gap < 0:
            position_prompt = "Your opponent is on the left."
        elif gap > 0:
            position_prompt = "Your opponent is on the right."
        else:
            position_prompt = "You are very close to the opponent."

    # Available powerful moves; the "very powerful" case takes precedence
    if super_bar_own >= 120 or super_bar_own == 0:
        power_prompt = "You can now only use very powerful moves. The names of the very powerful moves are: Super attack 3, Super attack 4"
    elif super_bar_own >= 30:
        power_prompt = "You can now use a powerful move. The names of the powerful moves are: Megafireball, Super attack 2."
    else:
        power_prompt = ""

    # Last action of both players, 0 ("No action" fallback) when none is recorded
    last_action_prompt = ""
    if len(self.previous_actions) > 0:
        own_history = self.previous_actions["agent_" + str(side)]
        opp_history = self.previous_actions["agent_" + str(abs(1 - side))]
        act_own = own_history[-1] if len(own_history) != 0 else 0
        act_opp = opp_history[-1] if len(opp_history) != 0 else 0
        str_act_own = INDEX_TO_MOVE.get(act_own, "No action")
        str_act_opp = INDEX_TO_MOVE.get(act_opp, "No action")
        last_action_prompt = f"Your last action was {str_act_own}. The opponent's last action was {str_act_opp}."

    # Hint derived from the sign of the reward
    reward = self.reward
    if reward > 0:
        score_prompt = "You are winning. Keep attacking the opponent."
    elif reward < 0:
        score_prompt = "You are losing. Continue to attack the opponent but don't get hit."
    else:
        score_prompt = ""

    return f"""{position_prompt}
{power_prompt}
{last_action_prompt}
Your current score is {reward}. {score_prompt}
To increase your score, move toward the opponent and attack the opponent. To prevent your score from decreasing, don't get hit by the opponent.
"""
def get_moves_from_llm(
    self,
) -> List[str]:
    """
    Get a list of valid move names from the language model.

    The LLM answer is expected to be a bullet point list ("- <move>").
    Every bullet is stripped and lower-cased, and kept only when it is a
    key of META_INSTRUCTIONS_WITH_LOWER. The LLM is queried again until at
    least one valid move is found.

    When the DISABLE_LLM environment variable equals "True", no request is
    made and a single random move name is returned instead.

    Returns:
        A non-empty list of move names, each a key of
        META_INSTRUCTIONS_WITH_LOWER.
    """
    # If we are in the test environment, we don't want to call the LLM
    if os.getenv("DISABLE_LLM", "False") == "True":
        logger.debug("DISABLE_LLM is True, returning a random move")
        # plan() uses every returned item as a key of
        # META_INSTRUCTIONS_WITH_LOWER, so a move *name* must be returned
        # here; a value taken from MOVES (a button index) would not be a
        # valid key and would not match the declared List[str] return type.
        return [random.choice(list(META_INSTRUCTIONS_WITH_LOWER.keys()))]

    valid_moves = []
    # NOTE(review): there is no retry cap; the loop only ends once the LLM
    # produces at least one known move.
    while len(valid_moves) == 0:
        llm_response = self.call_llm()
        # The response is a bullet point list of moves. Use regex
        moves = re.findall(r"- ([\w ]+)", llm_response)
        # Filter the moves that are not in the list of moves
        invalid_moves = []
        valid_moves = []
        for move in moves:
            cleaned_move_name = move.strip().lower()
            if cleaned_move_name in META_INSTRUCTIONS_WITH_LOWER:
                # Colored console trace, one color per player
                if self.player_nb == 1:
                    print(
                        f"[red] Player {self.player_nb} move: {cleaned_move_name}"
                    )
                elif self.player_nb == 2:
                    print(
                        f"[green] Player {self.player_nb} move: {cleaned_move_name}"
                    )
                valid_moves.append(cleaned_move_name)
            else:
                logger.debug(f"Invalid completion: {move}")
                logger.debug(f"Cleaned move name: {cleaned_move_name}")
                invalid_moves.append(move)
        if len(invalid_moves) > 1:
            logger.warning(f"Many invalid moves: {invalid_moves}")
    logger.debug(f"Next moves: {valid_moves}")
    return valid_moves
def call_llm(
self,
temperature: float = 0.7,
max_tokens: int = 50,
top_p: float = 1.0,
) -> str:
"""
Make an API call to the language model.
Edit this method to change the behavior of the robot!
"""
provider_name, model_name = get_provider_and_model(self.model)
client = get_sync_client(provider_name)
# Generate the prompts
move_list = "- " + "\n - ".join([move for move in META_INSTRUCTIONS])
system_prompt = f"""You are the best and most aggressive Street Fighter III 3rd strike player in the world.
Your character is {self.character}. Your goal is to defeat the opponent. While constantly approaching and striking your opponent, you should also pay attention to your defense. If the opponent attacks you with moves like Fireball, you should retreat to defend, then seize the opportunity to close in and launch a deadly attack on your opponent.
{self.context_prompt()}
The moves you can use are:
{move_list}
Reply with a bullet point list of moves. The format should be: - <name of the move> separated by a new line.
Example if the opponent is close:
I replace the second character with Gouki and define it as white Gouki_WHITE = [255, 255, 255]. If the two characters do not switch positions, the game proceeds normally, but once the two characters switch positions, the two characters cannot approach each other. play1 KEN_RED = [248, 0, 0], play 2 Gouki_WHITE = [255, 255, 255].
Please point out my programming error.
robot.py:
import os
import random
import re
import time
from collections import defaultdict
from typing import Dict, List, Literal, Optional
import numpy as np
from gymnasium import spaces
from loguru import logger
from phospho.lab import get_provider_and_model, get_sync_client
from rich import print
from .config import (
INDEX_TO_MOVE,
META_INSTRUCTIONS,
META_INSTRUCTIONS_WITH_LOWER,
MOVES,
NB_FRAME_WAIT,
X_SIZE,
Y_SIZE,
)
from .observer import detect_position_from_color
class Robot:
observations: List[Optional[dict]] = None # memory
next_steps: List[int] # action plan
actions: dict # actions of the agents during a step of the game
# actions of the agents during the previous step of the game
previous_actions: Dict[str, List[int]]
reward: float # reward of the agent
{power_prompt}
{last_action_prompt}
Your current score is {reward}. {score_prompt}
To increase your score, move toward the opponent and attack the opponent. To prevent your score from decreasing, don't get hit by the opponent.
"""
Your character is {self.character}. Your goal is to defeat the opponent. While constantly approaching and striking your opponent, you should also pay attention to your defense. If the opponent attacks you with moves like Fireball, you should retreat to defend, then seize the opportunity to close in and launch a deadly attack on your opponent.
{self.context_prompt()}
The moves you can use are:
{move_list}
Reply with a bullet point list of moves. The format should be:
- <name of the move>
separated by a new line.Example if the opponent is close:
Example if the opponent is far:
Fireball
Move closer"""
The text was updated successfully, but these errors were encountered: