diff --git a/scripts/parse_jsons.py b/scripts/parse_jsons.py
index 53390c2..cc3aafd 100644
--- a/scripts/parse_jsons.py
+++ b/scripts/parse_jsons.py
@@ -20,6 +20,9 @@ def read_json_files(directory: str) -> List[Dict[str, Any]]:
     episode_stats = []
     for filename in os.listdir(directory):
         if filename.endswith(".json"):
+            # Ignore empty files
+            if os.path.getsize(os.path.join(directory, filename)) == 0:
+                continue
             with open(os.path.join(directory, filename), "r") as f:
                 episode_stats.append(json.load(f))
     return episode_stats
@@ -97,7 +100,7 @@ def calculate_avg_fail_per_category(stats: List[Dict[str, Any]]) -> None:
     # Add each row to the table
     for category, stats in sorted(
         category_stats.items(),
-        key=lambda x: (x[1]["fail_count"] / x[1]["total_count"]),
+        key=lambda x: x[1]["fail_count"],
         reverse=True,
     ):
         avg_failure_rate = (stats["fail_count"] / stats["total_count"]) * 100
@@ -159,6 +162,7 @@ def main() -> None:
     """
     parser = argparse.ArgumentParser(description="Process some integers.")
     parser.add_argument("directory", type=str, help="Directory to process")
+    parser.add_argument("--compact", "-c", action="store_true", help="Compact output")
     args = parser.parse_args()
 
     episode_stats = read_json_files(args.directory)
@@ -170,6 +174,9 @@ def main() -> None:
     print()
     calculate_avg_performance(episode_stats)
 
+    if args.compact:
+        return
+
     print()
     calculate_avg_fail_per_category(episode_stats)
 
diff --git a/zsos/policy/base_objectnav_policy.py b/zsos/policy/base_objectnav_policy.py
index 2d7ab53..32183d0 100644
--- a/zsos/policy/base_objectnav_policy.py
+++ b/zsos/policy/base_objectnav_policy.py
@@ -23,7 +23,7 @@ from habitat_baselines.common.tensor_dict import TensorDict
 
     from zsos.policy.base_policy import BasePolicy
 
-except ModuleNotFoundError:
+except Exception:
 
     class BasePolicy:
         pass
diff --git a/zsos/policy/itm_policy.py b/zsos/policy/itm_policy.py
index 1e4eb32..209176e 100644
--- a/zsos/policy/itm_policy.py
+++ b/zsos/policy/itm_policy.py
@@ -15,7 +15,7 @@ try:
     from habitat_baselines.common.tensor_dict import TensorDict
 
-except ModuleNotFoundError:
+except Exception:
     pass
 
 
 PROMPT_SEPARATOR = "|"
diff --git a/zsos/policy/utils/non_habitat_policy/nh_pointnav_policy.py b/zsos/policy/utils/non_habitat_policy/nh_pointnav_policy.py
index 8353e34..b9ed850 100644
--- a/zsos/policy/utils/non_habitat_policy/nh_pointnav_policy.py
+++ b/zsos/policy/utils/non_habitat_policy/nh_pointnav_policy.py
@@ -44,11 +44,14 @@ def forward(
 
 
 class PointNavResNetNet(nn.Module):
-    def __init__(self):
+    def __init__(self, discrete_actions: bool = False, no_fwd_dict: bool = False):
         super().__init__()
-        self.prev_action_embedding = nn.Linear(
-            in_features=2, out_features=32, bias=True
-        )
+        if discrete_actions:
+            self.prev_action_embedding = nn.Embedding(4 + 1, 32)
+        else:
+            self.prev_action_embedding = nn.Linear(
+                in_features=2, out_features=32, bias=True
+            )
         self.tgt_embeding = nn.Linear(in_features=3, out_features=32, bias=True)
         self.visual_encoder = ResNetEncoder()
         self.visual_fc = nn.Sequential(
@@ -58,6 +61,8 @@ def __init__(self):
         )
         self.state_encoder = LSTMStateEncoder(576, 512, 2)
         self.num_recurrent_layers = self.state_encoder.num_recurrent_layers
+        self.discrete_actions = discrete_actions
+        self.no_fwd_dict = no_fwd_dict
 
     def forward(
         self,
@@ -84,7 +89,15 @@ def forward(
 
         x.append(self.tgt_embeding(goal_observations))
 
-        prev_actions = self.prev_action_embedding(masks * prev_actions.float())
+        if self.discrete_actions:
+            prev_actions = prev_actions.squeeze(-1)
+            start_token = torch.zeros_like(prev_actions)
+            # The mask means the previous action will be zero, an extra dummy action
+            prev_actions = self.prev_action_embedding(
+                torch.where(masks.view(-1), prev_actions + 1, start_token)
+            )
+        else:
+            prev_actions = self.prev_action_embedding(masks * prev_actions.float())
 
         x.append(prev_actions)
 
@@ -93,6 +106,9 @@ def forward(
             out, rnn_hidden_states, masks, rnn_build_seq_info
         )
 
+        if self.no_fwd_dict:
+            return out, rnn_hidden_states  # type: ignore
+
         return out, rnn_hidden_states, {}
 
 
diff --git a/zsos/policy/utils/pointnav_policy.py b/zsos/policy/utils/pointnav_policy.py
index d50f22d..2d41290 100644
--- a/zsos/policy/utils/pointnav_policy.py
+++ b/zsos/policy/utils/pointnav_policy.py
@@ -7,15 +7,32 @@ from gym.spaces import Discrete
 from torch import Tensor
 
+habitat_version = ""
+
 try:
-    from habitat_baselines.common.tensor_dict import TensorDict
+    import habitat
     from habitat_baselines.rl.ddppo.policy import PointNavResNetPolicy
-    from habitat_baselines.rl.ppo.policy import PolicyActionData
 
-    class PointNavResNetTensorOutputPolicy(PointNavResNetPolicy):
-        def act(self, *args, **kwargs) -> Tuple[Tensor, Tensor]:
-            policy_actions: "PolicyActionData" = super().act(*args, **kwargs)
-            return policy_actions.actions, policy_actions.rnn_hidden_states
+    habitat_version = habitat.__version__
+
+    if habitat_version == "0.1.5":
+        print("Using habitat 0.1.5; assuming SemExp code is being used")
+
+        class PointNavResNetTensorOutputPolicy(PointNavResNetPolicy):
+            def act(self, *args, **kwargs) -> Tuple[Tensor, Tensor]:
+                value, action, action_log_probs, rnn_hidden_states = super().act(
+                    *args, **kwargs
+                )
+                return action, rnn_hidden_states
+
+    else:
+        from habitat_baselines.common.tensor_dict import TensorDict
+        from habitat_baselines.rl.ppo.policy import PolicyActionData
+
+        class PointNavResNetTensorOutputPolicy(PointNavResNetPolicy):
+            def act(self, *args, **kwargs) -> Tuple[Tensor, Tensor]:
+                policy_actions: "PolicyActionData" = super().act(*args, **kwargs)
+                return policy_actions.actions, policy_actions.rnn_hidden_states
 
     HABITAT_BASELINES_AVAILABLE = True
 except ModuleNotFoundError:
@@ -121,8 +138,6 @@ def load_pointnav_policy(file_path: str) -> PointNavResNetTensorOutputPolicy:
 
     Returns:
         PointNavResNetTensorOutputPolicy: The policy.
""" - ckpt_dict = torch.load(file_path, map_location="cpu") - if HABITAT_BASELINES_AVAILABLE: obs_space = SpaceDict( { @@ -138,13 +153,40 @@ def load_pointnav_policy(file_path: str) -> PointNavResNetTensorOutputPolicy: } ) action_space = Discrete(4) - pointnav_policy = PointNavResNetTensorOutputPolicy.from_config( - ckpt_dict["config"], obs_space, action_space - ) - pointnav_policy.load_state_dict(ckpt_dict["state_dict"]) + if habitat_version == "0.1.5": + pointnav_policy = PointNavResNetTensorOutputPolicy( + obs_space, + action_space, + hidden_size=512, + num_recurrent_layers=2, + rnn_type="LSTM", + resnet_baseplanes=32, + backbone="resnet18", + normalize_visual_inputs=False, + obs_transform=None, + ) + # Need to overwrite the visual encoder because it uses an older version of + # ResNet that calculates the compression size differently + from zsos.policy.utils.non_habitat_policy.nh_pointnav_policy import ( + PointNavResNetNet, + ) + + # print(pointnav_policy) + pointnav_policy.net = PointNavResNetNet( + discrete_actions=True, no_fwd_dict=True + ) + state_dict = torch.load(file_path + ".state_dict", map_location="cpu") + else: + ckpt_dict = torch.load(file_path, map_location="cpu") + pointnav_policy = PointNavResNetTensorOutputPolicy.from_config( + ckpt_dict["config"], obs_space, action_space + ) + state_dict = ckpt_dict["state_dict"] + pointnav_policy.load_state_dict(state_dict) return pointnav_policy else: + ckpt_dict = torch.load(file_path, map_location="cpu") pointnav_policy = PointNavResNetTensorOutputPolicy() current_state_dict = pointnav_policy.state_dict() pointnav_policy.load_state_dict( diff --git a/zsos/semexp_env/eval.py b/zsos/semexp_env/eval.py index 178f49e..ea8503d 100644 --- a/zsos/semexp_env/eval.py +++ b/zsos/semexp_env/eval.py @@ -1,15 +1,22 @@ import os +from typing import Any, Dict, List, Tuple +import cv2 import numpy as np import torch from arguments import get_args from envs import make_vec_envs +from moviepy.editor import ImageSequenceClip + +from zsos.semexp_env.semexp_policy import SemExpITMPolicyV3 +from zsos.utils.img_utils import reorient_rescale_map, resize_images os.environ["OMP_NUM_THREADS"] = "1" args = get_args() args.agent = "zsos" # Doesn't really matter as long as it's not "sem_exp" args.split = "val" +args.task_config = "objnav_gibson_zsos.yaml" np.random.seed(args.seed) torch.manual_seed(args.seed) @@ -22,23 +29,145 @@ def main(): num_episodes = int(args.num_eval_episodes) args.device = torch.device("cuda:0" if args.cuda else "cpu") + policy = SemExpITMPolicyV3( + text_prompt="Seems like there is a target_object ahead.", + pointnav_policy_path="data/pointnav_weights.pth", + depth_image_shape=(224, 224), + det_conf_threshold=0.8, + pointnav_stop_radius=0.9, + use_max_confidence=False, + object_map_erosion_size=5, + exploration_thresh=0.0, + obstacle_map_area_threshold=1.5, # in square meters + min_obstacle_height=0.61, + max_obstacle_height=0.88, + hole_area_thresh=100000, + use_vqa=False, + vqa_prompt="Is this ", + coco_threshold=0.8, + non_coco_threshold=0.4, + camera_height=0.88, + min_depth=0.5, + max_depth=5.0, + camera_fov=79, + image_width=640, + visualize=True, + ) + torch.set_num_threads(1) envs = make_vec_envs(args) obs, infos = envs.reset() - print(obs, infos) for ep_num in range(num_episodes): + vis_imgs = [] for step in range(args.max_episode_length): - action = torch.randint(1, 3, (args.num_processes,)) + obs_dict = merge_obs_infos(obs, infos) + if step == 0: + masks = torch.zeros(1, 1, device=obs.device) + else: + masks = torch.ones(1, 
1, device=obs.device) + action, policy_infos = policy.act(obs_dict, masks) + + if "VIDEO_DIR" in os.environ: + vis_imgs.append(create_frame(policy_infos)) + + action = action.squeeze(0) + obs, rew, done, infos = envs.step(action) - print(obs.shape) - print(obs.device) if done: + print("Success:", infos[0]["success"]) + print("SPL:", infos[0]["spl"]) + if "VIDEO_DIR" in os.environ: + generate_video(vis_imgs, infos[0]) break print("Test successfully completed") +def merge_obs_infos( + obs: torch.Tensor, infos: Tuple[Dict, ...] +) -> Dict[str, torch.Tensor]: + """Merge the observations and infos into a single dictionary.""" + rgb = obs[:, :3, ...].permute(0, 2, 3, 1) + depth = obs[:, 3:4, ...].permute(0, 2, 3, 1) + info_dict = infos[0] + + def tensor_from_numpy( + tensor: torch.Tensor, numpy_array: np.ndarray + ) -> torch.Tensor: + device = tensor.device + new_tensor = torch.from_numpy(numpy_array).to(device) + return new_tensor + + obs_dict = { + "rgb": rgb, + "depth": depth, + "objectgoal": info_dict["goal_name"].replace("-", " "), + "gps": tensor_from_numpy(obs, info_dict["gps"]).unsqueeze(0), + "compass": tensor_from_numpy(obs, info_dict["compass"]).unsqueeze(0), + "heading": tensor_from_numpy(obs, info_dict["heading"]).unsqueeze(0), + } + + return obs_dict + + +def create_frame(policy_infos: Dict[str, Any]) -> np.ndarray: + vis_imgs = [] + for k in ["annotated_rgb", "annotated_depth", "obstacle_map", "value_map"]: + img = policy_infos[k] + if "map" in k: + img = reorient_rescale_map(img) + if k == "annotated_depth" and np.array_equal(img, np.ones_like(img) * 255): + # Put text in the middle saying "Target not curently detected" + text = "Target not currently detected" + text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1, 1)[0] + cv2.putText( + img, + text, + (img.shape[1] // 2 - text_size[0] // 2, img.shape[0] // 2), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (0, 0, 0), + 1, + ) + vis_imgs.append(img) + vis_img = np.hstack(resize_images(vis_imgs, match_dimension="height")) + return vis_img + + +def generate_video(frames: List[np.ndarray], infos: Dict[str, Any]) -> None: + """ + Saves the given list of rgb frames as a video at 10 FPS. 
+    file name, which should contain the following:
+        - episode_id
+        - scene_id
+        - success
+        - spl
+        - dtg
+        - goal_name
+
+    """
+    video_dir = os.environ.get("VIDEO_DIR", "video_dir")
+    if not os.path.exists(video_dir):
+        os.makedirs(video_dir)
+    episode_id = int(infos["episode_id"])
+    scene_id = infos["scene_id"]
+    success = int(infos["success"])
+    spl = infos["spl"]
+    dtg = infos["distance_to_goal"]
+    goal_name = infos["goal_name"]
+    filename = (
+        f"epid={episode_id:03d}-scid={scene_id}-succ={success}-spl={spl:.2f}"
+        f"-dtg={dtg:.2f}-goal={goal_name}.mp4"
+    )
+    filename = os.path.join(video_dir, filename)
+    # Create a video clip from the frames
+    clip = ImageSequenceClip(frames, fps=10)
+
+    # Write the video file
+    clip.write_videofile(filename)
+
+
 if __name__ == "__main__":
     main()
diff --git a/zsos/semexp_env/objnav_gibson_zsos.yaml b/zsos/semexp_env/objnav_gibson_zsos.yaml
index eadfeeb..4c6ff56 100644
--- a/zsos/semexp_env/objnav_gibson_zsos.yaml
+++ b/zsos/semexp_env/objnav_gibson_zsos.yaml
@@ -31,7 +31,7 @@ SIMULATOR:
 TASK:
   TYPE: ObjectNav-v1
   POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"]
-  SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR']
+  SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR', 'HEADING_SENSOR']
   MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL']
   SUCCESS:
     SUCCESS_DISTANCE: 0.2
diff --git a/zsos/semexp_env/semexp_policy.py b/zsos/semexp_env/semexp_policy.py
new file mode 100644
index 0000000..84ffd91
--- /dev/null
+++ b/zsos/semexp_env/semexp_policy.py
@@ -0,0 +1,161 @@
+from typing import Any, Dict, Tuple, Union
+
+import numpy as np
+import torch
+from depth_camera_filtering import filter_depth
+from torch import Tensor
+
+from zsos.mapping.obstacle_map import ObstacleMap
+from zsos.policy.base_objectnav_policy import BaseObjectNavPolicy
+from zsos.policy.itm_policy import ITMPolicy, ITMPolicyV2, ITMPolicyV3
+from zsos.utils.geometry_utils import xyz_yaw_to_tf_matrix
+from zsos.vlm.grounding_dino import ObjectDetections
+
+
+class TorchActionIDs:
+    STOP = torch.tensor([[0]], dtype=torch.long)
+    MOVE_FORWARD = torch.tensor([[1]], dtype=torch.long)
+    TURN_LEFT = torch.tensor([[2]], dtype=torch.long)
+    TURN_RIGHT = torch.tensor([[3]], dtype=torch.long)
+
+
+class SemExpMixin:
+    """This Python mixin only contains code relevant for running a BaseObjectNavPolicy
+    explicitly within Habitat (vs. the real world, etc.) and will endow any parent class
+    (that is a subclass of BaseObjectNavPolicy) with the necessary methods to run in
+    Habitat.
+ """ + + _stop_action: Tensor = TorchActionIDs.STOP + _start_yaw: Union[float, None] = None # must be set by _reset() method + + def __init__( + self: BaseObjectNavPolicy, + camera_height: float, + min_depth: float, + max_depth: float, + camera_fov: float, + image_width: int, + *args: Any, + **kwargs: Any, + ) -> None: + super().__init__(*args, **kwargs) + assert self._compute_frontiers, "Must set self._compute_frontiers = True" + self._camera_height = camera_height + self._min_depth = min_depth + self._max_depth = max_depth + camera_fov_rad = np.deg2rad(camera_fov) + self._camera_fov = camera_fov_rad + self._fx = self._fy = image_width / (2 * np.tan(camera_fov_rad / 2)) + + def act( + self: Union["SemExpMixin", BaseObjectNavPolicy], + observations: Dict[str, Union[Tensor, str]], + masks: Tensor, + deterministic=True, + ) -> Tuple[Tensor, Dict[str, Any]]: + """Converts object ID to string name, returns action as PolicyActionData""" + parent_cls: BaseObjectNavPolicy = super() # type: ignore + try: + action, rnn_hidden_states = parent_cls.act( + observations, None, None, masks, deterministic + ) + except StopIteration: + action = self._stop_action + return action, self._policy_info + + def _initialize(self) -> Tensor: + """Turn left 30 degrees 12 times to get a 360 view at the beginning""" + self._done_initializing = not self._num_steps < 11 # type: ignore + return TorchActionIDs.TURN_LEFT + + def _reset(self) -> None: + parent_cls: BaseObjectNavPolicy = super() # type: ignore + parent_cls._reset() + self._start_yaw = None + + def _get_policy_info(self, detections: ObjectDetections) -> Dict[str, Any]: + """Get policy info for logging""" + parent_cls: BaseObjectNavPolicy = super() # type: ignore + info = parent_cls._get_policy_info(detections) + + if not self._visualize: # type: ignore + return info + + if self._start_yaw is None: + self._start_yaw = self._observations_cache["habitat_start_yaw"] + info["start_yaw"] = self._start_yaw + return info + + def _cache_observations( + self: Union["SemExpMixin", BaseObjectNavPolicy], observations: Dict[str, Any] + ): + """Caches the rgb, depth, and camera transform from the observations. + + Args: + observations (TensorDict): The observations from the current timestep. 
+ """ + if len(self._observations_cache) > 0: + return + rgb = observations["rgb"][0].cpu().numpy() + depth = observations["depth"][0].cpu().numpy() + x, y = observations["gps"][0].cpu().numpy() + camera_yaw = observations["compass"][0].cpu().item() + depth = filter_depth(depth.reshape(depth.shape[:2]), blur_type=None) + # Habitat GPS makes west negative, so flip y + camera_position = np.array([x, -y, self._camera_height]) + robot_xy = camera_position[:2] + tf_camera_to_episodic = xyz_yaw_to_tf_matrix(camera_position, camera_yaw) + + self._obstacle_map: ObstacleMap + self._obstacle_map.update_map( + depth, + tf_camera_to_episodic, + self._min_depth, + self._max_depth, + self._fx, + self._fy, + self._camera_fov, + ) + frontiers = self._obstacle_map.frontiers + self._obstacle_map.update_agent_traj(robot_xy, camera_yaw) + self._observations_cache = { + "frontier_sensor": frontiers, + "nav_depth": observations["depth"], # for pointnav + "robot_xy": robot_xy, + "robot_heading": camera_yaw, + "object_map_rgbd": [ + ( + rgb, + depth, + tf_camera_to_episodic, + self._min_depth, + self._max_depth, + self._fx, + self._fy, + ) + ], + "value_map_rgbd": [ + ( + rgb, + depth, + tf_camera_to_episodic, + self._min_depth, + self._max_depth, + self._camera_fov, + ) + ], + "habitat_start_yaw": observations["heading"][0].item(), + } + + +class SemExpITMPolicy(SemExpMixin, ITMPolicy): + pass + + +class SemExpITMPolicyV2(SemExpMixin, ITMPolicyV2): + pass + + +class SemExpITMPolicyV3(SemExpMixin, ITMPolicyV3): + pass diff --git a/zsos/vlm/blip2.py b/zsos/vlm/blip2.py index f2dccd3..3b5dd84 100644 --- a/zsos/vlm/blip2.py +++ b/zsos/vlm/blip2.py @@ -2,11 +2,15 @@ import numpy as np import torch -from lavis.models import load_model_and_preprocess from PIL import Image from .server_wrapper import ServerMixin, host_model, send_request, str_to_image +try: + from lavis.models import load_model_and_preprocess +except ModuleNotFoundError: + print("Could not import lavis. This is OK if you are only using the client.") + class BLIP2: def __init__( diff --git a/zsos/vlm/blip2itm.py b/zsos/vlm/blip2itm.py index 4aba8ad..251634a 100644 --- a/zsos/vlm/blip2itm.py +++ b/zsos/vlm/blip2itm.py @@ -1,10 +1,14 @@ import numpy as np import torch -from lavis.models import load_model_and_preprocess from PIL import Image from .server_wrapper import ServerMixin, host_model, send_request, str_to_image +try: + from lavis.models import load_model_and_preprocess +except ModuleNotFoundError: + print("Could not import lavis. This is OK if you are only using the client.") + class BLIP2ITM: """BLIP 2 Image-Text Matching model.""" @@ -55,6 +59,7 @@ def __init__(self, port: int = 12182): self.url = f"http://localhost:{port}/blip2itm" def cosine(self, image: np.ndarray, txt: str) -> float: + print(f"BLIP2ITMClient.cosine: {image.shape}, {txt}") response = send_request(self.url, image=image, txt=txt) return float(response["response"]) diff --git a/zsos/vlm/detections.py b/zsos/vlm/detections.py index 9299cc3..68201b4 100644 --- a/zsos/vlm/detections.py +++ b/zsos/vlm/detections.py @@ -3,7 +3,14 @@ import cv2 import numpy as np import torch -from torchvision.ops import box_convert + +try: + from torchvision.ops import box_convert +except ImportError: + print("Could not import box_convert. 
+    print("Could not import box_convert. This is OK if you are only using the client.")
+
+    def box_convert(boxes, in_fmt, out_fmt):
+        raise NotImplementedError
 
 
 class ObjectDetections:
@@ -21,7 +28,10 @@ def __init__(
         fmt: str = "cxcywh",
     ):
         self.image_source = image_source
-        self.boxes = box_convert(boxes=boxes, in_fmt=fmt, out_fmt="xyxy")
+        if fmt != "xyxy":
+            self.boxes = box_convert(boxes=boxes, in_fmt=fmt, out_fmt="xyxy")
+        else:
+            self.boxes = boxes
         self.logits = logits
         self.phrases = phrases
         self._annotated_frame: Optional[np.ndarray] = None
diff --git a/zsos/vlm/grounding_dino.py b/zsos/vlm/grounding_dino.py
index 6c59552..0291773 100644
--- a/zsos/vlm/grounding_dino.py
+++ b/zsos/vlm/grounding_dino.py
@@ -4,11 +4,17 @@
 
 import torch
 import torchvision.transforms.functional as F
-from groundingdino.util.inference import load_model, predict
 
 from zsos.vlm.detections import ObjectDetections
 
 from .server_wrapper import ServerMixin, host_model, send_request, str_to_image
 
+try:
+    from groundingdino.util.inference import load_model, predict
+except ModuleNotFoundError:
+    print(
+        "Could not import groundingdino. This is OK if you are only using the client."
+    )
+
 GROUNDING_DINO_CONFIG = os.environ["GROUNDING_DINO_CONFIG"]
 GROUNDING_DINO_WEIGHTS = os.environ["GROUNDING_DINO_WEIGHTS"]
diff --git a/zsos/vlm/sam.py b/zsos/vlm/sam.py
index 9352e90..3b34299 100644
--- a/zsos/vlm/sam.py
+++ b/zsos/vlm/sam.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 import torch
-from mobile_sam import SamPredictor, sam_model_registry
 
 from .server_wrapper import (
     ServerMixin,
@@ -14,6 +13,11 @@
     str_to_image,
 )
 
+try:
+    from mobile_sam import SamPredictor, sam_model_registry
+except ModuleNotFoundError:
+    print("Could not import mobile_sam. This is OK if you are only using the client.")
+
 
 class MobileSAM:
     def __init__(
diff --git a/zsos/vlm/yolov7.py b/zsos/vlm/yolov7.py
index 7ff36e1..2db4301 100644
--- a/zsos/vlm/yolov7.py
+++ b/zsos/vlm/yolov7.py
@@ -10,16 +10,17 @@
 from .server_wrapper import ServerMixin, host_model, send_request, str_to_image
 
 sys.path.insert(0, "yolov7/")
-
-from models.experimental import attempt_load  # noqa: E402
-from utils.datasets import letterbox  # noqa: E402
-from utils.general import (  # noqa: E402
-    check_img_size,
-    non_max_suppression,
-    scale_coords,
-)
-from utils.torch_utils import TracedModel  # noqa: E402
-
+try:
+    from models.experimental import attempt_load  # noqa: E402
+    from utils.datasets import letterbox  # noqa: E402
+    from utils.general import (  # noqa: E402
+        check_img_size,
+        non_max_suppression,
+        scale_coords,
+    )
+    from utils.torch_utils import TracedModel  # noqa: E402
+except Exception:
+    print("Could not import yolov7. This is OK if you are only using the client.")
 sys.path.pop(0)