Merge pull request #183 from agimus-project/torch2
Torch 2
nim65s authored Oct 25, 2024
2 parents ef4deb6 + 670ad0d commit a86aa36
Showing 34 changed files with 6,453 additions and 4,529 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/conda-test.yml
@@ -10,7 +10,7 @@ jobs:
shell: bash -el {0}
strategy:
matrix:
python-version: ["3.9", "3.10"]
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v4
with:
@@ -52,7 +52,7 @@ jobs:
if: steps.cache.outputs.cache-hit != 'true'

- name: Install happypose
run: pip install -e .
run: pip install -e .[multiview]

- name: Download pre-trained models required for tests
run: |
@@ -69,4 +69,4 @@ jobs:
- name: Run tests
run: |
python -m unittest
pytest
pytest tests -v
6 changes: 3 additions & 3 deletions .github/workflows/pip-test.yml
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10"]
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v4
with:
@@ -30,7 +30,7 @@ jobs:
run: pip install -U pip

- name: Install happypose
run: pip install ".[cpu,pypi]" --extra-index-url https://download.pytorch.org/whl/cpu
run: pip install ".[multiview,pypi]"

- name: Download pre-trained models required for tests
run: |
@@ -47,4 +47,4 @@ jobs:
- name: Run tests
run: |
python -m unittest
pytest
pytest tests -v
6 changes: 3 additions & 3 deletions .github/workflows/poetry-test.yml
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10"]
python-version: ["3.10", "3.11"]
steps:
- uses: actions/checkout@v4
with:
@@ -31,7 +31,7 @@ jobs:
cache: poetry

- name: Install happypose
run: poetry install --with dev -E cpu -E pypi
run: poetry install --with dev -E pypi -E multiview

- name: Download pre-trained models required for tests
run: |
@@ -48,7 +48,7 @@ jobs:
- name: Run tests
run: |
poetry run coverage run --source=happypose -m unittest
poetry run coverage run --source=happypose -m pytest
poetry run coverage run --source=happypose -m pytest tests -v
- name: Process coverage
run: poetry run coverage xml
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.1
rev: v0.5.2
hooks:
- id: ruff
args:
60 changes: 56 additions & 4 deletions README.md
@@ -39,19 +39,71 @@ git clone --branch dev --recurse-submodules https://github.com/agimus-project/ha
cd happypose
python -m venv .venv
source .venv/bin/activate
pip install .[pypi,cpu] --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[pypi]
```

### Install extras:

- `cpu`: required to get pytorch CPU from PyPI (don't use this for GPU or with conda)
- `gpu`: required to get pytorch GPU from PyPI (don't use this for CPU or with conda)
- `multiview`: installs cosypose c++ extension
- `pypi`: install pinocchio & opencv from PyPI (don't use this with conda)
- `pypi`: install torch, pinocchio & opencv from PyPI (don't use this with conda)

## Create data directory

```
# Create a data directory /somewhere/convenient; the datasets to store are quite large.
export HAPPYPOSE_DATA_DIR=/somewhere/convenient
```

## Test the install

### CPU

If you work on CPU, these models need to be downloaded:

```
# HOPE dataset models for CosyPose
python -m happypose.toolbox.utils.download --cosypose_models \
detector-bop-hope-pbr--15246 \
coarse-bop-hope-pbr--225203 \
refiner-bop-hope-pbr--955392
```

```
# For MegaPose
python -m happypose.toolbox.utils.download --megapose_models
```

and the examples:

```
python -m happypose.toolbox.utils.download --examples barbecue-sauce
```

In the HappyPose folder:

```
pytest -v ./tests
```

You may need to install `pytest-order`: `pip install pytest-order`. On CPU, the tests related to the `evaluation` and `training` of CosyPose are not run; if you want to use these functionalities, you need a GPU.

### GPU

Tests related to `evaluation` and `training` will be run if a GPU is available. Hence, a few more downloads are needed:

```
# ycbv models
python -m happypose.toolbox.utils.download --cosypose_models \
coarse-bop-ycbv-pbr--724183 \
refiner-bop-ycbv-pbr--604090
```

```
python -m happypose.toolbox.utils.download --bop_dataset ycbv
```

```
python -m happypose.toolbox.utils.download --test-results
```

The tests take much longer in this case.
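A quick way to check which of the two paths applies on a given machine is to query PyTorch directly. This is a minimal sketch added for illustration; the exact check performed by the test suite may differ:

```python
import torch

# If this prints False, only the CPU subset of the tests is expected to run.
print("CUDA available:", torch.cuda.is_available())
```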
14 changes: 8 additions & 6 deletions environment.yml
@@ -1,18 +1,20 @@
name: happypose
name: happypose_torch2
channels:
- conda-forge
- pytorch
- nvidia
- anaconda
- defaults
dependencies:
- nvidia::cudatoolkit==11.3.1
- python=3.9
- pytorch-cuda==12.1
- pip
- pytorch::pytorch==1.11.0
- torchvision==0.12.0
- mkl==2024.0.0
- pytorch
- torchvision
- geckodriver
- firefox
- opencv
- pinocchio
- torchtnt
- pip:
- torchnet
- numpy<2
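
The updated environment targets PyTorch 2 with CUDA 12.1 and pins `numpy<2`. A small sanity-check sketch (assuming the environment was created from this `environment.yml`) to confirm the stack resolved as intended:

```python
import numpy as np
import torch

# torch should report a 2.x build; torch.version.cuda is None on CPU-only installs.
print("torch", torch.__version__, "| cuda", torch.version.cuda, "| numpy", np.__version__)
assert int(np.__version__.split(".")[0]) < 2, "environment.yml pins numpy<2"
```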
@@ -12,7 +12,7 @@ def __init__(
anchor_sizes=((32,), (64,), (128,), (256,), (512,)),
):
assert backbone_str == "resnet50-fpn"
backbone = resnet_fpn_backbone("resnet50", pretrained=False)
backbone = resnet_fpn_backbone(backbone_name="resnet50", weights=None)

aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
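The backbone change above follows torchvision's move from the boolean `pretrained` flag to the `weights` argument. A minimal standalone sketch of the new call (assuming torchvision >= 0.13):

```python
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

# Older torchvision: resnet_fpn_backbone("resnet50", pretrained=False)
# Newer torchvision: weights=None means random initialization, nothing is downloaded.
backbone = resnet_fpn_backbone(backbone_name="resnet50", weights=None)
print(backbone.out_channels)  # FPN backbones expose 256 output channels by default
```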
@@ -2,7 +2,6 @@
import os

import numpy as np
from colorama import Fore, Style

from happypose.pose_estimators.cosypose.cosypose.training.train_detector import (
train_detector,
@@ -23,13 +22,13 @@
cfg = argparse.ArgumentParser("").parse_args([])
if args.config:
logger.info(
f"{Fore.GREEN}Training with config: {args.config} {Style.RESET_ALL}",
f"Training with config: {args.config}",
)

cfg.resume_run_id = None
if len(args.resume) > 0:
cfg.resume_run_id = args.resume
logger.info(f"{Fore.RED}Resuming {cfg.resume_run_id} {Style.RESET_ALL}")
logger.info(f"Resuming {cfg.resume_run_id}")

N_CPUS = int(os.environ.get("N_CPUS", 10))
N_GPUS = int(os.environ.get("N_PROCS", 1))
@@ -12,7 +12,6 @@
from torch.hub import load_state_dict_from_url
from torch.utils.data import DataLoader
from torchnet.meter import AverageValueMeter
from torchvision.models.detection.mask_rcnn import model_urls
from tqdm import tqdm

from happypose.pose_estimators.cosypose.cosypose.config import EXP_DIR
@@ -232,7 +231,9 @@ def make_datasets(dataset_names):
logger.info(f"Using pretrained model from {pretrain_path}.")
model.load_state_dict(torch.load(pretrain_path)["state_dict"])
elif args.pretrain_coco:
state_dict = load_state_dict_from_url(model_urls["maskrcnn_resnet50_fpn_coco"])
state_dict = load_state_dict_from_url(
"https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth"
)

def keep(k):
return "box_predictor" not in k and "mask_predictor" not in k
@@ -289,7 +290,8 @@ def lambd(batch):
gamma=0.1,
)
lr_scheduler.last_epoch = start_epoch - 1
lr_scheduler.step()
# This led to a warning in newer versions of PyTorch.
# lr_scheduler.step()

for epoch in range(start_epoch, end_epoch):
meters_train = defaultdict(AverageValueMeter)
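With `model_urls` gone from recent torchvision releases, the diff above hard-codes the COCO checkpoint URL and keeps the existing filtering of the predictor heads. A self-contained sketch of that pattern; the plain torchvision Mask R-CNN and the `num_classes` value are illustrative stand-ins, not the repository's detector class:

```python
from torch.hub import load_state_dict_from_url
from torchvision.models.detection import maskrcnn_resnet50_fpn

# URL taken from the diff; it replaces the removed torchvision `model_urls` lookup.
COCO_WEIGHTS_URL = (
    "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth"
)

model = maskrcnn_resnet50_fpn(weights=None, num_classes=22)  # illustrative class count
state_dict = load_state_dict_from_url(COCO_WEIGHTS_URL)
# Drop the COCO-specific box/mask predictor heads so the remaining weights load cleanly.
state_dict = {
    k: v
    for k, v in state_dict.items()
    if "box_predictor" not in k and "mask_predictor" not in k
}
missing, unexpected = model.load_state_dict(state_dict, strict=False)
```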
71 changes: 32 additions & 39 deletions happypose/pose_estimators/cosypose/cosypose/training/train_pose.py
@@ -421,52 +421,45 @@ def train_epoch():
iterator = tqdm(ds_iter_train, ncols=80)
t = time.time()
for n, data in enumerate(iterator):
if n < 3:
if n > 0:
meters_time["data"].add(time.time() - t)

optimizer.zero_grad()

t = time.time()
loss = h(data=data, meters=meters_train)
meters_time["forward"].add(time.time() - t)
iterator.set_postfix(loss=loss.item())
meters_train["loss_total"].add(loss.item())

t = time.time()
loss.backward()
total_grad_norm = torch.nn.utils.clip_grad_norm_(
model.parameters(),
max_norm=args.clip_grad_norm,
norm_type=2,
)
meters_train["grad_norm"].add(
torch.as_tensor(total_grad_norm).item()
)

optimizer.step()
meters_time["backward"].add(time.time() - t)
meters_time["memory"].add(
torch.cuda.max_memory_allocated() / 1024.0**2,
)

if epoch < args.n_epochs_warmup:
lr_scheduler_warmup.step()
t = time.time()
else:
break
if n > 0:
meters_time["data"].add(time.time() - t)

optimizer.zero_grad()

t = time.time()
loss = h(data=data, meters=meters_train)
meters_time["forward"].add(time.time() - t)
iterator.set_postfix(loss=loss.item())
meters_train["loss_total"].add(loss.item())

t = time.time()
loss.backward()
total_grad_norm = torch.nn.utils.clip_grad_norm_(
model.parameters(),
max_norm=args.clip_grad_norm,
norm_type=2,
)
meters_train["grad_norm"].add(torch.as_tensor(total_grad_norm).item())

optimizer.step()
meters_time["backward"].add(time.time() - t)
meters_time["memory"].add(
torch.cuda.max_memory_allocated() / 1024.0**2,
)

if epoch < args.n_epochs_warmup:
lr_scheduler_warmup.step()
t = time.time()

if epoch >= args.n_epochs_warmup:
lr_scheduler.step()

@torch.no_grad()
def validation():
model.eval()
for n, sample in enumerate(tqdm(ds_iter_val, ncols=80)):
if n < 3:
loss = h(data=sample, meters=meters_val)
meters_val["loss_total"].add(loss.item())
else:
break
loss = h(data=sample, meters=meters_val)
meters_val["loss_total"].add(loss.item())

@torch.no_grad()
def test():
16 changes: 7 additions & 9 deletions happypose/pose_estimators/megapose/evaluation/bop.py
@@ -119,18 +119,19 @@ def convert_results_to_bop(
t = TCO_n[:3, -1] * 1e3 # m -> mm conversion
R = TCO_n[:3, :3]
row = predictions.infos.iloc[n]
print("row =", row)
obj_id = int(row.label.split("_")[-1])
if use_pose_score:
score = row.pose_score
score = row["pose_score"]
else:
score = row.score
score = row["score"]
if "time" in row:
time = row.time
time = row["time"]
else:
time = -1
pred = dict(
scene_id=row.scene_id,
im_id=row.view_id,
scene_id=row["scene_id"],
im_id=row["view_id"],
obj_id=obj_id,
score=score,
t=t,
@@ -184,7 +185,6 @@ def _run_bop_evaluation(filename, eval_dir, eval_detection=False, dummy=False):

def run_evaluation(cfg: BOPEvalConfig) -> None:
"""Runs the bop evaluation for the given setting."""
print(cfg)
results_path = Path(cfg.results_path)
eval_dir = Path(cfg.eval_dir)

@@ -203,9 +203,7 @@ def run_evaluation(cfg: BOPEvalConfig) -> None:
csv_path = eval_dir / f"{method}_{cfg.dataset.split('.')[0]}-{cfg.split}.csv"

# pose scores give better AR scores in general
convert_results_to_bop(
results_path, csv_path, cfg.method, use_pose_score=cfg.use_post_score
)
convert_results_to_bop(results_path, csv_path, cfg.method, use_pose_score=False)

if not cfg.convert_only:
_run_bop_evaluation(csv_path, cfg.eval_dir, eval_detection=False)
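The same file also swaps pandas attribute access (`row.pose_score`) for bracket access (`row["pose_score"]`), which works for any column name and cannot collide with `Series` attributes. A tiny illustration with made-up values:

```python
import pandas as pd

infos = pd.DataFrame([{"scene_id": 48, "view_id": 1, "pose_score": 0.91, "score": 0.87}])
row = infos.iloc[0]

# Bracket access is unambiguous; attribute access can shadow Series methods.
print(row["scene_id"], row["view_id"], row["pose_score"])
```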