Skip to content

Commit

Permalink
Changes to make the BOP challenge work on Jean Zay
Browse files Browse the repository at this point in the history
  • Loading branch information
ElliotMaitre committed Sep 11, 2023
1 parent 693d9f2 commit 2d7e77a
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 8 deletions.
9 changes: 5 additions & 4 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
name: happypose
name: happypose2
channels:
- conda-forge
- pytorch
- nvidia
- anaconda
- defaults
dependencies:
- nvidia::cudatoolkit==11.3.1
- python=3.9
- pip
- wget
- python-wget
- joblib
- pytorch==1.11.0
- pytorch::pytorch==1.11.0
- torchvision==0.12.0
- cudatoolkit==11.3.1
- ipython
- ipykernel
- jupyterlab
Expand Down Expand Up @@ -90,4 +91,4 @@ dependencies:
- webdataset
- opencv-contrib-python
- roma
- torchgeometry
- torchgeometry
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@

# Map each dataset name to the full path of its CNOS submission file.
CNOS_SUBMISSION_PATHS = {ds_name: CNOS_SUBMISSION_DIR / fname for ds_name, fname in CNOS_SUBMISSION_FILES.items()}
# Check that every CNOS submission file exists. The filesystem is scanned once
# and the result is reused for both the debug output and the assertion, so the
# two can never disagree.
_missing_cnos_paths = [p for p in CNOS_SUBMISSION_PATHS.values() if not p.exists()]
print("cnos values =", CNOS_SUBMISSION_PATHS.values())
print("len cnos =", len(CNOS_SUBMISSION_FILES))
print("sum=", len(CNOS_SUBMISSION_PATHS) - len(_missing_cnos_paths))
# Name the missing files in the failure message so the error is actionable.
assert not _missing_cnos_paths, f"Missing CNOS submission files: {_missing_cnos_paths}"
##################################
##################################
Expand Down Expand Up @@ -292,7 +295,7 @@ def get_predictions(self, pose_estimator: PoseEstimator) -> Dict[str, PoseEstima

# ############ RUN ONLY BEGINNING OF DATASET
# # if n > 0:
# if n < 298:
#if n < 220:
# # if n != 582:
# print('################')
# print('Prediction runner SKIP')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def init_distributed_mode():
backend="nccl",
rank=rank,
world_size=world_size,
timeout=datetime.timedelta(seconds=1800 * 4),
timeout=datetime.timedelta(seconds=1800 * 16),
)
torch.distributed.barrier()

Expand Down
4 changes: 2 additions & 2 deletions happypose/toolbox/utils/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_tmp_dir() -> Path:
if "JOB_DIR" in os.environ:
tmp_dir = Path(os.environ["JOB_DIR"]) / "tmp"
else:
tmp_dir = Path("/tmp/megapose_job")
tmp_dir = Path("/gpfsscratch/rech/zja/udg82mu/happypose_datasets/results/tmp/megapose_job")
tmp_dir.parent.mkdir(exist_ok=True)
tmp_dir.mkdir(exist_ok=True)
return tmp_dir
Expand Down Expand Up @@ -149,6 +149,6 @@ def init_distributed_mode() -> None:
backend="nccl",
rank=rank,
world_size=world_size,
timeout=datetime.timedelta(seconds=4 * 1800), # 2 hours
timeout=datetime.timedelta(seconds=16 * 1800), # 8 hours
)
torch.distributed.barrier()
4 changes: 4 additions & 0 deletions happypose/toolbox/utils/tensor_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
# MegaPose
from happypose.toolbox.utils.distributed import get_rank, get_world_size

from happypose.pose_estimators.megapose.src.megapose.config import (
RESULTS_DIR,
)

def concatenate(datas):
datas = [data for data in datas if len(data) > 0]
Expand Down Expand Up @@ -168,6 +171,7 @@ def gather_distributed(self, tmp_dir=None):

if rank > 0:
tmp_file = tmp_file_template.format(rank=rank)
print("tmp_file =", tmp_file)
torch.save(self, tmp_file)

if world_size > 1:
Expand Down

0 comments on commit 2d7e77a

Please sign in to comment.