You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hello, when I am using MineCLIP, I don't understand why the results obtained from running the same text and video twice are different, and the numerical difference is huge. In one of my tests, it directly changed from 0.9 to -0.5. Here is my test script, and I have also confirmed the random seed. This result appears to be purely random.
import torch
import hydra
from omegaconf import OmegaConf
from mineclip import MineCLIP
from PIL import Image
from torchvision import transforms
import cv2
import numpy as np
import random
@torch.no_grad()
@hydra.main(config_name="conf", config_path=".", version_base="1.1")
def main(cfg):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
OmegaConf.set_struct(cfg, False)
cfg.pop("ckpt", None)
OmegaConf.set_struct(cfg, True)
model = MineCLIP(**cfg).to(device)
cap = cv2.VideoCapture("obs_0.mp4")
frames_list = []
while(cap.isOpened()):
ret,frame = cap.read()
if not ret:
break
else:
frame = cv2.resize(frame,(160,256))
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frames_list.append(frame)
cap.release()
result_frames = torch.as_tensor(np.stack(frames_list))
result_frames = result_frames.permute(0,3,1,2)
result_frames = result_frames.reshape(10,3,160,256)
video = result_frames.unsqueeze(0)
video = video.to(device)
image_feats = model.forward_image_features(video)
video_feats = model.forward_video_features(image_feats)
text = ["harvest 1 coal with stone pickaxe"]
text_feats = model.encode_text(text)
reward_scores, _ = model.forward_reward_head(video_feats, text_tokens=text_feats)
print("Reward score between the image and text:", reward_scores.item())
if __name__ == "__main__":
random.seed(0)
main()
The text was updated successfully, but these errors were encountered:
Hello, when I am using MineCLIP, I don't understand why the results obtained from running the same text and video twice are different, and the numerical difference is huge. In one of my tests, it directly changed from 0.9 to -0.5. Here is my test script, and I have also confirmed the random seed. This result appears to be purely random.
The text was updated successfully, but these errors were encountered: