Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hypersim preprocessing scripts #64

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions script/dataset_preprocess/hypersim/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Hypersim preprocessing

## Download

Download the [Hypersim](https://github.com/apple/ml-hypersim) dataset using [this script](https://github.com/apple/ml-hypersim/blob/20f398f4387aeca73175494d6a2568f37f372150/code/python/tools/dataset_download_images.py).

Download the scene split file from [here](https://github.com/apple/ml-hypersim/blob/main/evermotion_dataset/analysis/metadata_images_split_scene_v1.csv).

## Process dataset

Run the preprocessing script:

```bash
python script/dataset_preprocess/hypersim/preprocess_hypersim.py --split_csv /path/to/metadata_images_split_scene_v1.csv
```

(Optional) Tar the processed data, for example:

```bash
cd data/Hypersim/processed/train
tar -cf ../../hypersim_processed_train.tar .
```
69 changes: 69 additions & 0 deletions script/dataset_preprocess/hypersim/hypersim_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Author: Bingxin Ke
# Last modified: 2024-02-19


from pylab import count_nonzero, clip, np


# Adapted from https://github.com/apple/ml-hypersim/blob/main/code/python/tools/scene_generate_images_tonemap.py
def tone_map(rgb, entity_id_map):
    """Tone-map an HDR image into the [0, 1] range.

    A single global scale is chosen so that the 90th-percentile brightness of
    the valid pixels becomes 0.8 after gamma correction (exponent 1 / 2.2).
    The scaled image is then gamma-corrected and clipped to [0, 1].

    Args:
        rgb: Image array indexed as ``rgb[:, :, c]``; channels 0, 1 and 2 are
            weighted 0.3 / 0.59 / 0.11 to obtain brightness.
        entity_id_map: Per-pixel entity ids used to mask the brightness map.
            ``-1`` marks an invalid pixel; the value ``0`` must not occur.

    Returns:
        The tone-mapped image with values clipped to [0, 1].

    Raises:
        AssertionError: If any entry of ``entity_id_map`` equals 0.
    """
    assert (entity_id_map != 0).all()

    gamma = 1.0 / 2.2  # standard gamma correction exponent
    inv_gamma = 1.0 / gamma
    target_percentile = 90  # brightness percentile measured on the input...
    target_brightness = 0.8  # ...and the value it should reach after scaling
    # Below this percentile brightness the scale is forced to 0.0, which
    # avoids a divide-by-zero on (nearly) black images.
    eps = 0.0001

    is_valid = entity_id_map != -1

    # With no valid pixels there is nothing to measure: keep the scale at 1.0.
    scale = 1.0
    if count_nonzero(is_valid) != 0:
        # "CCIR601 YIQ" method for computing brightness
        luma = 0.3 * rgb[:, :, 0] + 0.59 * rgb[:, :, 1] + 0.11 * rgb[:, :, 2]
        current_brightness = np.percentile(luma[is_valid], target_percentile)

        if current_brightness < eps:
            scale = 0.0
        else:
            # Solves (scale * current_brightness) ** gamma == target_brightness
            # for scale. Equivalent to Snavely's expression in
            # https://github.com/snavely/pbrs_tonemapper/blob/master/tonemap_rgbe.py:
            #   exp(log(target_brightness) * inv_gamma - log(current_brightness))
            scale = np.power(target_brightness, inv_gamma) / current_brightness

    # Negative values are zeroed before the fractional power is applied.
    scaled = np.maximum(scale * rgb, 0)
    return clip(np.power(scaled, gamma), 0, 1)


# According to https://github.com/apple/ml-hypersim/issues/9
def dist_2_depth(width, height, flt_focal, distance):
    """Convert per-pixel distance along the viewing ray into planar depth.

    For every pixel a ray ``(x, y, flt_focal)`` through the pixel centre is
    built, with ``x`` and ``y`` measured from the image centre. The distance
    is then multiplied by ``flt_focal / ||ray||``.

    Args:
        width: Number of pixel columns.
        height: Number of pixel rows.
        flt_focal: Focal length, in the same units as the pixel coordinates.
        distance: Distance values; must broadcast against ``(height, width)``.

    Returns:
        ``distance / ||ray|| * flt_focal``, evaluated per pixel.
    """
    # Pixel-centre coordinates relative to the image centre, as float32.
    xs = np.linspace((-0.5 * width) + 0.5, (0.5 * width) - 0.5, width).astype(
        np.float32
    )
    ys = np.linspace((-0.5 * height) + 0.5, (0.5 * height) - 0.5, height).astype(
        np.float32
    )

    # Default "xy" indexing yields (height, width) grids: x varies along
    # columns, y along rows.
    grid_x, grid_y = np.meshgrid(xs, ys)
    grid_z = np.full((height, width), flt_focal, np.float32)

    rays = np.stack([grid_x, grid_y, grid_z], axis=2)  # (height, width, 3)
    ray_length = np.linalg.norm(rays, 2, 2)

    return distance / ray_length * flt_focal
149 changes: 149 additions & 0 deletions script/dataset_preprocess/hypersim/preprocess_hypersim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Author: Bingxin Ke
# Last modified: 2024-02-19

import argparse
import os

import cv2
import h5py
import numpy as np
import pandas as pd
from hypersim_util import dist_2_depth, tone_map
from tqdm import tqdm

IMG_WIDTH = 1024
IMG_HEIGHT = 768
FOCAL_LENGTH = 886.81

if __name__ == "__main__":
    # Convert raw Hypersim HDF5 frames into tone-mapped 8-bit RGB PNGs and
    # 16-bit depth PNGs. Each split gets its own output folder containing the
    # images, a "<rgb> <depth>" filename list and a per-frame metadata CSV.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--split_csv",
        type=str,
        default="data/Hypersim/metadata_images_split_scene_v1.csv",
    )
    parser.add_argument("--dataset_dir", type=str, default="data/Hypersim/raw_data")
    parser.add_argument("--output_dir", type=str, default="data/Hypersim/processed")

    args = parser.parse_args()

    split_csv = args.split_csv
    dataset_dir = args.dataset_dir
    output_dir = args.output_dir

    # %%
    raw_meta_df = pd.read_csv(split_csv)
    # Only frames flagged as part of the public release are processed.
    meta_df = raw_meta_df[raw_meta_df.included_in_public_release].copy()

    # %%
    for split in ["train", "val", "test"]:
        split_output_dir = os.path.join(output_dir, split)
        # exist_ok lets the script be re-run on an existing output tree
        # instead of crashing with FileExistsError.
        os.makedirs(split_output_dir, exist_ok=True)

        split_meta_df = meta_df[meta_df.split_partition_name == split].copy()
        # Per-frame statistics, filled in below. Column order is preserved in
        # the CSV written at the end of the split.
        split_meta_df["rgb_path"] = None
        split_meta_df["rgb_mean"] = np.nan
        split_meta_df["rgb_std"] = np.nan
        split_meta_df["rgb_min"] = np.nan
        split_meta_df["rgb_max"] = np.nan
        split_meta_df["depth_path"] = None
        split_meta_df["depth_mean"] = np.nan
        split_meta_df["depth_std"] = np.nan
        split_meta_df["depth_min"] = np.nan
        split_meta_df["depth_max"] = np.nan
        split_meta_df["invalid_ratio"] = np.nan

        for i, row in tqdm(split_meta_df.iterrows(), total=len(split_meta_df)):
            # Locate the three raw inputs for this frame.
            images_dir = os.path.join(dataset_dir, row.scene_name, "images")
            final_dir = os.path.join(
                images_dir, f"scene_{row.camera_name}_final_hdf5"
            )
            geometry_dir = os.path.join(
                images_dir, f"scene_{row.camera_name}_geometry_hdf5"
            )
            rgb_src = os.path.join(final_dir, f"frame.{row.frame_id:04d}.color.hdf5")
            dist_src = os.path.join(
                geometry_dir, f"frame.{row.frame_id:04d}.depth_meters.hdf5"
            )
            entity_src = os.path.join(
                geometry_dir, f"frame.{row.frame_id:04d}.render_entity_id.hdf5"
            )

            # Explicit check rather than `assert`, so it also runs under
            # `python -O`, and it covers the entity-id file as well.
            for src in (rgb_src, dist_src, entity_src):
                if not os.path.exists(src):
                    raise FileNotFoundError(f"Missing Hypersim input file: {src}")

            # Load data
            with h5py.File(rgb_src, "r") as f:
                rgb = np.array(f["dataset"]).astype(float)
            with h5py.File(dist_src, "r") as f:
                dist_from_center = np.array(f["dataset"]).astype(float)
            with h5py.File(entity_src, "r") as f:
                render_entity_id = np.array(f["dataset"]).astype(int)

            # Tone map
            rgb_color_tm = tone_map(rgb, render_entity_id)
            rgb_int = (rgb_color_tm * 255).astype(np.uint8)  # [H, W, RGB]

            # Distance -> depth
            plane_depth = dist_2_depth(
                IMG_WIDTH, IMG_HEIGHT, FOCAL_LENGTH, dist_from_center
            )
            valid_mask = render_entity_id != -1

            # Record invalid ratio (fraction of pixels with entity id -1)
            invalid_ratio = (valid_mask.size - valid_mask.sum()) / valid_mask.size
            plane_depth[~valid_mask] = 0

            # Save as png, one folder per scene
            scene_path = row.scene_name
            os.makedirs(os.path.join(split_output_dir, scene_path), exist_ok=True)

            rgb_name = f"rgb_{row.camera_name}_fr{row.frame_id:04d}.png"
            rgb_path = os.path.join(scene_path, rgb_name)
            rgb_out_file = os.path.join(split_output_dir, rgb_path)
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(
                rgb_out_file, cv2.cvtColor(rgb_int, cv2.COLOR_RGB2BGR)
            ):
                raise OSError(f"Failed to write RGB image: {rgb_out_file}")

            # Depth is scaled by 1000 and stored as uint16; the source file is
            # named "depth_meters", so this is presumably metres -> millimetres.
            # NOTE(review): values above 65535 (or NaN) do not survive the
            # uint16 cast intact -- confirm whether they should be masked.
            plane_depth *= 1000.0
            plane_depth = plane_depth.astype(np.uint16)
            depth_name = f"depth_plane_{row.camera_name}_fr{row.frame_id:04d}.png"
            depth_path = os.path.join(scene_path, depth_name)
            depth_out_file = os.path.join(split_output_dir, depth_path)
            if not cv2.imwrite(depth_out_file, plane_depth):
                raise OSError(f"Failed to write depth image: {depth_out_file}")

            # Meta data (paths are relative to the split output directory)
            split_meta_df.at[i, "rgb_path"] = rgb_path
            split_meta_df.at[i, "rgb_mean"] = np.mean(rgb_int)
            split_meta_df.at[i, "rgb_std"] = np.std(rgb_int)
            split_meta_df.at[i, "rgb_min"] = np.min(rgb_int)
            split_meta_df.at[i, "rgb_max"] = np.max(rgb_int)

            split_meta_df.at[i, "depth_path"] = depth_path
            # Statistics are taken on the quantised depth, i.e. what a reader
            # of the PNG will recover after dividing by 1000.
            restored_depth = plane_depth / 1000.0
            split_meta_df.at[i, "depth_mean"] = np.mean(restored_depth)
            split_meta_df.at[i, "depth_std"] = np.std(restored_depth)
            split_meta_df.at[i, "depth_min"] = np.min(restored_depth)
            split_meta_df.at[i, "depth_max"] = np.max(restored_depth)

            split_meta_df.at[i, "invalid_ratio"] = invalid_ratio

        # One "<rgb_path> <depth_path>" pair per line, no trailing newline.
        lines = split_meta_df.apply(
            lambda r: f"{r['rgb_path']} {r['depth_path']}", axis=1
        ).tolist()
        with open(
            os.path.join(split_output_dir, f"filename_list_{split}.txt"), "w"
        ) as f:
            f.write("\n".join(lines))

        # Passing the path lets pandas open the file with the right newline
        # handling on every platform.
        split_meta_df.to_csv(
            os.path.join(split_output_dir, f"filename_meta_{split}.csv"),
            header=True,
        )

    print("Preprocess finished")
Loading