Replace oss URLs with http #7

Open · wants to merge 1 commit into main
67 changes: 44 additions & 23 deletions ofasys/adaptor/image_vqgan.py
@@ -22,12 +22,15 @@ def make_vqgan_code_bucket_position(bucket_size, num_relative_distance):
coords_w = torch.arange(bucket_size)
coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
- relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
- relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
+ relative_coords = coords_flatten[:, :, None] - \
+ coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
+ relative_coords = relative_coords.permute(
+ 1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
relative_coords[:, :, 0] += bucket_size - 1 # shift to start from 0
relative_coords[:, :, 1] += bucket_size - 1
relative_coords[:, :, 0] *= 2 * bucket_size - 1
- relative_position_index = torch.zeros(size=(bucket_size * bucket_size + 1,) * 2, dtype=relative_coords.dtype)
+ relative_position_index = torch.zeros(
+ size=(bucket_size * bucket_size + 1,) * 2, dtype=relative_coords.dtype)
relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
relative_position_index[0, 0:] = num_relative_distance - 3
relative_position_index[0:, 0] = num_relative_distance - 2
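The hunk above only re-wraps the relative-position bucketing code; its behaviour is unchanged. For orientation, a minimal sketch of what the helper produces, assuming it is importable from ofasys.adaptor.image_vqgan and using bucket_size=42 purely as an example value:

```python
from ofasys.adaptor.image_vqgan import make_vqgan_code_bucket_position  # assumed import path

bucket_size = 42                               # example value, not necessarily the configured default
num_rel_dis = (2 * bucket_size - 1) ** 2 + 3   # same formula as code_num_rel_dis in __init__ below
rp_bucket = make_vqgan_code_bucket_position(bucket_size, num_rel_dis)

print(rp_bucket.shape)       # torch.Size([1765, 1765]) == (bucket_size**2 + 1,) * 2
print(int(rp_bucket.max()))  # always < num_rel_dis, so it can index the relative-position Embedding
```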
@@ -47,15 +50,17 @@ class ImageVqganAdaptorConfig(BaseAdaptorConfig):
)
vqgan_factor: int = field(default=8, metadata={"help": "vqgan factor"})
vqgan_model_path: str = field(
default="oss://ofasys/tasks/image_gen/vqgan/last.ckpt",
default="http://ofasys.oss-cn-zhangjiakou.aliyuncs.com/tasks/image_gen/vqgan/last.ckpt",
metadata={"help": "path of vqgan model"},
)
vqgan_config_path: str = field(
default="oss://ofasys/tasks/image_gen/vqgan/model.yaml",
default="http://ofasys.oss-cn-zhangjiakou.aliyuncs.com/tasks/image_gen/vqgan/model.yaml",
metadata={"help": "path of vqgan config"},
)
- use_encode: bool = field(default=True, metadata={"help": "where to use tokenizer.encode in map"})
- code_entry_prefix: str = field(default='code', metadata={"help": "prefix of code entry in the global_dict"})
+ use_encode: bool = field(default=True, metadata={
+ "help": "where to use tokenizer.encode in map"})
+ code_entry_prefix: str = field(default='code', metadata={
+ "help": "prefix of code entry in the global_dict"})


@register_config("ofasys.adaptor", "image_vqgan", ImageVqganAdaptorConfig)
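The two defaults above now point at plain-http endpoints rather than oss:// paths, so the files can be fetched with any ordinary HTTP client. If the public bucket is slow or unreachable, they can be mirrored locally and vqgan_model_path / vqgan_config_path pointed at the copies; a minimal sketch using only the standard library (local filenames are arbitrary):

```python
import urllib.request

BASE = "http://ofasys.oss-cn-zhangjiakou.aliyuncs.com/tasks/image_gen/vqgan"
for name in ("last.ckpt", "model.yaml"):
    # download each file into the current working directory
    urllib.request.urlretrieve(f"{BASE}/{name}", name)
```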
@@ -72,21 +77,29 @@ def __init__(

self.window_size = cfg.code_image_size // cfg.vqgan_factor

- self.embed_code_positions = Embedding(cfg.code_bucket_size**2 + 1, cfg.embed_dim)
+ self.embed_code_positions = Embedding(
+ cfg.code_bucket_size**2 + 1, cfg.embed_dim)

- code_num_rel_dis = (2 * cfg.code_bucket_size - 1) * (2 * cfg.code_bucket_size - 1) + 3
- code_rp_bucket = make_vqgan_code_bucket_position(cfg.code_bucket_size, code_num_rel_dis)
+ code_num_rel_dis = (2 * cfg.code_bucket_size - 1) * \
+ (2 * cfg.code_bucket_size - 1) + 3
+ code_rp_bucket = make_vqgan_code_bucket_position(
+ cfg.code_bucket_size, code_num_rel_dis)
code_position_idx = (
- torch.arange(self.window_size).unsqueeze(0).expand(self.window_size, self.window_size)
- + torch.arange(self.window_size).unsqueeze(1) * cfg.code_bucket_size
+ torch.arange(self.window_size).unsqueeze(
+ 0).expand(self.window_size, self.window_size)
+ + torch.arange(self.window_size).unsqueeze(1) *
+ cfg.code_bucket_size
+ 1
)
- code_position_idx = torch.cat([torch.tensor([0]), code_position_idx.view(-1)])
- code_position_idx = torch.cat([code_position_idx, torch.tensor([1024] * 768)])
+ code_position_idx = torch.cat(
+ [torch.tensor([0]), code_position_idx.view(-1)])
+ code_position_idx = torch.cat(
+ [code_position_idx, torch.tensor([1024] * 768)])

num_rel_pos_tables = 1 if self.cfg.share_attn_bias else self.num_layers
self.code_rel_pos_table_list = nn.ModuleList(
- [Embedding(code_num_rel_dis, cfg.num_attention_heads, zero_init=True) for _ in range(num_rel_pos_tables)]
+ [Embedding(code_num_rel_dis, cfg.num_attention_heads, zero_init=True)
+ for _ in range(num_rel_pos_tables)]
)
self.tokenizer = VQGANTokenizer(
vqgan_config_path=cfg.vqgan_config_path,
@@ -102,8 +115,10 @@ def __init__(

def get_rel_pos_bias(self, batch_size, seq_length, idx, **kwargs):
code_position_idx = self.code_position_idx[:seq_length]
- rp_bucket = self.code_rp_bucket[code_position_idx][:, code_position_idx]
- values = F.embedding(rp_bucket, self.code_rel_pos_table_list[idx].weight)
+ rp_bucket = self.code_rp_bucket[code_position_idx][:,
+ code_position_idx]
+ values = F.embedding(
+ rp_bucket, self.code_rel_pos_table_list[idx].weight)
return values

def update_sample(self, sample: Dict):
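get_rel_pos_bias above is a pure table lookup: rp_bucket holds a bucket id for every (query, key) position pair, and F.embedding maps each id to one bias value per attention head. A toy standalone version with made-up sizes:

```python
import torch
import torch.nn.functional as F

num_buckets, num_heads, seq_len = 10, 4, 5
table = torch.nn.Embedding(num_buckets, num_heads)          # stands in for code_rel_pos_table_list[idx]
rp_bucket = torch.randint(num_buckets, (seq_len, seq_len))  # bucket id per (query, key) pair
values = F.embedding(rp_bucket, table.weight)
print(values.shape)  # torch.Size([5, 5, 4]): one bias per head for every position pair
```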
@@ -122,13 +137,18 @@ def update_sample(self, sample: Dict):
for i, slot in enumerate(sample['net_input']['slots']):
if self.check_adaptor_slot(slot):
image_tensor = slot.value
- codes = self.tokenizer.encode(image_tensor.float()) + self.code_index_start
+ codes = self.tokenizer.encode(
+ image_tensor.float()) + self.code_index_start
batch_size = codes.size()[0]
- codes = torch.cat([codes.new_ones((batch_size, 1)) * 0, codes], dim=-1)
- codes = torch.cat([codes, codes.new_ones((batch_size, 1)) * 2], dim=-1)
- sample['net_input']['slots'][i].value = codes[:, :-1].contiguous()
+ codes = torch.cat(
+ [codes.new_ones((batch_size, 1)) * 0, codes], dim=-1)
+ codes = torch.cat(
+ [codes, codes.new_ones((batch_size, 1)) * 2], dim=-1)
+ sample['net_input']['slots'][i].value = codes[:,
+ :-1].contiguous()
sample['target'] = codes[:, 1:].contiguous()
- sample['ntokens'] = sample['target'].ne(1).long().sum().item()
+ sample['ntokens'] = sample['target'].ne(
+ 1).long().sum().item()

return sample

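The re-wrapped lines above implement the usual teacher-forcing shift: the code sequence is framed with what are presumably the BOS (0) and EOS (2) ids, the model input drops the last position and the target drops the first. A toy illustration with made-up code values (the pad id is assumed to be 1, which is why ntokens counts non-1 targets):

```python
import torch

codes = torch.tensor([[7, 8, 9]])                                # fake VQGAN codes for one image
codes = torch.cat([codes.new_ones((1, 1)) * 0, codes], dim=-1)   # prepend BOS -> [[0, 7, 8, 9]]
codes = torch.cat([codes, codes.new_ones((1, 1)) * 2], dim=-1)   # append EOS  -> [[0, 7, 8, 9, 2]]
inputs = codes[:, :-1].contiguous()                              # [[0, 7, 8, 9]]
target = codes[:, 1:].contiguous()                               # [[7, 8, 9, 2]]
ntokens = target.ne(1).long().sum().item()                       # 4: nothing is padding in this toy batch
```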
@@ -172,7 +192,8 @@ def upgrade_state_dict_named(self, state_dict, name):
num_posids_to_add = len(self.state_dict()["embed_code_positions.weight"]) - len(
state_dict[prefix + "embed_code_positions.weight"]
)
embed_dim = state_dict[prefix + "embed_code_positions.weight"].size(1)
embed_dim = state_dict[prefix +
"embed_code_positions.weight"].size(1)
new_pos_embed_to_add = torch.zeros(num_posids_to_add, embed_dim)
nn.init.normal_(new_pos_embed_to_add, mean=0, std=embed_dim**-0.5)
new_pos_embed_to_add = new_pos_embed_to_add.to(
2 changes: 1 addition & 1 deletion requirements.txt
@@ -31,7 +31,7 @@ rapidfuzz
pillow
opencv-python
timm>=0.5.0
- http://ofasys.oss-cn-zhangjiakou.aliyuncs.com/dependency/clip-1.0-py3-none-any.whl
+ git+https://github.com/openai/clip.git
diffusers
matplotlib

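The prebuilt wheel hosted on the OSS bucket is replaced by a VCS requirement that installs CLIP straight from the upstream repository. A quick sanity check that the installed package exposes the usual OpenAI CLIP API (the model name is only an example, and clip.load downloads weights on first use):

```python
import torch
import clip

model, preprocess = clip.load("ViT-B/32", device="cpu")
tokens = clip.tokenize(["a diagram", "a photo of a dog"])
with torch.no_grad():
    text_features = model.encode_text(tokens)
print(text_features.shape)  # torch.Size([2, 512]) for ViT-B/32
```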