fix ray dependency.
zhwang4ai committed Mar 28, 2023
1 parent 55d3bc5 commit 1f13f4f
Showing 22 changed files with 1,813 additions and 6 deletions.
2 changes: 1 addition & 1 deletion configs/defaults.yaml
@@ -59,7 +59,7 @@ model:
weight_path: ''

pretrains:
-clip_path: /home/zhwang/workspace/minerl/Planners/checkpoints/attn.pth
+clip_path: /home/zhwang/workspace/minerl/MC-Planner/checkpoints/attn.pth

loss:
action_loss:
2 changes: 1 addition & 1 deletion configs/model/simple.yaml
@@ -10,6 +10,6 @@ max_ep_len: 1000
backbone_name: goal_impala_1x
frozen_cnn: False
only_load_cnn: False
-load_ckpt_path: ''
+load_ckpt_path: '/home/zhwang/workspace/minerl/MC-Planner/checkpoints/transformer.pt'

c: 8
3 changes: 2 additions & 1 deletion controller.py
@@ -267,7 +267,8 @@ def making_exp_name(cfg):

return "@".join(component)

-from ray.rllib.models.torch.mineclip_lib.mineclip_model import MineCLIP
+# from ray.rllib.models.torch.mineclip_lib.mineclip_model import MineCLIP
+from src.mineclip_lib.mineclip_model import MineCLIP
def accquire_goal_embeddings(clip_path, goal_list, device="cuda"):
clip_cfg = {'arch': 'vit_base_p16_fz.v2.t2', 'hidden_dim': 512, 'image_feature_dim': 512, 'mlp_adapter_spec': 'v0-2.t0',
'pool_type': 'attn.d2.nh8.glusw', 'resolution': [160, 256]}
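The body of accquire_goal_embeddings is truncated in this view. For reference, a minimal sketch of how the vendored MineCLIP is typically built from this config and checkpoint (build_mineclip is a hypothetical helper; strict=True is an assumption about the checkpoint's key layout):

from src.mineclip_lib.mineclip_model import MineCLIP

def build_mineclip(clip_path, device="cuda"):
    # Same config dict as in accquire_goal_embeddings above.
    clip_cfg = {'arch': 'vit_base_p16_fz.v2.t2', 'hidden_dim': 512,
                'image_feature_dim': 512, 'mlp_adapter_spec': 'v0-2.t0',
                'pool_type': 'attn.d2.nh8.glusw', 'resolution': [160, 256]}
    model = MineCLIP(**clip_cfg).to(device)
    # load_ckpt is inherited from VideoRewardBase, added in this commit
    # (see src/mineclip_lib/base.py below); strict=True is an assumption.
    model.load_ckpt(clip_path, strict=True)
    model.eval()
    return model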
2 changes: 1 addition & 1 deletion main.py
@@ -43,7 +43,7 @@ def resize_image_numpy(img, target_resolution = (128, 128)):
task_list = list(task_info.keys())

env = MineDojoEnv(
-name='crafting_forest',
+name='Plains',
img_size=(640, 480),
rgb_only=False,
)
2 changes: 1 addition & 1 deletion planner.py
@@ -77,7 +77,7 @@ def query_codex(self, prompt_text):
try:
self.update_key()
response = openai.Completion.create(
model="code-davinci-002",
model="code-davinci-002", # openai cancel the access of codex in 23.03
prompt=prompt_text,
temperature=0.7,
max_tokens=1024,
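With Codex gone, the Completion call above fails for anyone without legacy access. A hypothetical fallback, not part of this commit, using the chat endpoint of the pre-v1 openai Python client that planner.py already targets (gpt-3.5-turbo is an illustrative substitute for code-davinci-002, not the author's choice):

import openai

def query_chat_fallback(prompt_text, temperature=0.7, max_tokens=1024):
    # openai.ChatCompletion.create is the chat counterpart of
    # openai.Completion.create in openai<1.0.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # assumed stand-in for code-davinci-002
        messages=[{"role": "user", "content": prompt_text}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["message"]["content"]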
1 change: 0 additions & 1 deletion requirements.txt
@@ -13,7 +13,6 @@ pandas==1.4.3
Pillow==9.4.0
rich==13.3.1
seaborn==0.12.2
-tensorflow==2.11.0
tqdm==4.64.0
transformers==4.21.1
tree==0.2.4
104 changes: 104 additions & 0 deletions src/mineclip_lib/base.py
@@ -0,0 +1,104 @@
"""
Base API for importing pretrained video models
"""
from __future__ import annotations
import torch
import torch.nn as nn
import torch.nn.functional

import src.mineclip_lib.utils as U


__all__ = ["VideoRewardBase"]

# calculated from 21K video clips, which contains 2.8M frames
MC_IMAGE_MEAN = (0.3331, 0.3245, 0.3051)
MC_IMAGE_STD = (0.2439, 0.2493, 0.2873)


class VideoRewardBase(nn.Module):
def __init__(
self,
*,
image_encoder: nn.Module,
temporal_encoder: nn.Module,
reward_head: nn.Module,
):
"""
Args:
image_encoder: [B, C, H, W] -> [B, F]
temporal_encoder: [B, L, F] -> [B, F]
reward_head: [B, F] -> [B, D] softmax over D classes/dims
"""
super().__init__()
self.image_encoder = image_encoder
self.temporal_encoder = temporal_encoder
self.reward_head = reward_head

def forward_image_features(self, frames):
"""
[..., C, H, W] -> [..., F], independent encoding of each frame image
"""
assert frames.ndim >= 4
leading_dims = frames.size()[:-3]
C, H, W = frames.size()[-3:]
frames = frames.view(-1, C, H, W)
frames = U.basic_image_tensor_preprocess(
frames, mean=MC_IMAGE_MEAN, std=MC_IMAGE_STD
)
features = self.image_encoder(frames)
return features.view(*leading_dims, features.size(-1))

def forward_video_features(self, image_features):
"""
[B, L, F] -> [B, F]
"""
B, L, F = image_features.size()
video_feats = self.temporal_encoder(image_features)
assert video_feats.shape[0] == B
return video_feats

def forward_reward_head(self, video_features, text_tokens=None, softmax=False):
"""
[B, F] -> [B, D]
"""
B, F = video_features.size()
if text_tokens is not None:
rewards = self.reward_head(video_features, text_tokens)
else:
rewards = self.reward_head(video_features)
if torch.is_tensor(rewards):
assert rewards.shape[0] == B
if softmax:
rewards = torch.nn.functional.softmax(rewards, dim=1)
return rewards

def forward(self, videos, text_tokens=None, is_video_features=False):
"""
Args:
videos: [B, F] if is_video_features else [B, L, C, H, W]
is_video_features: pass in [B, F] of already-computed video features
text_tokens: [B, L, D]
"""
if is_video_features:
assert videos.ndim == 2
return self.forward_reward_head(videos, text_tokens=text_tokens)
else:
assert videos.ndim == 5, "video must be 5D (raw pixels)"
return self.forward_reward_head(
self.forward_video_features(self.forward_image_features(videos)),
text_tokens=text_tokens,
)

def load_ckpt(self, ckpt_or_path, strip_prefix="model.", strict=False):
if isinstance(ckpt_or_path, dict):
ckpt = ckpt_or_path
else:
ckpt_path = U.f_expand(ckpt_or_path)
assert U.f_exists(ckpt_path), f"ckpt not found: {ckpt_path}"
ckpt = U.torch_load(ckpt_path)
# `ret` might contain key matching info if strict=False
ret = U.load_state_dict(
self, ckpt["state_dict"], strip_prefix=strip_prefix, strict=strict
)
return ret
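To make the new module's shape contract concrete, a minimal sketch with placeholder encoders (MeanPool and the LazyLinear image encoder are stand-ins for illustration, not MineCLIP's real components; frames are assumed to be raw uint8 RGB):

import torch
import torch.nn as nn
from src.mineclip_lib.base import VideoRewardBase

class MeanPool(nn.Module):
    # temporal_encoder stand-in: [B, L, F] -> [B, F]
    def forward(self, x):
        return x.mean(dim=1)

model = VideoRewardBase(
    image_encoder=nn.Sequential(nn.Flatten(), nn.LazyLinear(512)),  # [B, C, H, W] -> [B, 512]
    temporal_encoder=MeanPool(),
    reward_head=nn.Linear(512, 4),  # [B, F] -> [B, D=4]
)
videos = torch.randint(0, 255, (2, 16, 3, 160, 256), dtype=torch.uint8)  # [B, L, C, H, W]
print(model(videos).shape)  # expected: torch.Size([2, 4])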