Skip to content

Commit

Permalink
feat(tokenizer): new logic
Browse files: browse the repository at this point in the history
  • Loading branch information
AdityaNG committed Feb 29, 2024
1 parent 7c113ef commit f08c6b0
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 23 deletions.
14 changes: 11 additions & 3 deletions drivellava/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,16 @@ def run(self, query: str, image_files: List[str]):
else:
qs = DEFAULT_IMAGE_TOKEN + "\n" + qs

print("qs", qs)

# Prepare conversation
conv = conv_templates[self.conv_mode].copy()
conv.append_message(conv.roles[0], qs)
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

print("prompt", prompt)

# Process images
images = load_images(image_files)
image_sizes = [x.size for x in images]
Expand All @@ -135,21 +139,25 @@ def run(self, query: str, image_files: List[str]):
.to(self.model.device)
)

print("input_ids", input_ids)

# Inference
with torch.inference_mode():
output_ids = self.model.generate(
input_ids,
images=images_tensor,
image_sizes=image_sizes,
do_sample=True if self.args.temperature > 0 else False,
temperature=self.args.temperature,
top_p=self.args.top_p,
# temperature=self.args.temperature,
# top_p=self.args.top_p,
num_beams=self.args.num_beams,
max_new_tokens=self.args.max_new_tokens,
use_cache=True,
)

outputs = self.tokenizer.batch_decode(output_ids)
outputs = self.tokenizer.batch_decode(
output_ids, skip_special_tokens=True
)
print("outputs", outputs)

outputs = outputs[0]
Expand Down
36 changes: 26 additions & 10 deletions drivellava/scripts/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ def main():

# from transformers.models.llava.configuration_llava import LlavaConfig

# fine_tuned_model_path = "liuhaotian/llava-v1.5-7b"
fine_tuned_model_path = os.path.expanduser(
"~/Datasets/checkpoints/checkpoint-4000/"
# '~/Datasets/checkpoints/checkpoint-4000/drivellava.bin'
)
fine_tuned_model_path = "liuhaotian/llava-v1.5-7b"
# fine_tuned_model_path = os.path.expanduser(
# "~/Datasets/checkpoints/checkpoint-1000/"
# )

args = type(
"Args",
Expand All @@ -50,30 +49,36 @@ def main():
"temperature": 0,
"top_p": None,
"num_beams": 1,
"max_new_tokens": 4,
"max_new_tokens": 64,
},
)()

model = DriveLLaVA(args)

print(dir(model.tokenizer))
print(model.tokenizer.get_vocab())
# print(model.tokenizer.get_vocab())

NUM_FRAMES = 20 * 1

encoded_video_path = "/root/Datasets/commavq/val/fe809f0fff5562cc4d2bdc073d242123_31.npy" # noqa
# encoded_video_path = "/root/Datasets/commavq/val/fe809f0fff5562cc4d2bdc073d242123_31.npy" # noqa
encoded_video_path = "/root/Datasets/commavq/data_0_to_2500/000e83c564317de4668c2cb372f89b91_6.npy" # noqa
# encoded_video_path = "/root/Datasets/commavq/img_data_0_to_2500/000e83c564317de4668c2cb372f89b91_6.npy" # noqa

# assert os.path.isfile(encoded_video_path), encoded_video_path

pose_path = encoded_video_path.replace("data_", "pose_data_").replace(
"val", "pose_val"
# pose_path = encoded_video_path.replace("img_data_", "pose_data_").replace(
"val",
"pose_val",
)
assert os.path.isfile(pose_path), pose_path

decoded_imgs_list = []

for frame_index in range(1200):
frame_path = get_image_path(encoded_video_path, frame_index)
frame_path = frame_path.replace("data_", "img_data_")
# print('frame_path', frame_path)
if os.path.isfile(frame_path):
decoded_imgs_list.append(frame_path)

Expand Down Expand Up @@ -108,8 +113,19 @@ def main():
trajectory, trajectory_encoded = pose_dataset[i]
trajectory_quantized = trajectory_encoder.decode(trajectory_encoded)

traj_tokens = model.tokenizer.tokenize(trajectory_encoded)
traj_tokens_encoded = model.tokenizer.encode(trajectory_encoded)
print(
"traj_tokens",
trajectory_encoded,
"->",
traj_tokens,
"->",
traj_tokens_encoded,
)

model_trajectory_quantized = model.run(
get_drivellava_prompt(trajectory_encoder),
get_drivellava_prompt(trajectory_encoder, default_image_token=""),
[
decoded_imgs_list[i],
],
Expand Down
5 changes: 5 additions & 0 deletions drivellava/scripts/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def load_json_dataset(
loaded = json.load(f)
for index in range(len(loaded)):
assert len(loaded[index]["conversations"][1]["value"]) == 1

loaded[index]["conversations"][1]["value"] = (
"Selected Trajectory: "
+ loaded[index]["conversations"][1]["value"]
)
loaded[index]["conversations"][0]["value"] = (
get_drivellava_prompt(trajectory_encoder)
)
Expand Down
23 changes: 13 additions & 10 deletions drivellava/sparse_llava_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,29 @@ def visualize_pose(
exit()


def get_drivellava_prompt(trajectory_encoder: TrajectoryEncoder):
def get_drivellava_prompt(
trajectory_encoder: TrajectoryEncoder,
default_image_token: str = DEFAULT_IMAGE_TOKEN,
):
traj_list = list(trajectory_encoder.token2trajectory.keys())
random.shuffle(traj_list)
traj_str = ",".join(list(map(str, traj_list)))
P1 = (
f"{DEFAULT_IMAGE_TOKEN}\nYou are DriveLLaVA, a "
f"{default_image_token}\nYou are DriveLLaVA, a "
+ "self-driving car. You will select the "
+ "appropriate trrajectory token given the "
+ "above image as context.\n"
+ "You may select one from the "
+ f"following templates: {traj_str}"
)
P2 = f"""{DEFAULT_IMAGE_TOKEN} As DriveLLaVA, the autonomous vehicle, your task is to analyze the given image and determine the optimal driving path. Choose the most suitable trajectory option from the list provided based on the visual information. {traj_str}""" # noqa
P3 = f"""{DEFAULT_IMAGE_TOKEN} You are the AI system DriveLLaVA, responsible for navigating self-driving cars. With the image provided as your guide, select the correct trajectory from the options below to ensure a safe and efficient route. {traj_str}""" # noqa
P4 = f"""{DEFAULT_IMAGE_TOKEN} Imagine yourself as DriveLLaVA, an advanced self-driving vehicle intelligence. Examine the scenario depicted in the image and decide on the best course of action by selecting an appropriate trajectory from the given templates. {traj_str}""" # noqa
P5 = f"""{DEFAULT_IMAGE_TOKEN} You embody DriveLLaVA, the brain behind autonomous driving technology. Given the context shown in the image, it's your job to pick the right trajectory from the available choices to navigate safely. {traj_str}""" # noqa
P6 = f"""{DEFAULT_IMAGE_TOKEN} As DriveLLaVA, a pioneering self-driving car AI, you're tasked with interpreting the visual cues in the provided image to choose the most suitable trajectory from the list of options to ensure a smooth journey. {traj_str}""" # noqa
P7 = f"""{DEFAULT_IMAGE_TOKEN} You, as DriveLLaVA, are at the forefront of autonomous navigation. Assess the situation depicted in the image and select the trajectory that best aligns with safe and efficient driving principles from the options provided. {traj_str}""" # noqa
P8 = f"""{DEFAULT_IMAGE_TOKEN} Functioning as DriveLLaVA, the self-driving car's decision-making system, you must look at the image and determine the best path forward by choosing from the predefined trajectory templates. {traj_str}""" # noqa
P9 = f"""{DEFAULT_IMAGE_TOKEN} You are DriveLLaVA, an AI designed for autonomous vehicles. Your objective is to analyze the context presented in the image and select a trajectory that guarantees the safety and comfort of your passengers from the given templates. {traj_str}""" # noqa
P2 = f"""{default_image_token} As DriveLLaVA, the autonomous vehicle, your task is to analyze the given image and determine the optimal driving path. Choose the most suitable trajectory option from the list provided based on the visual information. {traj_str}""" # noqa
P3 = f"""{default_image_token} You are the AI system DriveLLaVA, responsible for navigating self-driving cars. With the image provided as your guide, select the correct trajectory from the options below to ensure a safe and efficient route. {traj_str}""" # noqa
P4 = f"""{default_image_token} Imagine yourself as DriveLLaVA, an advanced self-driving vehicle intelligence. Examine the scenario depicted in the image and decide on the best course of action by selecting an appropriate trajectory from the given templates. {traj_str}""" # noqa
P5 = f"""{default_image_token} You embody DriveLLaVA, the brain behind autonomous driving technology. Given the context shown in the image, it's your job to pick the right trajectory from the available choices to navigate safely. {traj_str}""" # noqa
P6 = f"""{default_image_token} As DriveLLaVA, a pioneering self-driving car AI, you're tasked with interpreting the visual cues in the provided image to choose the most suitable trajectory from the list of options to ensure a smooth journey. {traj_str}""" # noqa
P7 = f"""{default_image_token} You, as DriveLLaVA, are at the forefront of autonomous navigation. Assess the situation depicted in the image and select the trajectory that best aligns with safe and efficient driving principles from the options provided. {traj_str}""" # noqa
P8 = f"""{default_image_token} Functioning as DriveLLaVA, the self-driving car's decision-making system, you must look at the image and determine the best path forward by choosing from the predefined trajectory templates. {traj_str}""" # noqa
P9 = f"""{default_image_token} You are DriveLLaVA, an AI designed for autonomous vehicles. Your objective is to analyze the context presented in the image and select a trajectory that guarantees the safety and comfort of your passengers from the given templates. {traj_str}""" # noqa

return random.choice([P1, P2, P3, P4, P5, P6, P7, P8, P9])

Expand Down

0 comments on commit f08c6b0

Please sign in to comment.