Skip to content

Commit

Permalink
feat(carla): connected carla sim to gpt vision
Browse files Browse the repository at this point in the history
  • Loading branch information
AdityaNG committed Apr 24, 2024
1 parent d2c93e0 commit 8471e67
Show file tree
Hide file tree
Showing 18 changed files with 384 additions and 53 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ Launching Carla
CUDA_VISIBLE_DEVICES=0 ./CarlaUE4.sh -quality-level=Low -prefernvidia -ResX=10 -ResY=10
```

Generate the simple trajectory templates
```bash
python3 -m drivellava.scripts.generate_trajectory_templates_simple
```

Running the carla client
```bash
python3 -m drivellava.scripts.carla_run
Expand All @@ -179,6 +184,12 @@ Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.
- [x] Quantize the trajectory
- [x] Visualize the trajectory on the image
- [x] Generate JSON dataset
- [ ] Carla
- [x] Build a Carla Client
- [x] Connect Vision Model to Carla
- [ ] Pause sim when thinking
- [ ] Feed in multi-cam input
- [ ] Feed in top down map view
- [ ] CI and Features
- [ ] There are many issues with the sanity of the dataset, since the trajectory was derived from accelerometer data. Find out how to filter out bad data without affecting the distribution of the data, and write test cases to enforce this.
- [ ] Write test cases for the following scripts
Expand Down
33 changes: 29 additions & 4 deletions drivellava/carla/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from drivellava.carla.helpers import HUD, KeyboardControl, World
from drivellava.schema.carla import DroneControls, DroneState
from drivellava.schema.image import Image
from drivellava.trajectory_encoder import TrajectoryEncoder


class CarlaClient:
Expand All @@ -24,8 +25,8 @@ def __init__(
port=2000,
sync=False,
autopilot=False,
width=512,
height=256,
width=256,
height=128,
rolename="hero",
filter="vehicle.tesla.model3",
generation="2",
Expand Down Expand Up @@ -120,8 +121,32 @@ def get_car_state(self, default=None) -> DroneState:
steering_angle=controls.steer,
)

def set_car_controls(
    self, controls: DroneControls, trajectory_encoder: TrajectoryEncoder
):
    """
    Apply the GPT-selected controls to the ego vehicle.

    Args:
        controls: carries ``speed_index`` (0 = stop, 1 = drive at the
            constant velocity) and ``trajectory_index`` (which template
            trajectory to follow).
        trajectory_encoder: provides ``num_trajectory_templates``, used to
            map the trajectory index onto a normalized steering angle.
    """
    if controls.speed_index == 0:
        # Zero target velocity: bring the vehicle to a halt.
        self.world.player.enable_constant_velocity(
            carla.Vector3D(0, 0, 0)
        )
        self.world.constant_velocity_enabled = True
    elif controls.speed_index == 1:
        # 1 m/s forward (~3.6 km/h) along the vehicle's local x axis.
        self.world.player.enable_constant_velocity(
            carla.Vector3D(1, 0, 0)
        )
        self.world.constant_velocity_enabled = True
    # NOTE(review): any other speed_index silently keeps the previous
    # velocity setting — confirm this fall-through is intended.

    # Map trajectory_index in [0, N-1] linearly onto [-1, 1]:
    # 0 -> hard left (-1.0), N-1 -> hard right (+1.0).
    # (Dividing by N instead of N-1 would make the mapping asymmetric:
    # the right-most template could never reach full right steer.)
    num_templates = trajectory_encoder.num_trajectory_templates
    steering_angle = (
        2.0
        * (float(controls.trajectory_index) / max(num_templates - 1, 1))
        - 1.0
    )

    assert -1 <= steering_angle <= 1

    self.controller.set_steering_angle(steering_angle)
10 changes: 9 additions & 1 deletion drivellava/carla/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1524,6 +1524,12 @@ def _parse_vehicle_keys(self, keys, milliseconds):
else:
self._ackermann_control.steer = round(self._steer_cache, 1)

def set_steering_angle(self, steering_angle):
    """Write *steering_angle* (rounded to one decimal place) onto the
    currently active control object — Ackermann or direct steering."""
    active_control = (
        self._ackermann_control
        if self._ackermann_enabled
        else self._control
    )
    active_control.steer = round(steering_angle, 1)

def _parse_walker_keys(self, keys, milliseconds, world):
self._control.speed = 0.0
if keys[K_DOWN] or keys[K_s]:
Expand Down Expand Up @@ -1749,7 +1755,9 @@ def render(self, display):
)
display.blit(surface, (8, v_offset))
v_offset += 18
self._notifications.render(display)

# self._notifications.render(display)

self.help.render(display)


Expand Down
28 changes: 25 additions & 3 deletions drivellava/gpt/gpt_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
GPT Vision to make Control Decisions
"""

from textwrap import dedent

import instructor
import numpy as np
from openai import OpenAI
Expand Down Expand Up @@ -46,12 +48,19 @@ def llm_client_factory(llm_provider: str):


class GPTVision:
def __init__(self, max_history=1, num_trajectory_templates=9):
    """
    GPT-Vision based driving agent.

    Args:
        max_history: number of past messages retained in the GPT
            conversation state.
        num_trajectory_templates: how many left-to-right trajectory
            templates the encoder loads; selects the matching
            ``.npy``/``.pkl`` template files on disk.
    """
    self.client = llm_client_factory(settings.system.SYSTEM_LLM_PROVIDER)

    self.previous_messages = GPTState()
    self.max_history = max_history
    self.num_trajectory_templates = num_trajectory_templates
    # Template artifacts are keyed by the template count so different
    # granularities can coexist on disk.
    templates_dir = "./trajectory_templates"
    trajectory_templates_npy = (
        f"{templates_dir}/proposed_trajectory_templates_simple_"
        f"{self.num_trajectory_templates}.npy"
    )
    trajectory_templates_kmeans_pkl = (
        f"{templates_dir}/kmeans_simple_"
        f"{self.num_trajectory_templates}.pkl"
    )
    self.trajectory_encoder = TrajectoryEncoder(
        num_trajectory_templates=self.num_trajectory_templates,
        trajectory_templates_npy=trajectory_templates_npy,
        trajectory_templates_kmeans_pkl=trajectory_templates_kmeans_pkl,
    )

def step(
self,
Expand All @@ -60,7 +69,20 @@ def step(
mission: str,
) -> DroneControls:
# base64_image = encode_opencv_image(image)
traj_str = self.trajectory_encoder.get_traj_str()
# traj_str = self.trajectory_encoder.get_traj_str()
traj_str = dedent(
f"""
The trajectories are numbered from:
[0,{self.num_trajectory_templates-1}]
They are labelled from left to right.
Select trajectory 0 to get the left most.
Select trajectory {(self.num_trajectory_templates-1)//2} to get a
more centered trajectory
Select trajectory {self.num_trajectory_templates-1} to get the
right most.
"""
)
new_messages = [
# User
GPTMessage(
Expand Down
16 changes: 10 additions & 6 deletions drivellava/gpt/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

GPT_SYSTEM = """
You are an autonomous vehicle. You are given the drone's environment state \
You are an autonomous vehicle. You are given the vehicle's environment state \
and you must control the vehicle to complete the mission.
"""

Expand All @@ -20,12 +20,12 @@
Trajectories: {traj_str}
Note that translation happens first, followed by rotation.
What are your next actions? Let's think step by step.
What are your next actions? Be short and brief with your thoughts
"""

GPT_PROMPT_UPDATE = """MISSION: {mission}
Updated visual is provided
What are your next actions? Let's think step by step.
What are your next actions? Be short and brief with your thoughts
"""

GPT_PROMPT_CONTROLS = """
Expand All @@ -36,15 +36,19 @@
Trajectories: {traj_str}
Speed: Select 0 to stop the vehicle and 1 to move the vehicle at the constant
speed
Based on the description, what are the next actions the pilot should take.
You will provide the next actions in the form of a JSON object:
"trajectory_index": trajectory_index (str),
"speed_index": speed_index (str),
"trajectory_index": trajectory_index (int),
"speed_index": speed_index (int),
You can select one or more of the following actions as your next immediate \
action:
- trajectory_index: select one of the trajectories from those drawn
- speed_index: select a speed index from one of those provided
- speed_index: Select 0 to stop the vehicle and 1 to move the vehicle at
the constant speed
"""
75 changes: 59 additions & 16 deletions drivellava/scripts/carla_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
GPT Vision to make Control Decisions
"""

import time
import traceback

import cv2
import numpy as np
from matplotlib import colormaps

from drivellava.carla.client import CarlaClient

# from drivellava.gpt.gpt_vision import GPTVision
from drivellava.gpt.gpt_vision import GPTVision
from drivellava.settings import settings
from drivellava.utils import plot_steering_traj


def main(): # pragma: no cover
Expand All @@ -23,37 +25,78 @@ def main(): # pragma: no cover

print("mission", mission)

# gpt = GPTVision()
gpt = GPTVision()
client = CarlaClient()

trajectory_templates = gpt.trajectory_encoder.left_to_right_traj()
NUM_TEMLATES = gpt.num_trajectory_templates

# Select colors based on templates
COLORS = [
(255 * colormaps["gist_rainbow"]([float(i + 1) / NUM_TEMLATES])[0])
for i in range(NUM_TEMLATES)
]

drone_state = client.get_car_state()
# image = drone_state.image.cv_image()

# last_update = drone_state.timestamp
last_update = drone_state.timestamp

try:
while True:
client.game_loop()

# # Get GPT Controls
# if (
# drone_state.timestamp > last_update
# ):
# # last_update = drone_state.timestamp
# last_update = int(time.time() * 1000)
# # gpt_controls = gpt.step(image, drone_state, mission)

# # client.set_car_controls(gpt_controls)
# gpt.previous_messages.timestamp = last_update

drone_state = client.get_car_state(default=drone_state)
image_raw = np.array(
drone_state.image.cv_image(),
)
image = np.array(
drone_state.image.cv_image(),
)

# Draw all template trajectories
for index in range(NUM_TEMLATES):
template_trajectory = trajectory_templates[index]
template_trajectory_3d = np.zeros(
(settings.system.TRAJECTORY_SIZE, 3)
)
template_trajectory_3d[:, 0] = template_trajectory[:, 0]
template_trajectory_3d[:, 2] = template_trajectory[:, 1]
color = COLORS[index]
plot_steering_traj(
image, template_trajectory_3d, color=color, track=False
)

# print(type(image), image.dtype)

cv2.imshow("carla", image)
image_vis = image_raw.copy()
# Get GPT Controls
if drone_state.timestamp > last_update:
# last_update = drone_state.timestamp
last_update = int(time.time() * 1000)
gpt_controls = gpt.step(image, drone_state, mission)

client.set_car_controls(gpt_controls, gpt.trajectory_encoder)
gpt.previous_messages.timestamp = last_update

template_trajectory = trajectory_templates[
gpt_controls.trajectory_index
]
template_trajectory_3d = np.zeros(
(settings.system.TRAJECTORY_SIZE, 3)
)
template_trajectory_3d[:, 0] = template_trajectory[:, 0]
template_trajectory_3d[:, 2] = template_trajectory[:, 1]

plot_steering_traj(
image_vis,
template_trajectory_3d,
color=(255, 0, 0),
track=True,
)

cv2.imshow("carla", image_vis)

key = cv2.waitKey(10) & 0xFF
if key == ord("q"):
break
Expand Down
Loading

0 comments on commit 8471e67

Please sign in to comment.