diff --git a/yolov5/combined.py b/yolov5/combined.py
index 12d3ab1..649888c 100644
--- a/yolov5/combined.py
+++ b/yolov5/combined.py
@@ -93,7 +93,7 @@ def window_mask(width, height, img_ref, center, level):
     return output
 
 def process_image_lane(image):
-    img = cv2.undistort(image, mtx, dist, None, mtx)
+    img = cv2.undistort(np.transpose(image, (1, 2, 0)), mtx, dist, None, mtx)
     preprocessImage = np.zeros_like(img[:,:,0])
     gradx = abs_sobel_thresh(img, orient='x', thresh=(12,255))
     grady = abs_sobel_thresh(img, orient='x', thresh=(25,255))
@@ -161,7 +161,7 @@ def process_image_lane(image):
     return result
 
 # Load YOLOv5 model
-model_path = "yolov5/yolov5s.pt"  # Replace with your model path
+model_path = "./yolov5/yolov5s.pt"  # Replace with your model path
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = DetectMultiBackend(model_path, device=device)
 model.eval()
@@ -172,7 +172,25 @@ def process_image_lane(image):
 dist = dist_pickle["dist"]
 
 def process_image(image):
+    # Load the image (OpenCV reads BGR in HWC layout)
+    image_path = "../assets/how-it-works/img1.jpeg"
+    image = cv2.imread(image_path)
+
+    # Convert BGR -> RGB while the array is still HWC;
+    # cv2.cvtColor cannot operate on a channels-first array
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    # Move to channels-first (CHW) layout; process_image_lane transposes back
+    image = image.transpose(2, 0, 1)
+
+    # Convert data type and normalize to [0, 1]
+    image = image.astype(np.float32) / 255.0
+
+    # Check the shape (channels, height, width) and data type
+    print("Image shape:", image.shape)
+    print("Image data type:", image.dtype)
+
     lane_image = process_image_lane(image)
     results = model(torch.from_numpy(image).unsqueeze(0).float())
     labels, cord = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]
     n = len(labels)
diff --git a/yolov5/combinedv2.py b/yolov5/combinedv2.py
new file mode 100644
index 0000000..f0d96a3
--- /dev/null
+++ b/yolov5/combinedv2.py
@@ -0,0 +1,188 @@
+import numpy as np
+import cv2
+import pickle
+from tracker import tracker
+import torch
+
+dist_pickle = pickle.load(open("calibration_pickle.p", "rb"))
+mtx = dist_pickle["mtx"]
+dist = dist_pickle["dist"]
+
+model_path = 'yolov5s.pt'
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+# Load via torch.hub instead of attempt_load: the returned AutoShape wrapper
+# handles letterboxing, normalization and NMS, and exposes results.pandas()
+model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path)
+model.to(device)
+
+def abs_sobel_thresh(img, orient='x', sobel_kernel=3, thresh=(0, 255)):
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    if orient == 'x':
+        abs_sobel = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel))
+    if orient == 'y':
+        abs_sobel = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel))
+    scaled_sobel = np.uint8(255*abs_sobel/np.max(abs_sobel))
+    binary_output = np.zeros_like(scaled_sobel)
+    binary_output[(scaled_sobel >= thresh[0]) & (scaled_sobel <= thresh[1])] = 1
+    return binary_output
+
+def mag_thresh(img, sobel_kernel=3, mag_thresh=(0, 255)):
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
+    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
+    gradmag = np.sqrt(sobelx**2 + sobely**2)
+    scale_factor = np.max(gradmag)/255
+    gradmag = (gradmag/scale_factor).astype(np.uint8)
+    binary_output = np.zeros_like(gradmag)
+    binary_output[(gradmag >= mag_thresh[0]) & (gradmag <= mag_thresh[1])] = 1
+    return binary_output
+
+def dir_threshold(img, sobel_kernel=3, thresh=(0, np.pi/2)):
+    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
+    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
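+    # Gradient direction is arctan(sobely/sobelx); np.errstate suppresses the
+    # divide-by-zero warnings for pixels where the x-gradient is zero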
+    with np.errstate(divide='ignore', invalid='ignore'):
+        absgraddir = np.absolute(np.arctan(sobely/sobelx))
+        binary_output = np.zeros_like(absgraddir)
+        binary_output[(absgraddir >= thresh[0]) & (absgraddir <= thresh[1])] = 1
+    return binary_output
+
+def color_threshold(image, sthresh=(0, 255), vthresh=(0, 255)):
+    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
+    s_channel = hls[:,:,2]
+    s_binary = np.zeros_like(s_channel)
+    s_binary[(s_channel >= sthresh[0]) & (s_channel <= sthresh[1])] = 1
+    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+    v_channel = hsv[:,:,2]
+    v_binary = np.zeros_like(v_channel)
+    v_binary[(v_channel >= vthresh[0]) & (v_channel <= vthresh[1])] = 1
+    output = np.zeros_like(s_channel)
+    output[(s_binary == 1) & (v_binary == 1)] = 1
+    return output
+
+def window_mask(width, height, img_ref, center, level):
+    output = np.zeros_like(img_ref)
+    output[int(img_ref.shape[0]-(level+1)*height):int(img_ref.shape[0]-level*height),max(0,int(center-width/2)):min(int(center+width/2),img_ref.shape[1])] = 1
+    return output
+
+def process_image(image):
+    img = cv2.undistort(image, mtx, dist, None, mtx)
+    preprocessImage = np.zeros_like(img[:,:,0])
+    gradx = abs_sobel_thresh(img, orient='x', thresh=(12,255))
+    grady = abs_sobel_thresh(img, orient='y', thresh=(25,255))
+    c_binary = color_threshold(img, sthresh=(100,255), vthresh=(50,255))
+    preprocessImage[((gradx==1) & (grady==1) | (c_binary==1))] = 255
+    img_size = (img.shape[1], img.shape[0])
+    bot_width = .76
+    mid_width = .08
+    height_pct = .62
+    bottom_trim = .935
+    src = np.float32([[img.shape[1]*(.5-mid_width/2),img.shape[0]*height_pct],
+                      [img.shape[1]*(.5+mid_width/2),img.shape[0]*height_pct],
+                      [img.shape[1]*(.5+bot_width/2),img.shape[0]*bottom_trim],
+                      [img.shape[1]*(.5-bot_width/2),img.shape[0]*bottom_trim]])
+    offset = img_size[0]*.25
+    dst = np.float32([[offset, 0], [img_size[0]-offset, 0],
+                      [img_size[0]-offset, img_size[1]],
+                      [offset, img_size[1]]])
+    M = cv2.getPerspectiveTransform(src, dst)
+    Minv = cv2.getPerspectiveTransform(dst, src)
+    warped = cv2.warpPerspective(preprocessImage, M, img_size, flags=cv2.INTER_LINEAR)
+    window_width = 25
+    window_height = 80
+    curve_centers = tracker(Mywindow_width=window_width, Mywindow_height=window_height,
+                            Mymargin=25, My_ym=10/720, My_xm=4/384, Mysmooth_factor=15)
+    window_centroids = curve_centers.find_window_centroids(warped)
+    l_points = np.zeros_like(warped)
+    r_points = np.zeros_like(warped)
+    rightx = []
+    leftx = []
+    for level in range(0,len(window_centroids)):
+        l_mask = window_mask(window_width,window_height,warped,window_centroids[level][0],level)
+        r_mask = window_mask(window_width,window_height,warped,window_centroids[level][1],level)
+        # Add the centroid found in this frame to the per-side lane-point lists
+        leftx.append(window_centroids[level][0])
+        rightx.append(window_centroids[level][1])
+        # Draw the window masks
+        l_points[(l_points == 255) | (l_mask == 1)] = 255
+        r_points[(r_points == 255) | (r_mask == 1)] = 255
+    # Draw the windows onto the warped blank image
+    template = np.array(r_points+l_points,np.uint8)  # add both left and right window pixels together
+    zero_channel = np.zeros_like(template)  # create a zero color channel
+    template = np.array(cv2.merge((zero_channel,template,zero_channel)),np.uint8)  # make window pixels green
+    warpage = np.array(cv2.merge((warped,warped,warped)),np.uint8)  # give the road pixels 3 color channels
+    result = cv2.addWeighted(warpage, 1, template, 0.5, 0.0)  # overlay the original road image with window results
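+    # Fit second-order polynomials x = Ay^2 + By + C through the window
+    # centroids; res_yvals holds the vertical center of each tracking window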
+    yvals = np.arange(0, warped.shape[0])
+    res_yvals = np.arange(warped.shape[0]-(window_height/2),0,-window_height)
+    left_fit = np.polyfit(res_yvals, leftx, 2)
+    left_fitx = left_fit[0]*yvals*yvals + left_fit[1]*yvals + left_fit[2]
+    left_fitx = np.array(left_fitx,np.int32)
+    right_fit = np.polyfit(res_yvals, rightx, 2)
+    right_fitx = right_fit[0]*yvals*yvals + right_fit[1]*yvals + right_fit[2]
+    right_fitx = np.array(right_fitx,np.int32)
+    left_lane = np.array(list(zip(np.concatenate((left_fitx-window_width/2,left_fitx[::-1]+window_width/2), axis=0),
+                                  np.concatenate((yvals,yvals[::-1]), axis=0))), np.int32)
+    right_lane = np.array(list(zip(np.concatenate((right_fitx-window_width/2,right_fitx[::-1]+window_width/2), axis=0),
+                                   np.concatenate((yvals,yvals[::-1]), axis=0))), np.int32)
+    inner_lane = np.array(list(zip(np.concatenate((left_fitx+window_width/2,right_fitx[::-1]-window_width/2), axis=0),
+                                   np.concatenate((yvals,yvals[::-1]), axis=0))), np.int32)
+    # Create a blank image and draw the lane polygons onto it
+    lane_image = np.zeros_like(img)
+    cv2.fillPoly(lane_image, [left_lane], [255, 0, 0])
+    cv2.fillPoly(lane_image, [right_lane], [0, 0, 255])
+    cv2.fillPoly(lane_image, [inner_lane], [0, 255, 0])
+    # Warp back to original image space using the inverse perspective matrix (Minv)
+    lane_image_unwarped = cv2.warpPerspective(lane_image, Minv, (img.shape[1], img.shape[0]))
+    # Combine the result with the original image
+    result = cv2.addWeighted(img, 1, lane_image_unwarped, 0.3, 0)
+    # Calculate curvature and vehicle position
+    ym_per_pix = curve_centers.ym_per_pix
+    xm_per_pix = curve_centers.xm_per_pix
+    curve_fit_cr = np.polyfit(np.array(res_yvals,np.float32)*ym_per_pix, np.array(leftx,np.float32)*xm_per_pix, 2)
+    curverad = ((1 + (2*curve_fit_cr[0]*yvals[-1]*ym_per_pix + curve_fit_cr[1])**2)**1.5) / np.absolute(2*curve_fit_cr[0])
+    camera_center = (left_fitx[-1] + right_fitx[-1]) / 2
+    center_diff = (camera_center - warped.shape[1]/2)*xm_per_pix
+    side_pos = 'left'
+    if center_diff <= 0:
+        side_pos = 'right'
+    # Add curvature and offset text to the image
+    cv2.putText(result,'Radius of curvature = '+str(round(curverad,3))+'(m)',(50,50), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2)
+    cv2.putText(result,'Vehicle is '+str(abs(round(center_diff,3)))+'m '+side_pos+' of center',(50,100), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2)
+
+    # YOLOv5 object detection; the AutoShape wrapper expects RGB input,
+    # so reverse OpenCV's BGR channel order before inference
+    results = model(img[..., ::-1])
+    # Filter results (example: only persons with confidence > 0.6)
+    detections = results.pandas().xyxy[0]
+    detections = detections[(detections['class'] == 0) & (detections['confidence'] > 0.6)]
+
+    # Draw bounding boxes and labels
+    for index, row in detections.iterrows():
+        x1, y1, x2, y2 = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
+        cv2.rectangle(result, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(result, f"person {row['confidence']:.2f}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
+
+    return result
+
+
+def process_video_realtime(input_video_path):
+    cap = cv2.VideoCapture(input_video_path)
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        processed_frame = process_image(frame)
+        cv2.imshow('Lane and Object Detection', processed_frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
+            break
+    cap.release()
+    cv2.destroyAllWindows()
+
+# Example usage
+input_video_path = 'C:\\Users\\Dhruv\\Documents\\CODEBURGLARY\\code-burglary\\video.mp4'  # Replace with your video path
+process_video_realtime(input_video_path)
\ No newline at end of file
diff --git a/yolov5/tracker.py b/yolov5/tracker.py
new file mode 100644
index 0000000..38c9699
--- /dev/null
+++ b/yolov5/tracker.py
@@ -0,0 +1,48 @@
+import numpy as np
+import cv2
+class tracker():
+
+    def __init__(self, Mywindow_width, Mywindow_height, Mymargin, My_ym=1, My_xm=1, Mysmooth_factor=15):
+        self.recent_centers = []
+        self.window_width = Mywindow_width
+        self.window_height = Mywindow_height
+        self.margin = Mymargin
+        self.ym_per_pix = My_ym
+        self.xm_per_pix = My_xm
+        self.smooth_factor = Mysmooth_factor
+
+    def find_window_centroids(self, warped):
+        window_width = self.window_width
+        window_height = self.window_height
+        margin = self.margin
+
+        window_centroids = []
+        window = np.ones(window_width)
+
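+        # Column-sum the bottom quarter of each image half and convolve with the
+        # flat window; the convolution peak marks the strongest lane-pixel column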
+        l_sum = np.sum(warped[int(3*warped.shape[0]/4):,:int(warped.shape[1]/2)],axis=0)
+        l_center = np.argmax(np.convolve(window,l_sum))-window_width/2
+
+        r_sum = np.sum(warped[int(3*warped.shape[0]/4):,int(warped.shape[1]/2):],axis=0)
+        r_center = np.argmax(np.convolve(window,r_sum))-window_width/2+int(warped.shape[1]/2)
+
+        window_centroids.append((l_center,r_center))
+
+        for level in range(1,int(warped.shape[0]/window_height)):
+            image_layer = np.sum(warped[int(warped.shape[0]-(level+1)*window_height):int(warped.shape[0]-level*window_height),:],axis=0)
+            conv_signal = np.convolve(window,image_layer)
+
+            offset = window_width/2
+            l_min_index = int(max(l_center+offset-margin,0))
+            l_max_index = int(min(l_center+offset+margin,warped.shape[1]))
+            l_center = np.argmax(conv_signal[l_min_index:l_max_index])+l_min_index-offset
+
+            r_min_index = int(max(r_center+offset-margin,0))
+            r_max_index = int(min(r_center+offset+margin,warped.shape[1]))
+            r_center = np.argmax(conv_signal[r_min_index:r_max_index])+r_min_index-offset
+
+            window_centroids.append((l_center,r_center))
+
+        self.recent_centers.append(window_centroids)
+        return np.average(self.recent_centers[-self.smooth_factor:],axis=0)
\ No newline at end of file
diff --git a/yolov5/yolov5/print_mod.py b/yolov5/yolov5/print_mod.py
new file mode 100644
index 0000000..12d9d89
--- /dev/null
+++ b/yolov5/yolov5/print_mod.py
@@ -0,0 +1,8 @@
+import torch
+
+# Load the checkpoint; YOLOv5 .pt files are dicts that hold the actual
+# nn.Module under the 'model' key
+ckpt = torch.load("./yolov5s.pt", map_location="cpu")
+
+# Print the model's architecture
+print(ckpt["model"])
\ No newline at end of file