-
Notifications
You must be signed in to change notification settings - Fork 0
/
Real_Detect.py
executable file
·128 lines (104 loc) · 4.28 KB
/
Real_Detect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import cv2
import torch
class ObjectDetector:
def __init__(self):
# Camera and object parameters
self.focal = 665 # Focal length in pixels
self.width = 7.1 # Real-world width of the object in cm
# OpenCV text parameters
self.font = cv2.FONT_HERSHEY_SIMPLEX
self.fontScale = 0.6
self.color = (0, 0, 255)
self.thickness = 2
# Initialize YOLO model
self.model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
self.model.eval()
# Initialize camera
self.cap = cv2.VideoCapture(0) # Use 0 for default camera
if not self.cap.isOpened():
raise ValueError("Failed to open camera")
# Set up display window
cv2.namedWindow('Object Distance Measurement', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Object Distance Measurement', 700, 600)
def get_dist(self, rectangle_params):
"""Calculate distance from camera based on object width in pixels"""
pixels = rectangle_params[1][0]
if pixels > 0:
dist = (self.width * self.focal) / pixels
else:
dist = float('inf')
return dist
def get_3d_coordinates(self, x, y, dist):
"""Calculate 3D coordinates from 2D image coordinates and distance"""
Z = dist
X = (x - 320) * Z / self.focal
Y = (y - 240) * Z / self.focal
return X, Y, Z
def annotate_image(self, image, bbox, dist, coords_3d):
"""Draw bounding box and information on the image"""
x1, y1, x2, y2 = bbox
X, Y, Z = coords_3d
# Draw bounding box
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
# Draw labels
label = f'Coords: ({x1}, {y1}), ({x2}, {y2})'
distance_label = f'Distance: {dist:.2f} cm'
coords_3d_label = f'3D Coords: (X={X:.2f}, Y={Y:.2f}, Z={Z:.2f}) cm'
# Position text
cv2.putText(image, label, (x1, y1 - 40), self.font, self.fontScale,
self.color, self.thickness, cv2.LINE_AA)
cv2.putText(image, distance_label, (x1, y1 - 20), self.font, self.fontScale,
self.color, self.thickness, cv2.LINE_AA)
cv2.putText(image, coords_3d_label, (x1, y1), self.font, self.fontScale,
self.color, self.thickness, cv2.LINE_AA)
return image
def process_frame(self):
"""Process a single frame from the camera"""
ret, img = self.cap.read()
if not ret:
return False, None
# Perform object detection
results = self.model(img)
detections = results.xyxy[0].cpu().numpy()
# Process each detection
for *xyxy, conf, cls in detections:
x1, y1, x2, y2 = map(int, xyxy)
bbox = (x1, y1, x2, y2)
bbox_width = x2 - x1
# Calculate distance and 3D coordinates
dist = self.get_dist(((0, 0), (bbox_width, 0), 0))
center_x = (x1 + x2) // 2
center_y = (y1 + y2) // 2
coords_3d = self.get_3d_coordinates(center_x, center_y, dist)
# Annotate image
img = self.annotate_image(img, bbox, dist, coords_3d)
# Print information to console
print(f'Object 2D Coordinates: ({x1}, {y1}), ({x2}, {y2})')
print(f'Distance from Camera: {dist:.2f} cm')
print(f'3D Coordinates: (X={coords_3d[0]:.2f}, Y={coords_3d[1]:.2f}, Z={coords_3d[2]:.2f}) cm')
return True, img
def run(self):
"""Main loop for continuous camera processing"""
try:
while True:
success, frame = self.process_frame()
if not success:
print("Failed to grab frame")
break
cv2.imshow('Object Distance Measurement', frame)
# Exit on 'q' key press
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
self.cleanup()
def cleanup(self):
"""Release resources"""
self.cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
try:
detector = ObjectDetector()
detector.run()
except Exception as e:
print(f"Error: {e}")