MaskDetector.py
import cv2
import argparse
import numpy as np
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from PIL import Image, ImageDraw, ImageFont
# import caffe  # unused; the model is loaded through cv2.dnn instead
import pyglet
# Optional sound alert (currently disabled):
# music = pyglet.resource.media('music.mp3')
# music.play()
# pyglet.app.run()

# Anchor configuration -- do not modify the values below.
feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# For inference the batch size is 1 and the model output shape is [1, N, 4],
# so expand the anchors to [1, anchor_num, 4].
anchors_exp = np.expand_dims(anchors, axis=0)
id2class = {0: 'Mask', 1: 'NoMask'}
id2chiclass = {0: 'Mask Found', 1: 'Mask Not Found'}
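# Boxes and labels are drawn on RGB frames (the BGR input is converted before
# inference), so these colors are RGB: green for 'Mask', red for 'NoMask'.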
colors = ((0, 255, 0), (255, 0, 0))
def puttext_onscreen(img, text, point, color):
    """Draw the label text on the image via PIL so a TrueType font can be used."""
    pilimg = Image.fromarray(img)
    draw = ImageDraw.Draw(pilimg)
    fontsize = int(min(img.shape[:2]) * 0.04)
    # Requires simhei.ttf to be reachable. Note: font.getsize() was removed in
    # Pillow 10; font.getbbox() is the replacement there.
    font = ImageFont.truetype("simhei.ttf", fontsize, encoding="utf-8")
    # Place the text above the anchor point; move it below if it would leave the frame.
    y = point[1] - font.getsize(text)[1]
    if y <= font.getsize(text)[1]:
        y = point[1] + font.getsize(text)[1]
    draw.text((point[0], y), text, color, font=font)
    img = np.asarray(pilimg)
    return img
def getOutputsNames(net):
    # Get the names of all the layers in the network.
    layersNames = net.getLayerNames()
    # Output layers are the layers with unconnected outputs.
    # getUnconnectedOutLayers() returns a 2-D array in older OpenCV and a 1-D
    # array in newer versions, so flatten to handle both.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layersNames[i - 1] for i in out_layer_ids]
def inference(net, image, conf_thresh=0.5, iou_thresh=0.4, target_shape=(160, 160), draw_result=True, english=True):
    """Run mask detection on one RGB image and optionally draw the results on it."""
    height, width, _ = image.shape
    blob = cv2.dnn.blobFromImage(image, scalefactor=1/255.0, size=target_shape)
    net.setInput(blob)
    y_bboxes_output, y_cls_output = net.forward(getOutputsNames(net))
    # Remove the batch dimension; the batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single-class NMS rather than multi-class NMS.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)
    # keep_idxs holds the bounding boxes that survive NMS.
    keep_idxs = single_class_non_max_suppression(y_bboxes, bbox_max_scores, conf_thresh=conf_thresh, iou_thresh=iou_thresh)
    # keep_idxs = cv2.dnn.NMSBoxes(y_bboxes.tolist(), bbox_max_scores.tolist(), conf_thresh, iou_thresh)[:, 0]
    tl = round(0.002 * (height + width) * 0.5) + 1  # line thickness
    for idx in keep_idxs:
        conf = float(bbox_max_scores[idx])
        class_id = bbox_max_score_classes[idx]
        bbox = y_bboxes[idx]
        # Clip the coordinates so they do not exceed the image boundary.
        xmin = max(0, int(bbox[0] * width))
        ymin = max(0, int(bbox[1] * height))
        xmax = min(int(bbox[2] * width), width)
        ymax = min(int(bbox[3] * height), height)
        if draw_result:
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), colors[class_id], thickness=tl)
            if english:
                # Verbose label drawn with the TrueType font.
                image = puttext_onscreen(image, id2chiclass[class_id], (xmin, ymin), colors[class_id])
            else:
                # Compact label with the confidence score.
                cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, colors[class_id])
            # pyglet.app.run()  # sound alert (disabled)
    return image
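# Minimal usage sketch (mirrors the __main__ flow below):
#   net = cv2.dnn.readNet('models/face_mask_detection.caffemodel',
#                         'models/face_mask_detection.prototxt')
#   img = cv2.cvtColor(cv2.imread('img/demo2.jpg'), cv2.COLOR_BGR2RGB)
#   result = inference(net, img, target_shape=(260, 260))  # RGB in, RGB out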
def run_on_video(Net, video_path, conf_thresh=1.0):
    """Run detection on a video stream (file path or camera index) and display the frames."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Video open failed.")
    status = True
    while status:
        status, img_raw = cap.read()
        if not status:
            print("Done processing !!!")
            break
        img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
        img_raw = inference(Net, img_raw, target_shape=(260, 260), conf_thresh=conf_thresh)
        # inference draws on an RGB frame; flip the channels back to BGR for cv2.imshow.
        cv2.imshow('image', img_raw[:, :, ::-1])
        cv2.waitKey(1)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--proto', type=str, default='models/face_mask_detection.prototxt', help='prototxt path')
    parser.add_argument('--model', type=str, default='models/face_mask_detection.caffemodel', help='caffemodel path')
    parser.add_argument('--img-mode', type=int, default=0, help='set 1 to run on an image, 0 to run on video.')
    parser.add_argument('--img-path', type=str, default='img/demo2.jpg', help='path to your image.')
    parser.add_argument('--video-path', type=str, default='0', help='path to your video; `0` means use the camera.')
    parser.add_argument('--cam', type=str, default='1', help='camera index (unused in this script).')
    # parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
    args = parser.parse_args()
    Net = cv2.dnn.readNet(args.model, args.proto)
    if args.img_mode:
        img = cv2.imread(args.img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result = inference(Net, img, target_shape=(260, 260))
        cv2.namedWindow('detect', cv2.WINDOW_NORMAL)
        cv2.imshow('detect', result[:, :, ::-1])
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    else:
        video_path = args.video_path
        if args.video_path == '0':
            video_path = 0
        run_on_video(Net, video_path, conf_thresh=0.5)
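# Example invocations (assuming the default model files exist under models/):
#   python MaskDetector.py --img-mode 1 --img-path img/demo2.jpg
#   python MaskDetector.py --img-mode 0 --video-path 0   # webcam
#   python MaskDetector.py --img-mode 0 --video-path path/to/video.mp4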