import os
import cv2
import sys
from zipfile import ZipFile
from urllib.request import urlretrieve
# ========================-Downloading Assets-========================
def download_and_unzip(url, save_path):
    print(f"Downloading and extracting assets....", end="")

    # Download the zip file using the urllib package.
    urlretrieve(url, save_path)

    try:
        # Extract the zip file using the zipfile package.
        with ZipFile(save_path) as z:
            # Extract ZIP file contents into the same directory.
            z.extractall(os.path.split(save_path)[0])

        print("Done")

    except Exception as e:
        print("\nInvalid file.", e)
= r"https://www.dropbox.com/s/efitgt363ada95a/opencv_bootcamp_assets_12.zip?dl=1"
URL
= os.path.join(os.getcwd(), f"opencv_bootcamp_assets_12.zip")
asset_zip_path
# Download if assest ZIP does not exists.
if not os.path.exists(asset_zip_path):
download_and_unzip(URL, asset_zip_path)# ====================================================================
# Set the device index for the camera as we have in prior pages
s = 0
if len(sys.argv) > 1:
    s = sys.argv[1]

# Create a video capture object & set a window output
source = cv2.VideoCapture(s)

win_name = "Camera Output"
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
# Read in the pretrained model from Caffe, one of the deep learning frameworks (TensorFlow and PyTorch are others).
# readNetFromCaffe takes two arguments:
#   1: the prototxt file that describes the network architecture
#   2: the Caffe model file that contains the weights the model was trained with
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")

# readNetFromCaffe returns an instance of the network, which we use below to run inference.
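# For other frameworks there are analogous loaders in the cv2.dnn module. A minimal
# sketch for comparison (the file names below are placeholders, not part of this demo):
#   net = cv2.dnn.readNetFromTensorflow("frozen_graph.pb", "graph.pbtxt")
#   net = cv2.dnn.readNetFromONNX("model.onnx")  # common route for PyTorch models exported to ONNX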
# Model parameters: these reflect how the model was trained, so we have to stick to what was done before.
in_width = 300
in_height = 300
mean = [104, 117, 123]  # per-channel (BGR) mean subtracted during training
conf_threshold = 0.7    # minimum confidence we require to keep a detection
# Read the frames one by one as produced by the camera, until the Esc key (ASCII 27) is pressed.
while cv2.waitKey(1) != 27:
    has_frame, frame = source.read()
    if not has_frame:
        break
    frame = cv2.flip(frame, 1)  # flip horizontally so the output matches a mirror view
    frame_height = frame.shape[0]  # dynamically retrieve the size of the video frame
    frame_width = frame.shape[1]
    # Create a 4D blob from the frame. This preprocessing puts the input image
    # into the shape and value range the model expects.
    #   arg1: the input frame
    #   arg2: scale factor (1.0 here, as specified in the model's YAML entry)
    #   arg3: the (width, height) the network expects
    #   arg4: mean value subtracted from each channel
    #   swapRB=False because Caffe and OpenCV both use the BGR channel order
    #   crop=False means the image is resized to 300x300 rather than cropped
    blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean, swapRB=False, crop=False)
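    # For intuition, blobFromImage above is roughly equivalent to this manual
    # preprocessing (a sketch, assuming numpy is imported as np):
    #   resized = cv2.resize(frame, (in_width, in_height))    # resize to the 300x300 training size
    #   shifted = resized.astype("float32") - np.array(mean)  # subtract the per-channel (BGR) mean
    #   blob = shifted.transpose(2, 0, 1)[np.newaxis, :]      # HWC -> NCHW, adding a batch dimension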
    # Run the model: pass the blob as input, then perform a forward pass to get detections.
    net.setInput(blob)
    detections = net.forward()
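    # detections is a 4D array of shape [1, 1, N, 7]: one row per candidate detection,
    # holding [image_id, class_id, confidence, x1, y1, x2, y2], with the box coordinates
    # normalized to [0, 1] (hence the scaling by frame width/height below).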
    # Loop over all candidate detections and read the confidence score of each.
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            x_left_bottom = int(detections[0, 0, i, 3] * frame_width)
            y_left_bottom = int(detections[0, 0, i, 4] * frame_height)
            x_right_top = int(detections[0, 0, i, 5] * frame_width)
            y_right_top = int(detections[0, 0, i, 6] * frame_height)
            # Build a box around the detection
            cv2.rectangle(frame, (x_left_bottom, y_left_bottom), (x_right_top, y_right_top), (0, 255, 0))

            # Print the confidence level on the image
            label = "Confidence: %.4f" % confidence
            label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Display the label on a filled background just above the box
            cv2.rectangle(
                frame,
                (x_left_bottom, y_left_bottom - label_size[1]),
                (x_left_bottom + label_size[0], y_left_bottom + base_line),
                (255, 255, 255),
                cv2.FILLED,
            )
            cv2.putText(frame, label, (x_left_bottom, y_left_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
    # Calculate the time the detection took, convert it to milliseconds,
    # annotate the frame, then display it with imshow.
    t, _ = net.getPerfProfile()
    label = "Inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    cv2.imshow(win_name, frame)

source.release()
cv2.destroyWindow(win_name)
Face Detection - CAFFE
- We will use a pre-trained neural network to detect faces
- We cannot use OpenCV to train a neural network, but we can use it to run inference
- Even though we are downloading the weights file from the assets, we can also download the models from the internet via the download_models.py script found in the opencv/opencv repository on GitHub
- That page also contains a script showing how to use OpenCV to download various models
- That script references a YAML file which describes each model, the URL to download its weights file, and how the model was trained (a sketch of fetching the files directly follows below)
As you can see above, it even picks up face images shown on a phone
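For reference, a minimal sketch of fetching the two model files directly. The GitHub URLs below are assumptions based on the opencv and opencv_3rdparty repositories and may change; the official download_models.py script is the more reliable route:

import os
from urllib.request import urlretrieve

# NOTE: assumed URLs; verify against opencv/samples/dnn/face_detector before relying on them.
PROTO_URL = "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt"
MODEL_URL = ("https://raw.githubusercontent.com/opencv/opencv_3rdparty/"
             "dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel")

for url, fname in [(PROTO_URL, "deploy.prototxt"),
                   (MODEL_URL, "res10_300x300_ssd_iter_140000_fp16.caffemodel")]:
    if not os.path.exists(fname):
        urlretrieve(url, fname)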