ML | Implement face recognition with k-NN with scikit-learn



k-NN is one of the most basic classification algorithms in machine learning. It belongs to the supervised learning category of machine learning. k-NN is often used in search applications where you search for “similar” items. We measure similarity by creating vector representations of elements, and then compare vectors using an appropriate distance metric (such as Euclidean distance).

It is commonly used in data mining, pattern recognition, recommender systems, and intrusion detection.

Libraries used are:

OpenCV2
Pandas
Numpy
Scikit-learn

Dataset used:
We used the haarcascade_frontalface_default.xml cascade file, which is readily available online and can be downloaded from the official OpenCV repository.

Scikit learn:
scikit-learn provides a range of supervised and unsupervised learning algorithms through a consistent interface in Python.
This library is based on SciPy, which must be installed on your device in order to use scikit-learn.

Face Recognition:
This includes three Python files: the first is used to detect a face and save it in list format, the second to store the data in .csv file format, and the third to perform face recognition.

facedetect.py

# this file is used for face detection
# and then save the face data

import cv2

import numpy as np

 
# import the file containing the data
# stored in CSV file format

import npwriter

 

name = input ( "Enter your name:" )

  
# this is used to access the webcam
# to capture frames

cap = cv2.VideoCapture ( 0 )

 

classifier = cv2.CascadeClassifier ( "../ dataset / haarcascade_frontalface_default.xml" )

 
# this is the class used to define faces as specified
# with haarcascade_frontalface_default.xml as data

f_list = []

  

while True :

  ret, frame = cap.read ()

 

# convert image to gray

# scale, how easy it is to detect

gray = cv2.cvtColor (frame, cv2.COLOR_BGR2GRAY)

 

  # detects multiscale, detects face and face coordinates

faces = classifier.detectMultiScale (gray, 1.5 , 5 )

  

# this is used to detect a face that

# closest to the webcam in first position

  faces = sorted (faces, key = lambda x: x [ 2 ] * x [ 3 ],

  reverse = True )

 

# only the first detected face is used

faces = faces [: 1

 

# len (Faces) is a number

  # faces in the frame

if len (faces) = = 1

  # this is removing from the tuple format

face = faces [ 0

 

# coordinate storage

# face in different variables

  x, y, w, h = face 

 

# this will show the face

# what is detected

im_face = frame [y: y + h, x: x + w] 

 

cv2.imshow ( "face" , im_face)

  

 

if not ret:

  continue

 

  cv2.imshow ( " full " , frame)

 

key = cv2.waitKey ( 1 )

  

  # this will break program execution

# when q is pressed and will click on the frame when c is pressed

if key & amp; 0xFF = = ord ( `q` ):

break

  elif key & amp; 0xFF = = ord ( `c` ):

if len (faces) = = 1 :

gray_face = cv2.cvtColor (im_face, cv2.COLOR_BGR2GRAY)

  gray_face = cv2.resize (gray_face, ( 100 , 100 ))

print ( len (f_list) , type (gray_face), gray_face.shape)

  

  # this will add face coordinates to f_list

f_list.append (gray_face.reshape ( - 1 )) 

else :

print ( "face not found" )

 

# this will be xp Animate detection data

# face 10 times to improve accuracy

if len (f_list) = = 10 :

break

 
# declared in npwriter
npwriter.write (name, np.array (f_list)) 

 

 
cap.release ()
cv2.destroyAllWindows ()

npwriter.py — Create/Update the '.csv' file:

import pandas as pd

import numpy as np

import os.path

 

f_name = "face_data.csv"

 
# store data in a CSV file

def write (name, data):

 

if os.path.isfile (f_name):

 

df = pd.read_csv (f_name, index_col = 0 )

 

latest = pd.DataFrame (data, columns = map ( str , range ( 10000 )))

  latest [ "name" ] = name

 

  df = pd.concat ((df, latest), ignore_index = True , sort = False )

 

else :

 

# Provide a range only because the data

# already squished here when

# this was a store in f_list

df = pd.DataFrame (data, columns = map ( str , range ( 10000 )))

  df [ "name" ] = name

 

df.to_csv (f_name )

recog.py — face recognition

# this is used for recognition
# face after training the model with
# our data is stored using eat knn

import cv2

import numpy as np

import pandas as pd

  

from npwriter import f_name

from sklearn.neighbors import KNeighborsClassifier

 

 
# reading data

data = pd .read_csv (f_name) .values ​​

 
# data section

X, Y = data [:, 1 : - 1 ], data [:, - 1 ]

 

print ( X, Y)

 
# Knn function call with k = 5

model = KNeighborsClassifier (n_neighbors = 5 )

 
# model training
model.fit (X, Y)

 

cap = cv2.VideoCapture ( 0 )

  

classifier = cv2.CascadeClassifier ( "../ dataset / haarcascade_frontalface_default.xml" )

 

f_list = []

 

while True :

 

ret, frame = cap.read ()

  

gray = cv2. cvtColor (frame, cv2.COLOR_BGR2GRAY)

 

faces = classifier.detectMultiScale (gray, 1.5 , 5 )

  

X_test = []

 

# Testing data

for face in faces:

x, y, w, h = face

im_face = gray [y: y + h, x: x + w]

im_face = cv2.resize (im_face, ( 100 , 100 ))

  X_test .append (im_face.reshape ( - 1 ))

 

if len (faces) & gt; 0 :

response = model.predict (np.array (X_test))

# predicting the result using knn

 

for i, face in enumerate (faces):

x, y, w, h = face

  

# draw a rectangle on the detected face

cv2.rectangle (frame, (x, y), (x + w, y + h),

( 255 , 0 , 0 ), 3 )

 

  # add detected / predicted face name

cv2 .putText (frame, response [i], (x - 50 , y - 50 ),

cv2.FONT_HERSHEY_DUPLEX, 2 ,

  ( 0 , 255 , 0 ), 3 )

 

cv2.imshow ( "full" , frame)

 

key = cv2.waitKey ( 1 )

  

if key & amp; 0xFF = = ord ( "q" ):

break

  
cap.release ()
cv2.destroyAllWindows ()

Output: