FINAL PROJECT - Implementation of an artificial vision system to detect and recognize hand gestures.

1. The program will take a video capture using the computer camera or a web camera.

The program will have the following functionalities:

“Pattern recording” mode: when a key is pressed, it records the patterns representing at least 3 gestures. The descriptor must be stored in a file on disk.

All files are stored in our directory; we identify 4 gestures, which are the ones we store in the database.

Result of data specifically saved in the database.

“Identify” mode: when a key is pressed, the program will read the patterns stored in the file and recognize the gesture. The screen must clearly indicate which gesture has been identified.

All files are stored in our directory; we identify 4 gestures, which are the ones we store in the database.

Result of data specifically saved in the database.

2. Window where we monitor the various techniques previously learned for the identification of the hand.

// Creates the "frame" window and attaches one trackbar per segmentation
// parameter: lower/upper bounds for H, S, V and for Y, Cr, Cb, plus the
// "threshold" value. Every trackbar uses eventoTrack as its callback.
namedWindow("frame", WINDOW_AUTOSIZE);
        createTrackbar("H-Min", "frame", &hMin, 180, eventoTrack, NULL);
        createTrackbar("S-Min", "frame", &sMin, 255, eventoTrack, NULL);
        createTrackbar("V-Min", "frame", &vMin, 255, eventoTrack, NULL);
        createTrackbar("H-Max", "frame", &hMax, 180, eventoTrack, NULL);
        createTrackbar("S-Max", "frame", &sMax, 255, eventoTrack, NULL);
        createTrackbar("V-Max", "frame", &vMax, 255, eventoTrack, NULL);
        createTrackbar("Y-Min", "frame", &yMin, 255, eventoTrack, NULL);
        createTrackbar("Cr-Min", "frame", &crMin, 255, eventoTrack, NULL);
        createTrackbar("Cb-Min", "frame", &cbMin, 255, eventoTrack, NULL);
        createTrackbar("Y-Max", "frame", &yMax, 255, eventoTrack, NULL);
        createTrackbar("Cr-Max", "frame", &crMax, 255, eventoTrack, NULL);
        createTrackbar("Cb-Max", "frame", &cbMax, 255, eventoTrack, NULL);
        createTrackbar("threshold", "frame", &thresholdVal, 255, eventoTrack, NULL);

3. The program must have results windows showing each of the stages carried out during the gesture identification process.

Identification of the hand gesture.

4. For this you can use different approaches, such as the one seen in class (thresholding by color in HSV, YcbCr color spaces or any other space).

5. You must use at least 2 techniques that allow you to verify if the gesture made is one of the 3 stored in the database (file)

The first technique used for the detection of gestures is the approximation by maximum and minimum points according to the calculation with the centroid and the area of interest.

The second technique used is the calculation of the Euclidean distance of the histograms of the image dataset with respect to the image that is displayed at the moment in the ROI Mask window.

In window M1 method one is displayed and in window M2 method two is displayed

PROJECT ANALYSIS.

How does lighting affect the gesture identification techniques (which one is most affected)?

Lighting is key for recognizing the hand gesture: the brighter the scene, the better the result. With less illumination, precision drops noticeably, and the effect is strongest when identifying the four-finger gesture.

Which preprocessing techniques allow for better results?

The techniques used are the binarization of the images, the transformation to the HSV and YCrCb color spaces to segment the skin color, and the elimination of the background in the area of interest.

Determine which color space has the best results to perform the color binarization process.

Both at night and during the day, we obtained the best results using mostly YCrCb color binarization (about 90%) with a small contribution from HSV (about 10%).

Specify which filters you used to improve the gesture identification process.

MEDIAN BLUR FILTER (with the kernel size taken from the "threshold" trackbar value) to soften the edges of the hand.

DILATION FILTER (Using a 5x5 mask) to fill empty spaces in the hand avoiding the elimination of fingers.

Source code

#include <dirent.h>
#include <time.h>

#include <algorithm>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <sstream>
#include<string>
#include <vector>

#include "opencv2/opencv.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/videoio.hpp"
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/video/video.hpp>
#include <opencv2/videoio/videoio.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>
using namespace std;
using namespace cv;


// File names (without the directory part) of the stored gesture images;
// filled from loadFile("db/") at start-up and again after every save.
vector<string> imagenes;
// Image buffers shared across the program (current frame, annotated preview,
// region of interest and its binary mask, among others).
Mat frame, frameOut, hsv, ycrcb, frameAux, camera, ROI, ROIMask, bg, dif;
// Segmentation parameters, each bound to a trackbar of the "frame" window:
// thresholdVal is the median-blur aperture (only applied when it is odd), and
// the remaining values are the lower/upper bounds handed to inRange() for the
// HSV and YCrCb images.
int thresholdVal=7, hMin = 0, sMin = 0, vMin = 0, hMax = 180, sMax = 106, vMax = 255, yMin = 30, crMin = 0, cbMin = 0, yMax = 150, crMax = 234, cbMax = 167;
// Incremented on every save; used to build the "db/gesto<N>.png" file name.
int contIdGesto = 0;

// Lists the entries of a directory.
//
// pathFolder: path of the directory to scan.
// Returns the entry names (not full paths), excluding "." and "..", sorted
// alphabetically. The result is empty when pathFolder is null or the
// directory cannot be opened; the latter is reported on stderr.
std::vector<std::string> loadFile(const char* pathFolder) {
    std::vector<std::string> files;
    if (pathFolder == nullptr) {
        return files;
    }

    DIR* dir = opendir(pathFolder);
    if (dir == nullptr) {
        std::cerr << "Can't open directory " << pathFolder << std::endl;
        return files;
    }

    while (const struct dirent* entry = readdir(dir)) {
        const std::string name = entry->d_name;
        if (name != "." && name != "..") {
            files.push_back(name);
        }
    }
    closedir(dir);

    // readdir() gives no ordering guarantee; sorting keeps the order in which
    // the stored gestures are compared stable from one run to the next.
    std::sort(files.begin(), files.end());
    return files;
}



size_t findMaxContourn(vector < vector<Point> > contoursFrame) {
    size_t indexOfBiggestContour;

    if (contoursFrame.size() > 0) {
        indexOfBiggestContour = -1;
        size_t sizeOfBiggestContour = 0;
        for (size_t i = 0; i < contoursFrame.size(); i++) {
            if (contoursFrame[i].size() > sizeOfBiggestContour) {
                sizeOfBiggestContour = contoursFrame[i].size();
                indexOfBiggestContour = i;
            }
        }
    }
    return indexOfBiggestContour;
}

void saveFileImg(Mat frame, String filename) {
    imwrite(filename, frame);
    cout << "save img" << endl;
}

int compareMetod1(vector < vector<Point> > contoursIng, Mat frameImg) {
    vector<vector<int> >hull(contoursIng.size());
    vector<vector<Vec4i> > defects(contoursIng.size());
    vector<vector<Point> >defectPoint(contoursIng.size());
    int count;
    size_t indexOfBiggestContour = findMaxContourn(contoursIng);
    for (size_t i = 0; i < contoursIng.size(); i++)
    { 
        if (contourArea(contoursIng[i]) > 5000) {
            convexHull(contoursIng[i], hull[i], true);
            convexityDefects(contoursIng[i], hull[i], defects[i]);
            if (indexOfBiggestContour == i) {
                count = 0;
                for (size_t k = 0; k < defects[i].size(); k++) {
                    if (defects[i][k][3] > 13 * 256) {
                        int p_start = defects[i][k][0];
                        int p_end = defects[i][k][1];
                        int p_far = defects[i][k][2];
                        defectPoint[i].push_back(contoursIng[i][p_far]);
                        circle(frameImg, contoursIng[i][p_end], 3, Scalar(0, 0, 255), 2); //i ydi
                        count++;
                    }
                }
            }
        }
    }
    //cout << "Img points " << count << endl;
    return count;
}

double euclideanDistance(Mat histoTrain, Mat histoTest) {
    double d = 0.0;
    for (size_t i = 0; i < 256; i++)
    {
        d += pow((histoTrain.at<float>(i) - histoTest.at<float>(i)), 2);
    }
    return sqrt(d);
}

Mat histoCalc(Mat frame) {
    int histSize = 256;
    float range[] = { 0, 256 };
    const float* histRange = { range };
    bool uniform = true, accumulate = false;
    Mat histo;
    calcHist(&frame, 1, 0, Mat(), histo, 1, &histSize, &histRange, uniform, accumulate);
    return histo;
}

void findImagePoints(int ponintsROI, Mat camera, Mat maskRoi) {
    double d = 100000;
    Mat imgEuclidian;
    for (size_t i = 0; i < imagenes.size(); i++)
    {
        Mat imagen = imread("db/" + imagenes[i], IMREAD_GRAYSCALE);

        Mat histoImg = histoCalc(imagen);
        Mat histoRoi = histoCalc(maskRoi);

        double dAux = euclideanDistance(histoImg, histoRoi);
        if (dAux < d) {
            d = dAux;
            imgEuclidian = imagen.clone();
        }

        //cout << "File " << imagenes[i] << endl;
        vector<vector<Point> > contoursImg;
        vector<Vec4i> hierarchyImg;
        findContours(imagen, contoursImg, hierarchyImg, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0));
        int numPoints = compareMetod1(contoursImg, imagen);

        if (ponintsROI == numPoints) {
            //cout << "File " << imagenes[i] << " Num Points " << numPoints << " ROI " << ponintsROI << endl;
            putText(camera, imagenes[i], Point(75, 450), FONT_HERSHEY_SIMPLEX, 3, Scalar(0, 255, 0), 3, 8, false);
            imshow("M1", imagen);
        }
    }
    imshow("M2", imgEuclidian);
}


// Trackbar callback that intentionally does nothing: the trackbars write
// straight into the variables they are bound to.
//
// v:  new trackbar position (ignored).
// pP: user data pointer (ignored).
void eventoTrack(int v, void* pP) {
    (void)v;
    (void)pP;
}

int main() {
    VideoCapture videoCapture(0);
    if (!videoCapture.isOpened()) {
        cout << "Can't find camera!" << endl;
        return -1;
    }
    imagenes = loadFile("db/");

    while (true) {
        videoCapture >> frame;
        frameAux = frame.clone();
        camera = frame.clone();

        namedWindow("frame", WINDOW_AUTOSIZE);
        createTrackbar("H-Min", "frame", &hMin, 180, eventoTrack, NULL);
        createTrackbar("S-Min", "frame", &sMin, 255, eventoTrack, NULL);
        createTrackbar("V-Min", "frame", &vMin, 255, eventoTrack, NULL);
        createTrackbar("H-Max", "frame", &hMax, 180, eventoTrack, NULL);
        createTrackbar("S-Max", "frame", &sMax, 255, eventoTrack, NULL);
        createTrackbar("V-Max", "frame", &vMax, 255, eventoTrack, NULL);
        createTrackbar("Y-Min", "frame", &yMin, 255, eventoTrack, NULL);
        createTrackbar("Cr-Min", "frame", &crMin, 255, eventoTrack, NULL);
        createTrackbar("Cb-Min", "frame", &cbMin, 255, eventoTrack, NULL);
        createTrackbar("Y-Max", "frame", &yMax, 255, eventoTrack, NULL);
        createTrackbar("Cr-Max", "frame", &crMax, 255, eventoTrack, NULL);
        createTrackbar("Cb-Max", "frame", &cbMax, 255, eventoTrack, NULL);
        createTrackbar("threshold", "frame", &thresholdVal, 255, eventoTrack, NULL);

        Rect rect(12, 12, 200, 250);
        rectangle(camera, rect, Scalar(0, 0, 255));
        ROI = frame(rect);

        Mat hsvFrame, ycrcbFrame;

        cvtColor(ROI, hsvFrame, COLOR_BGR2HSV);
        cvtColor(ROI, ycrcbFrame, COLOR_BGR2YCrCb);

        if (thresholdVal % 2 != 0) {
            medianBlur(ROI, hsvFrame, thresholdVal);
            medianBlur(ROI, ycrcbFrame, thresholdVal);
        }
        
        inRange(hsvFrame,Scalar(hMin, sMin, vMin), Scalar(hMax, sMax, vMax), ROIMask);
        inRange(ycrcbFrame, Scalar(yMin, crMin, cbMin), Scalar(yMax, crMax, cbMax), ROIMask);
        
        dilate(ROIMask, ROIMask, getStructuringElement(MORPH_CROSS, Size(5, 5)));

        vector<vector<Point> > contoursFrame;
        vector<Vec4i> hierarchy;
        findContours(ROIMask, contoursFrame, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0));

        Moments momentos1 = moments(ROIMask, true);
        double cx = momentos1.m10 / momentos1.m00;
        double cy = momentos1.m01 / momentos1.m00;

        Point centroide(cx, cy);
        circle(camera, centroide, 3, Scalar(255), 3);

        vector<vector<int> >hull(contoursFrame.size());
        vector<vector<Point> >hullPoint(contoursFrame.size()); //polígono que rodea la mano según el movimiento 
        vector<vector<Vec4i> > defects(contoursFrame.size()); //puntos verdes en las yemas de los dedos ... matriz multidimensional
        vector<vector<Point> >defectPoint(contoursFrame.size()); //manteniendo los puntos x, y de la yema del dedo como punto
        vector<RotatedRect>minRect(contoursFrame.size());

        size_t indexOfBiggestContour = findMaxContourn(contoursFrame);

        int count;

        for (size_t i = 0; i < contoursFrame.size(); i++)
        {
            if (contourArea(contoursFrame[i]) > 5000) {
                convexHull(contoursFrame[i], hull[i], true);
                convexityDefects(contoursFrame[i], hull[i], defects[i]);
                if (indexOfBiggestContour == i) {
                    minRect[i] = minAreaRect(contoursFrame[i]);
                    for (size_t k = 0; k < hull[i].size(); k++) {
                        int ind = hull[i][k];
                        hullPoint[i].push_back(contoursFrame[i][ind]);
                    }
                    count = 0;

                    for (size_t k = 0; k < defects[i].size(); k++) {
                        if (defects[i][k][3] > 13 * 256) {
                            int p_start = defects[i][k][0];
                            int p_end = defects[i][k][1];
                            int p_far = defects[i][k][2];
                            defectPoint[i].push_back(contoursFrame[i][p_far]);
                            circle(camera, contoursFrame[i][p_end], 3, Scalar(0, 0, 0), 2); //i ydi
                            count++;
                        }
                    }

                    findImagePoints(count, camera, ROIMask);

                    //putText(camera, "escaneando", Point(75, 450), FONT_HERSHEY_SIMPLEX, 3, Scalar(0, 255, 0), 3, 8, false);

                    drawContours(camera, contoursFrame, i, Scalar(255, 255, 0), 1, 8, vector<Vec4i>(), 0, Point());
                    drawContours(camera, hullPoint, i, Scalar(0, 0, 0), 1, 8, vector<Vec4i>(), 0, Point());
                    drawContours(camera, hullPoint, i, Scalar(0, 0, 255), 1, 8, vector<Vec4i>(), 0, Point());
                }
            }

        }
  

        switch (waitKey(1))
        {
        case 27: // ESC - Exit
            return 0;
        case 115: // S - Save file
            contIdGesto++;
            saveFileImg(ROIMask, "db/gesto"+ to_string(contIdGesto)+".png");
            imagenes = loadFile("db/");
            break;   
        }
        imshow("Camara", camera);
        imshow("Img ROI", ROI);
        imshow("ROI Mask", ROIMask);
    }
    return 0;
}

Video:

Buscar este blog

Computer vision