暑假时候复现的一篇计算机视觉经典抠图算法 GrabCut 的论文:“GrabCut” — Interactive Foreground Extraction using Iterated Graph Cuts
代码存一下,虽然我已经转行不搞 CV 了哈哈。
myGrabCut.h
#pragma once #include<graph.cpp> #include<opencv2/opencv.hpp> #include <opencv2/core.hpp> #include <opencv2/core/types.hpp> #include<string> typedef Graph<double, double, double> GraphType; using namespace cv; class GMM { public: Mat model; static const int compo_k = 5; double coefs[compo_k]; double mean[compo_k][3]; double covs[compo_k][3][3]; double inv_covs[compo_k][3][3]; double det_covs[compo_k]; double sums[compo_k][3]; double prods[compo_k][3][3]; int sample_num[compo_k]; int tot_sample_num; GMM(); void calc_inv_det(int ci); double prob_GMM(const Vec3d color) const; double prob_Gi(int ci, const Vec3d color) const; int sle_compo(const Vec3d color) const; void init_param(); void add_pixel(int ci, const Vec3d color); void esti_param(); }; static double calcBeta(const Mat& img); /* Calculate weights of noterminal vertices of graph. beta and gamma - parameters of GrabCut algorithm. */ static void calcNWeights(const Mat& img, Mat& leftW, Mat& upleftW, Mat& upW, Mat& uprightW, double beta, double gamma); /* Check size, type and element values of mask matrix. */ static void checkMask(const Mat& img, const Mat& mask); /* Initialize mask using rectangular. */ static void initMaskWithRect(Mat& mask, Size imgSize, Rect rect); /* Initialize GMM background and foreground models using kmeans algorithm. */ static void initGMMs(const Mat& img, const Mat& mask, GMM& bgdGMM, GMM& fgdGMM); /* Assign GMMs components for each pixel. */ static void assignGMMsComponents(const Mat& img, const Mat& mask, const GMM& bgdGMM, const GMM& fgdGMM, Mat& compIdxs); //论文中:迭代最小化算法step 2:从每个高斯模型的像素样本集中学习每个高斯模型的参数 /* Learn GMMs parameters. 
*/ static void learnGMMs(const Mat& img, const Mat& mask, const Mat& compIdxs, GMM& bgdGMM, GMM& fgdGMM); /* Construct GCGraph */ static void constructGCGraph(const Mat& img, const Mat& mask, const GMM& bgdGMM, const GMM& fgdGMM, double lambda, const Mat& leftW, const Mat& upleftW, const Mat& upW, const Mat& uprightW, GraphType*& graph); /* Estimate segmentation using MaxFlow algorithm */ static void estimateSegmentation(GraphType* graph, Mat& mask, std::vector<double>& result); void mygrabCut(InputArray _img, InputOutputArray _mask, Rect rect, InputOutputArray _bgdModel, InputOutputArray _fgdModel, int iterCount, int mode);//,std::vector<double>& result);
主程序代码。其中最大流/最小割直接用了 maxflow-v3.01 的库,其余部分调用了一些 OpenCV 的基本接口(我不会说我就是照着 OpenCV 的库函数写的)。
#include "myGrabCut.h"
#include <cmath>
#include <cstring>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

GMM::GMM()
{
    // One Gaussian component per pixel cluster: 3 channel means, a 3x3
    // covariance and one shared mixture weight.
    memset(coefs, 0, sizeof(coefs));
    memset(mean, 0, sizeof(mean));
    memset(covs, 0, sizeof(covs));
    // FIX: the original constructor left sums/prods/sample_num/
    // tot_sample_num uninitialized (indeterminate values); zero them too.
    init_param();
    for (int ci = 0; ci < compo_k; ci++)
        if (coefs[ci] > 0)
            // Cache inverse and determinant of the covariance of component
            // ci, needed to evaluate the data-energy probabilities below.
            calc_inv_det(ci);
}

// Probability of `color` under the whole mixture: sum_i pi_i * N_i(color).
double GMM::prob_GMM(const Vec3d color) const
{
    double res = 0;
    for (int i = 0; i < compo_k; i++)
        res += coefs[i] * prob_Gi(i, color);
    return res;
}

// Gaussian density of component `ci` at `color`. The (2*pi)^(3/2)
// normalisation factor is omitted (it is constant across components and
// cancels in the relative energies). Returns 0 for empty components.
double GMM::prob_Gi(int ci, const Vec3d color) const
{
    if (coefs[ci] <= 0)
        return 0.0;
    Vec3d d = color;
    d[0] -= mean[ci][0];
    d[1] -= mean[ci][1];
    d[2] -= mean[ci][2];
    // Mahalanobis form d^T * Sigma^-1 * d.
    double mul =
        d[0] * (d[0] * inv_covs[ci][0][0] + d[1] * inv_covs[ci][1][0] + d[2] * inv_covs[ci][2][0]) +
        d[1] * (d[0] * inv_covs[ci][0][1] + d[1] * inv_covs[ci][1][1] + d[2] * inv_covs[ci][2][1]) +
        d[2] * (d[0] * inv_covs[ci][0][2] + d[1] * inv_covs[ci][1][2] + d[2] * inv_covs[ci][2][2]);
    return 1.0 / sqrt(det_covs[ci]) * exp(-0.5 * mul);
}

// Return the index of the component with the highest density at `color`
// (the k_n assignment of the paper).
int GMM::sle_compo(const Vec3d color) const
{
    int best = 0;
    double best_p = 0.0;
    for (int i = 0; i < compo_k; i++)
    {
        double pi = prob_Gi(i, color);
        if (pi > best_p)
        {
            best = i;
            best_p = pi;
        }
    }
    return best;
}

// Reset the accumulators before a new round of parameter learning.
void GMM::init_param()
{
    memset(prods, 0, sizeof(prods));
    memset(sums, 0, sizeof(sums));
    memset(sample_num, 0, sizeof(sample_num));
    tot_sample_num = 0; // FIX: was the no-op statement `tot_sample_num;`
}

// Accumulate one pixel sample into component `ci`.
void GMM::add_pixel(int ci, const Vec3d color)
{
    for (int i = 0; i < 3; i++)
    {
        sums[ci][i] += color[i];
        for (int j = 0; j < 3; j++)
            prods[ci][i][j] += color[i] * color[j];
    }
    sample_num[ci]++;
    tot_sample_num++;
}

// Estimate weight, mean and covariance of every component from the
// accumulated samples, then refresh the cached inverse/determinant.
void GMM::esti_param()
{
    for (int ci = 0; ci < compo_k; ci++)
    {
        const int nu = sample_num[ci];
        if (nu == 0)
        {
            coefs[ci] = 0;
            continue;
        }
        coefs[ci] = 1.0 * nu / tot_sample_num;
        for (int i = 0; i < 3; i++)
            mean[ci][i] = sums[ci][i] / nu;
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                covs[ci][i][j] = prods[ci][i][j] / nu - mean[ci][i] * mean[ci][j];
        // If the covariance is (near-)singular, add a little white noise on
        // the diagonal so it stays invertible.
        double* c = (double*)covs[ci];
        double dtrm = c[0] * (c[4] * c[8] - c[5] * c[7])
                    - c[1] * (c[3] * c[8] - c[5] * c[6])
                    + c[2] * (c[3] * c[7] - c[4] * c[6]);
        if (dtrm <= std::numeric_limits<double>::epsilon())
        {
            c[0] += 0.01;
            c[4] += 0.01;
            c[8] += 0.01;
        }
        calc_inv_det(ci);
    }
}

// Cache the determinant and the adjugate-based inverse of covs[ci].
void GMM::calc_inv_det(int ci)
{
    if (coefs[ci] <= 0)
        return;
    double* c = (double*)covs[ci];
    det_covs[ci] = c[0] * (c[4] * c[8] - c[5] * c[7])
                 - c[1] * (c[3] * c[8] - c[5] * c[6])
                 + c[2] * (c[3] * c[7] - c[4] * c[6]);
    inv_covs[ci][0][0] =  (c[4] * c[8] - c[5] * c[7]) / det_covs[ci];
    inv_covs[ci][1][0] = -(c[3] * c[8] - c[5] * c[6]) / det_covs[ci];
    inv_covs[ci][2][0] =  (c[3] * c[7] - c[4] * c[6]) / det_covs[ci];
    inv_covs[ci][0][1] = -(c[1] * c[8] - c[2] * c[7]) / det_covs[ci];
    inv_covs[ci][1][1] =  (c[0] * c[8] - c[2] * c[6]) / det_covs[ci];
    inv_covs[ci][2][1] = -(c[0] * c[7] - c[1] * c[6]) / det_covs[ci];
    inv_covs[ci][0][2] =  (c[1] * c[5] - c[2] * c[4]) / det_covs[ci];
    inv_covs[ci][1][2] = -(c[0] * c[5] - c[2] * c[3]) / det_covs[ci];
    inv_covs[ci][2][2] =  (c[0] * c[4] - c[1] * c[3]) / det_covs[ci];
}

/*
  Calculate beta - parameter of GrabCut algorithm (paper, formula (5)):
  beta = 1 / (2 * <||z_m - z_n||^2>), averaged over all 8-neighbour pairs.
*/
static double calcBeta(const Mat& img)
{
    double beta = 0;
    for (int x = 0; x < img.rows; x++)
    {
        for (int y = 0; y < img.cols; y++)
        {
            // Accumulate squared colour differences in the four "already
            // visited" directions; over the whole scan this covers every
            // 8-neighbourhood pair exactly once. The >0 / <cols-1 guards
            // avoid stepping outside the image.
            Vec3d color = img.at<Vec3b>(x, y), diff;
            if (y > 0) // left
            {
                diff = color - (Vec3d)img.at<Vec3b>(x, y - 1);
                beta += diff.dot(diff); // squared Euclidean distance
            }
            if (x > 0 && y > 0) // upleft
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y - 1);
                beta += diff.dot(diff);
            }
            if (x > 0) // up
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y);
                beta += diff.dot(diff);
            }
            if (x > 0 && y < img.cols - 1) // upright
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y + 1);
                beta += diff.dot(diff);
            }
        }
    }
    // 4*W*H - 3*W - 3*H + 2 is the exact number of neighbour pairs counted.
    if (beta <= std::numeric_limits<double>::epsilon())
        beta = 0;
    else
        beta = 1.0 / (2 * beta / (4 * img.cols * img.rows - 3 * img.cols - 3 * img.rows + 2));
    return beta;
}

/*
  Calculate weights of non-terminal vertices of graph (the n-links).
  beta and gamma - parameters of GrabCut algorithm. Each direction's
  weights are stored in a Mat of the same size as the image; border
  pixels that lack a neighbour in a direction get weight 0.
*/
static void calcNWeights(const Mat& img, Mat& leftW, Mat& upleftW, Mat& upW,
                         Mat& uprightW, double beta, double gamma)
{
    // Diagonal links are gamma/sqrt(2): inversely proportional to the
    // neighbour distance.
    const double gds2 = gamma / std::sqrt(2.0f);
    leftW.create(img.rows, img.cols, CV_64FC1);
    upleftW.create(img.rows, img.cols, CV_64FC1);
    upW.create(img.rows, img.cols, CV_64FC1);
    uprightW.create(img.rows, img.cols, CV_64FC1);
    for (int x = 0; x < img.rows; x++)
    {
        for (int y = 0; y < img.cols; y++)
        {
            Vec3d color = img.at<Vec3b>(x, y);
            Vec3d diff;
            if (y > 0) // left
            {
                diff = color - (Vec3d)img.at<Vec3b>(x, y - 1);
                leftW.at<double>(x, y) = gamma * exp(-beta * diff.dot(diff));
            }
            else
                leftW.at<double>(x, y) = 0;
            if (y > 0 && x > 0) // upleft
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y - 1);
                upleftW.at<double>(x, y) = gds2 * exp(-beta * diff.dot(diff));
            }
            else
                upleftW.at<double>(x, y) = 0;
            if (x > 0) // up
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y);
                upW.at<double>(x, y) = gamma * exp(-beta * diff.dot(diff));
            }
            else
                upW.at<double>(x, y) = 0;
            if (y + 1 < img.cols && x > 0) // upright
            {
                diff = color - (Vec3d)img.at<Vec3b>(x - 1, y + 1);
                uprightW.at<double>(x, y) = gds2 * exp(-beta * diff.dot(diff));
            }
            else
                uprightW.at<double>(x, y) = 0;
        }
    }
}

/*
  Check size, type and element values of mask matrix; raises on any
  violation. Also pops up a debug window visualising the current
  foreground labels (blocks until a key is pressed).
*/
static void checkMask(const Mat& img, const Mat& mask)
{
    // -5 == cv::Error::StsBadArg
    if (mask.empty())
        CV_Error(-5, "mask is empty");
    if (mask.type() != CV_8UC1)
        CV_Error(-5, "mask must have CV_8UC1 type");
    if (mask.cols != img.cols || mask.rows != img.rows)
        CV_Error(-5, "mask must have as many rows and cols as img");
    for (int y = 0; y < mask.rows; y++)
    {
        for (int x = 0; x < mask.cols; x++)
        {
            uchar val = mask.at<uchar>(y, x);
            if (val != GC_BGD && val != GC_FGD && val != GC_PR_BGD && val != GC_PR_FGD)
                // FIX: typo "equel" in the original error message.
                CV_Error(-5, "mask element value must be equal"
                             "GC_BGD or GC_FGD or GC_PR_BGD or GC_PR_FGD");
        }
    }
    // Debug visualisation: show (certain or probable) foreground in white.
    Mat tmask = mask.clone();
    for (int y = 0; y < tmask.rows; y++)
    {
        for (int x = 0; x < tmask.cols; x++)
        {
            uchar val = tmask.at<uchar>(y, x);
            if (val == GC_FGD || val == GC_PR_FGD)
                tmask.at<uchar>(y, x) = 255;
        }
    }
    imshow("tmask", tmask);
    waitKey(0);
    destroyWindow("tmask");
}

/*
  Initialize mask using rectangular: everything outside `rect` is certain
  background, everything inside is probable foreground. The rectangle is
  clipped to the image bounds first.
*/
static void initMaskWithRect(Mat& mask, Size imgSize, Rect rect)
{
    mask.create(imgSize, CV_8UC1);
    mask.setTo(GC_BGD);
    rect.x = std::max(0, rect.x);
    rect.y = std::max(0, rect.y);
    rect.width = std::min(rect.width, imgSize.width - rect.x);
    rect.height = std::min(rect.height, imgSize.height - rect.y);
    (mask(rect)).setTo(Scalar(GC_PR_FGD));
}

/*
  Initialize GMM background and foreground models using kmeans algorithm:
  cluster each sample set into compo_k clusters (k_n in the paper), then
  estimate each component's parameters from its cluster.
*/
static void initGMMs(const Mat& img, const Mat& mask, GMM& bgdGMM, GMM& fgdGMM)
{
    const int kMeansItCount = 10;
    const int kMeansType = KMEANS_PP_CENTERS; // kmeans++ seeding (Arthur & Vassilvitskii)
    Mat bgdLabels, fgdLabels;                 // per-sample component index
    std::vector<Vec3f> bgdSamples, fgdSamples;
    Point p;
    for (p.y = 0; p.y < img.rows; p.y++)
    {
        for (p.x = 0; p.x < img.cols; p.x++)
        {
            // GC_BGD and GC_PR_BGD pixels feed the background model,
            // everything else the foreground model.
            if (mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD)
                bgdSamples.push_back((Vec3f)img.at<Vec3b>(p));
            else // GC_FGD | GC_PR_FGD
                fgdSamples.push_back((Vec3f)img.at<Vec3b>(p));
        }
    }
    CV_Assert(!bgdSamples.empty() && !fgdSamples.empty());
    // kmeans expects one sample per row; wrap the vectors without copying.
    Mat _bgdSamples((int)bgdSamples.size(), 3, CV_32FC1, &bgdSamples[0][0]);
    kmeans(_bgdSamples, GMM::compo_k, bgdLabels,
           TermCriteria(TermCriteria::COUNT, kMeansItCount, 0.0), 0, kMeansType);
    Mat _fgdSamples((int)fgdSamples.size(), 3, CV_32FC1, &fgdSamples[0][0]);
    kmeans(_fgdSamples, GMM::compo_k, fgdLabels,
           TermCriteria(TermCriteria::COUNT, kMeansItCount, 0.0), 0, kMeansType);
    // With the cluster assignments fixed, estimate the GMM parameters.
    bgdGMM.init_param();
    for (int i = 0; i < (int)bgdSamples.size(); i++)
        bgdGMM.add_pixel(bgdLabels.at<int>(i, 0), bgdSamples[i]);
    bgdGMM.esti_param();
    fgdGMM.init_param();
    for (int i = 0; i < (int)fgdSamples.size(); i++)
        fgdGMM.add_pixel(fgdLabels.at<int>(i, 0), fgdSamples[i]);
    fgdGMM.esti_param();
}

/*
  Assign GMMs components for each pixel (paper, step 1 of the iterative
  minimisation): pick the most likely component of the model the mask
  currently assigns the pixel to.
*/
static void assignGMMsComponents(const Mat& img, const Mat& mask, const GMM& bgdGMM,
                                 const GMM& fgdGMM, Mat& compIdxs)
{
    Point p;
    for (p.y = 0; p.y < img.rows; p.y++)
    {
        for (p.x = 0; p.x < img.cols; p.x++)
        {
            Vec3d color = img.at<Vec3b>(p);
            compIdxs.at<int>(p) = mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD
                                      ? bgdGMM.sle_compo(color)
                                      : fgdGMM.sle_compo(color);
        }
    }
}

/*
  Learn GMMs parameters (paper, step 2 of the iterative minimisation):
  re-estimate every component from the pixels currently assigned to it.
*/
static void learnGMMs(const Mat& img, const Mat& mask, const Mat& compIdxs,
                      GMM& bgdGMM, GMM& fgdGMM)
{
    bgdGMM.init_param();
    fgdGMM.init_param();
    // FIX: single pass over the image. The original re-scanned the whole
    // image once per component (compo_k passes); per-component samples are
    // still accumulated in the same row-major order, so the estimated
    // parameters are identical.
    Point p;
    for (p.y = 0; p.y < img.rows; p.y++)
    {
        for (p.x = 0; p.x < img.cols; p.x++)
        {
            int ci = compIdxs.at<int>(p);
            if (mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD)
                bgdGMM.add_pixel(ci, img.at<Vec3b>(p));
            else
                fgdGMM.add_pixel(ci, img.at<Vec3b>(p));
        }
    }
    bgdGMM.esti_param();
    fgdGMM.esti_param();
}

/*
  Construct GCGraph: one node per pixel; t-links carry the data term
  (-log probability under the opposite model, or 0/lambda for user-fixed
  pixels), n-links carry the precomputed smoothness weights. On return
  `graph` owns a freshly allocated GraphType.
*/
static void constructGCGraph(const Mat& img, const Mat& mask, const GMM& bgdGMM, const GMM& fgdGMM,
                             double lambda, const Mat& leftW, const Mat& upleftW, const Mat& upW,
                             const Mat& uprightW, GraphType*& graph)
{
    int vtxCount = img.cols * img.rows;
    int edgeCount = 2 * (4 * vtxCount); // generous upper bound on n-links
    graph = new GraphType(vtxCount, edgeCount);
    Point p;
    for (p.y = 0; p.y < img.rows; p.y++)
    {
        for (p.x = 0; p.x < img.cols; p.x++)
        {
            int vtxIdx = graph->add_node();
            Vec3b color = img.at<Vec3b>(p);

            // t-links: Source (foreground) / Sink (background) capacities.
            double fromSource, toSink;
            if (mask.at<uchar>(p) == GC_PR_BGD || mask.at<uchar>(p) == GC_PR_FGD)
            {
                // NOTE(review): prob_GMM can return 0 for extreme colours,
                // making -log() infinite; kept as in the reference code.
                fromSource = -log(bgdGMM.prob_GMM(color));
                toSink = -log(fgdGMM.prob_GMM(color));
            }
            else if (mask.at<uchar>(p) == GC_BGD)
            {
                // Hard background: free cut from Source, lambda to Sink.
                fromSource = 0;
                toSink = lambda;
            }
            else // GC_FGD
            {
                fromSource = lambda;
                toSink = 0;
            }
            graph->add_tweights(vtxIdx, fromSource, toSink);

            // n-links to the four already-created neighbours (smoothness
            // term). Pixel (row, col) has node index row*cols + col.
            if (p.x > 0) // left
            {
                double w = leftW.at<double>(p);
                graph->add_edge(vtxIdx, vtxIdx - 1, w, w);
            }
            if (p.x > 0 && p.y > 0) // upleft
            {
                double w = upleftW.at<double>(p);
                graph->add_edge(vtxIdx, vtxIdx - img.cols - 1, w, w);
            }
            if (p.y > 0) // up
            {
                double w = upW.at<double>(p);
                graph->add_edge(vtxIdx, vtxIdx - img.cols, w, w);
            }
            if (p.x < img.cols - 1 && p.y > 0) // upright
            {
                double w = uprightW.at<double>(p);
                graph->add_edge(vtxIdx, vtxIdx - img.cols + 1, w, w);
            }
        }
    }
}

/*
  Estimate segmentation using MaxFlow algorithm: run min-cut, append the
  flow value (the Gibbs energy) to `result`, and relabel only the
  *probable* pixels from the cut — user-fixed GC_BGD/GC_FGD pixels are
  never changed. Takes ownership of `graph` and deletes it.
*/
static void estimateSegmentation(GraphType* graph, Mat& mask, std::vector<double>& result)
{
    double flow = graph->maxflow();
    result.push_back(flow);
    Point p;
    for (p.y = 0; p.y < mask.rows; p.y++)
    {
        for (p.x = 0; p.x < mask.cols; p.x++)
        {
            if (mask.at<uchar>(p) == GC_PR_BGD || mask.at<uchar>(p) == GC_PR_FGD)
            {
                if (graph->what_segment(p.y * mask.cols + p.x) == GraphType::SOURCE)
                    mask.at<uchar>(p) = GC_PR_FGD;
                else
                    mask.at<uchar>(p) = GC_PR_BGD;
            }
        }
    }
    delete graph;
}

// Save the current foreground extraction to `file_tmp` for inspection.
void tmpImageSave(Mat img, Mat mask, std::string file_tmp)
{
    // GC_FGD (1) and GC_PR_FGD (3) are odd, GC_BGD (0) and GC_PR_BGD (2)
    // are even, so `mask & 1` is a foreground byte-mask.
    Mat binMask = mask & 1;
    Mat timg;
    img.copyTo(timg, binMask);
    imwrite(file_tmp, timg);
}

/*
  Drop-in replacement for cv::grabCut. `_bgdModel`/`_fgdModel` are kept
  for interface compatibility but unused: the GMMs live in local objects.
  Writes per-iteration intermediate images to "tmp_pic/<i>.jpg" and prints
  the per-iteration max-flow energies to stdout.
*/
void mygrabCut(InputArray _img, InputOutputArray _mask, Rect rect,
               InputOutputArray _bgdModel, InputOutputArray _fgdModel,
               int iterCount, int mode)
{
    Mat img = _img.getMat();
    Mat& mask = _mask.getMatRef();
    std::vector<double> energy; // max-flow value per iteration

    if (img.empty())
        CV_Error(-5, "image is empty");
    if (img.type() != CV_8UC3)
        // FIX: typo "mush" in the original error message.
        CV_Error(-5, "image must have CV_8UC3 type");

    GMM bgdGMM, fgdGMM;
    Mat compIdxs(img.size(), CV_32SC1);

    if (mode == GC_INIT_WITH_RECT || mode == GC_INIT_WITH_MASK)
    {
        if (mode == GC_INIT_WITH_RECT)
            initMaskWithRect(mask, img.size(), rect);
        else // mode == GC_INIT_WITH_MASK
            checkMask(img, mask);
        initGMMs(img, mask, bgdGMM, fgdGMM);
    }
    if (iterCount <= 0)
        return;
    if (mode == GC_EVAL)
        checkMask(img, mask);

    const double gamma = 50;
    const double lambda = 9 * gamma;
    const double beta = calcBeta(img);

    Mat leftW, upleftW, upW, uprightW;
    calcNWeights(img, leftW, upleftW, upW, uprightW, beta, gamma);

    const std::string tmp_file = "tmp_pic/";
    for (int i = 0; i < iterCount; i++)
    {
        GraphType* graph = nullptr;
        assignGMMsComponents(img, mask, bgdGMM, fgdGMM, compIdxs);
        learnGMMs(img, mask, compIdxs, bgdGMM, fgdGMM);
        constructGCGraph(img, mask, bgdGMM, fgdGMM, lambda, leftW, upleftW, upW, uprightW, graph);
        estimateSegmentation(graph, mask, energy); // deletes graph
        tmpImageSave(img, mask, tmp_file + std::to_string(i) + std::string(".jpg"));
    }
    for (std::vector<double>::iterator it = energy.begin(); it != energy.end(); it++)
        std::cout << (*it) << std::endl;
}