暑假时候复现的一篇计算机视觉经典抠图算法 GrabCut 的论文:“GrabCut” — Interactive Foreground Extraction using Iterated Graph Cuts。
代码存一下,虽然我转行了、不搞 CV 了 hhh
myGrabCut.h
#pragma once
#include<graph.cpp>
#include<opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/types.hpp>
#include<string>
typedef Graph<double, double, double> GraphType;
using namespace cv;
// Gaussian Mixture Model with compo_k full-covariance 3-D (RGB) components,
// used to model the colour distribution of foreground / background pixels.
class GMM {
public:
// Appears unused in this file; presumably kept for API parity with
// cv::grabCut's model matrices — TODO confirm against callers.
Mat model;
// Number of Gaussian components per mixture (K = 5 as in the GrabCut paper).
static const int compo_k = 5;
// Mixing weight of each component.
double coefs[compo_k];
// Per-component mean colour (3 channels).
double mean[compo_k][3];
// Per-component 3x3 covariance matrix.
double covs[compo_k][3][3];
// Cached inverse covariance, consumed by prob_Gi().
double inv_covs[compo_k][3][3];
// Cached covariance determinant, consumed by prob_Gi().
double det_covs[compo_k];
// Learning accumulators: per-component sums of colours ...
double sums[compo_k][3];
// ... and per-component sums of outer products color*color^T.
double prods[compo_k][3][3];
// Number of samples assigned to each component.
int sample_num[compo_k];
// Total number of samples over all components.
int tot_sample_num;
GMM();
// Compute and cache inverse + determinant of component ci's covariance.
void calc_inv_det(int ci);
// Mixture density of a colour (the data term of the Gibbs energy).
double prob_GMM(const Vec3d color) const;
// Density under component ci alone (note: no (2*pi)^(3/2) factor).
double prob_Gi(int ci, const Vec3d color) const;
// Index of the component giving the colour the highest density.
int sle_compo(const Vec3d color) const;
// Reset all accumulators before a learning pass.
void init_param();
// Accumulate one pixel sample into component ci.
void add_pixel(int ci, const Vec3d color);
// Turn accumulators into weights/means/covariances (ML estimates).
void esti_param();
};
// Estimate beta (contrast parameter of eq. (5) in the paper) from the image.
static double calcBeta(const Mat& img);
/*
Calculate weights of non-terminal vertices of graph (the n-links).
beta and gamma - parameters of GrabCut algorithm.
*/
static void calcNWeights(const Mat& img, Mat& leftW, Mat& upleftW, Mat& upW,
Mat& uprightW, double beta, double gamma);
/*
Check size, type and element values of mask matrix.
*/
static void checkMask(const Mat& img, const Mat& mask);
/*
Initialize mask using rectangular.
*/
static void initMaskWithRect(Mat& mask, Size imgSize, Rect rect);
/*
Initialize GMM background and foreground models using kmeans algorithm.
*/
static void initGMMs(const Mat& img, const Mat& mask, GMM& bgdGMM, GMM& fgdGMM);
/*
Assign GMMs components for each pixel.
*/
static void assignGMMsComponents(const Mat& img, const Mat& mask, const GMM& bgdGMM,
const GMM& fgdGMM, Mat& compIdxs);
// Iterative-minimisation step 2 of the paper: learn each Gaussian
// component's parameters from its pixel sample set.
/*
Learn GMMs parameters.
*/
static void learnGMMs(const Mat& img, const Mat& mask, const Mat& compIdxs, GMM& bgdGMM, GMM& fgdGMM);
/*
Construct GCGraph. The graph is allocated with new; ownership passes to
the caller (estimateSegmentation deletes it).
*/
static void constructGCGraph(const Mat& img, const Mat& mask, const GMM& bgdGMM, const GMM& fgdGMM, double lambda,
const Mat& leftW, const Mat& upleftW, const Mat& upW, const Mat& uprightW,
GraphType*& graph);
/*
Estimate segmentation using MaxFlow algorithm; appends the max-flow value
(energy) of this iteration to result and deletes the graph.
*/
static void estimateSegmentation(GraphType* graph, Mat& mask, std::vector<double>& result);
// Top-level driver mirroring cv::grabCut's interface.
void mygrabCut(InputArray _img, InputOutputArray _mask, Rect rect,
InputOutputArray _bgdModel, InputOutputArray _fgdModel,
int iterCount, int mode);//,std::vector<double>& result);
主程序代码。其中最大流最小割直接用了 maxflow-v3.01 的库;其他部分调用了一些 OpenCV 的基本接口(其实基本上是照着 OpenCV 的库函数实现写的 hhh)。
#include "myGrabCut.h"
// Construct an empty GMM: every mixing weight is zero, so no component is
// valid until init_param()/add_pixel()/esti_param() have been run.
// Each pixel is an RGB triple, so a component has 3 means, a 3x3
// covariance matrix and one shared mixing weight.
GMM::GMM()
{
	memset(coefs, 0, sizeof(coefs));
	memset(mean, 0, sizeof(mean));
	memset(covs, 0, sizeof(covs));
	// Fix: tot_sample_num was never initialized anywhere, so the first
	// learning pass incremented an indeterminate value.
	tot_sample_num = 0;
	// Note: the original constructor looped over the components and called
	// calc_inv_det(ci) when coefs[ci] > 0 — but coefs was just zeroed, so
	// that loop was dead code and has been removed.
}
// Mixture density D(color) = sum_i coefs[i] * N_i(color) — the weighted sum
// of every component's (unnormalized) Gaussian density.
double GMM::prob_GMM(const Vec3d color) const
{
	double density = 0.0;
	for (int ci = 0; ci < compo_k; ci++) {
		density += coefs[ci] * prob_Gi(ci, color);
	}
	return density;
}
double GMM::prob_Gi(int ci, const Vec3d color) const {
if (coefs[ci] > 0) {
Vec3d cent = color;
cent[0] -= mean[ci][0], cent[1] -= mean[ci][1], cent[2] -= mean[ci][2];
double mul = cent[0] * (cent[0] * inv_covs[ci][0][0] + cent[1] * inv_covs[ci][1][0] + cent[2] * inv_covs[ci][2][0])
+ cent[1] * (cent[0] * inv_covs[ci][0][1] + cent[1] * inv_covs[ci][1][1] + cent[2] * inv_covs[ci][2][1])
+ cent[2] * (cent[0] * inv_covs[ci][0][2] + cent[1] * inv_covs[ci][1][2] + cent[2] * inv_covs[ci][2][2]);
return 1.0 / sqrt(det_covs[ci]) * exp(-0.5 * mul);
}
return 0.0;
}
// Pick the component under which the colour has the highest density
// (k_n assignment in the paper). Defaults to component 0 when all
// densities are zero.
int GMM::sle_compo(const Vec3d color) const
{
	int best = 0;
	double best_p = 0.0;
	for (int ci = 0; ci < compo_k; ci++) {
		const double p = prob_Gi(ci, color);
		if (p > best_p) {
			best = ci;
			best_p = p;
		}
	}
	return best;
}
// Reset every learning accumulator before a new assignment/learning pass.
void GMM::init_param()
{
	memset(prods, 0, sizeof(prods));
	memset(sums, 0, sizeof(sums));
	memset(sample_num, 0, sizeof(sample_num));
	// Fix: the original line read `tot_sample_num;` — a no-op expression
	// statement — so the total sample count was never reset (nor initialized
	// anywhere else), corrupting the mixing weights computed in esti_param().
	tot_sample_num = 0;
}
// Accumulate one pixel into component ci: colour sum, outer-product sum,
// and the per-component / total sample counters.
void GMM::add_pixel(int ci, const Vec3d color)
{
	for (int r = 0; r < 3; r++) {
		sums[ci][r] += color[r];
		for (int c = 0; c < 3; c++)
			prods[ci][r][c] += color[r] * color[c];
	}
	++sample_num[ci];
	++tot_sample_num;
}
// Finish a learning pass: convert the accumulators into ML estimates of
// every component's mixing weight, mean and covariance, then cache the
// covariance inverse/determinant via calc_inv_det().
void GMM::esti_param()
{
	for (int ci = 0; ci < compo_k; ci++) {
		const int n = sample_num[ci];
		if (n == 0) {
			coefs[ci] = 0; // component received no samples this pass
			continue;
		}
		coefs[ci] = 1.0 * n / tot_sample_num;
		for (int i = 0; i < 3; i++)
			mean[ci][i] = sums[ci][i] / n;
		// cov = E[x x^T] - mean * mean^T
		for (int i = 0; i < 3; i++)
			for (int j = 0; j < 3; j++)
				covs[ci][i][j] = prods[ci][i][j] / n - mean[ci][i] * mean[ci][j];
		double(*m)[3] = covs[ci];
		// Determinant by cofactor expansion along the first row.
		double dtrm = m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])
		            - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
		            + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
		// Near-singular covariance: add a little white noise on the
		// diagonal so that it becomes invertible.
		if (dtrm <= std::numeric_limits<double>::epsilon()) {
			m[0][0] += 0.01;
			m[1][1] += 0.01;
			m[2][2] += 0.01;
		}
		calc_inv_det(ci);
	}
}
// Cache the determinant and the inverse of component ci's covariance matrix
// (adjugate / cofactor formula for a 3x3 matrix). Skips empty components.
void GMM::calc_inv_det(int ci)
{
	if (coefs[ci] <= 0)
		return;
	double(*m)[3] = covs[ci];
	// det(Sigma) by cofactor expansion along the first row.
	det_covs[ci] = m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])
	             - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
	             + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
	const double det = det_covs[ci];
	// Sigma^-1 = adj(Sigma) / det. esti_param() regularizes near-singular
	// covariances (diagonal noise) before calling us, so det is non-zero
	// on that path.
	inv_covs[ci][0][0] =  (m[1][1] * m[2][2] - m[1][2] * m[2][1]) / det;
	inv_covs[ci][1][0] = -(m[1][0] * m[2][2] - m[1][2] * m[2][0]) / det;
	inv_covs[ci][2][0] =  (m[1][0] * m[2][1] - m[1][1] * m[2][0]) / det;
	inv_covs[ci][0][1] = -(m[0][1] * m[2][2] - m[0][2] * m[2][1]) / det;
	inv_covs[ci][1][1] =  (m[0][0] * m[2][2] - m[0][2] * m[2][0]) / det;
	inv_covs[ci][2][1] = -(m[0][0] * m[2][1] - m[0][1] * m[2][0]) / det;
	inv_covs[ci][0][2] =  (m[0][1] * m[1][2] - m[0][2] * m[1][1]) / det;
	inv_covs[ci][1][2] = -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) / det;
	inv_covs[ci][2][2] =  (m[0][0] * m[1][1] - m[0][1] * m[1][0]) / det;
}
static double calcBeta(const Mat & img) {
double beta = 0;
for (int x = 0; x < img.rows; x++)
{
for (int y = 0; y < img.cols; y++)
{
//计算四个方向邻域两像素的差别,也就是欧式距离或者说二阶范数
//(当所有像素都算完后,就相当于计算八邻域的像素差了)
Vec3d color = img.at<Vec3b>(x, y),diff;
if (y > 0) // left >0的判断是为了避免在图像边界的时候还计算,导致越界
{
diff = color - (Vec3d)img.at<Vec3b>(x, y - 1);
beta += diff.dot(diff); //矩阵的点乘,也就是各个元素平方的和
}
if (x > 0 && y > 0) // upleft
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y - 1);
beta += diff.dot(diff);
}
if (x > 0) // up
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y);
beta += diff.dot(diff);
}
if (x > 0 && y < img.cols - 1) // upright
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y + 1);
beta += diff.dot(diff);
}
}
}
if (beta <= std::numeric_limits<double>::epsilon())
beta = 0;
else
beta = 1.f / (2 * beta / (4 * img.cols * img.rows - 3 * img.cols - 3 * img.rows + 2)); //论文公式(5)
return beta;
}
/*
Calculate weights of noterminal vertices of graph.
beta and gamma - parameters of GrabCut algorithm.
*/
static void calcNWeights(const Mat& img, Mat& leftW, Mat& upleftW, Mat& upW,
Mat& uprightW, double beta, double gamma)
{
const double gds2 = gamma / std::sqrt(2.0f);
//每个方向的边的权值通过一个和图大小相等的Mat来保存
leftW.create(img.rows, img.cols, CV_64FC1);
upleftW.create(img.rows, img.cols, CV_64FC1);
upW.create(img.rows, img.cols, CV_64FC1);
uprightW.create(img.rows, img.cols, CV_64FC1);
Vec3d color, diff;
for (int x = 0; x < img.rows; x++)
{
for (int y = 0; y < img.cols; y++)
{
Vec3d color = img.at<Vec3b>(x, y);
if (y > 0) // left //避免图的边界
{
diff = color - (Vec3d)img.at<Vec3b>(x, y - 1);
leftW.at<double>(x, y) = gamma * exp(-beta * diff.dot(diff));
}
else
leftW.at<double>(x, y) = 0;
if (y > 0 && x > 0) // upleft
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y - 1);
upleftW.at<double>(x, y) = gds2 * exp(-beta * diff.dot(diff));
}
else
upleftW.at<double>(x, y) = 0;
if (x > 0) // up
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y);
upW.at<double>(x, y) = gamma * exp(-beta * diff.dot(diff));
}
else
upW.at<double>(x, y) = 0;
if (y + 1 < img.cols && x > 0) // upright
{
diff = color - (Vec3d)img.at<Vec3b>(x - 1, y + 1);
uprightW.at<double>(x, y) = gds2 * exp(-beta * diff.dot(diff));
}
else
uprightW.at<double>(x, y) = 0;
}
}
}
/*
Check size, type and element values of mask matrix.
Raises via CV_Error (-5 == StsBadArg) on any violation.
*/
static void checkMask(const Mat & img, const Mat & mask)
{
if (mask.empty())
CV_Error(-5, "mask is empty");
if (mask.type() != CV_8UC1)
CV_Error(-5, "mask must have CV_8UC1 type");
if (mask.cols != img.cols || mask.rows != img.rows)
CV_Error(-5, "mask must have as many rows and cols as img");
// Every element must be one of the four GrabCut labels.
for (int y = 0; y < mask.rows; y++)
{
for (int x = 0; x < mask.cols; x++)
{
uchar val = mask.at<uchar>(y, x);
if (val != GC_BGD && val != GC_FGD && val != GC_PR_BGD && val != GC_PR_FGD)
CV_Error(-5, "mask element value must be equel"
"GC_BGD or GC_FGD or GC_PR_BGD or GC_PR_FGD");
}
}
// NOTE(review): everything below is a debug visualization — it builds a
// binary view of the (probable) foreground, shows it in a window and
// BLOCKS until a key is pressed (waitKey(0)). A validation routine with a
// blocking UI side effect is surprising; consider removing before
// non-interactive use.
Mat tmask = mask.clone();
for (int y = 0; y < tmask.rows; y++)
{
for (int x = 0; x < tmask.cols; x++)
{
uchar val = tmask.at<uchar>(y, x);
if (val == GC_FGD || val == GC_PR_FGD)
tmask.at<uchar>(y, x) = 255;
}
}
imshow("tmask", tmask);
waitKey(0);
destroyWindow("tmask");
}
/*
Initialize mask using rectangular: everything outside the (clamped)
rectangle is definite background, everything inside starts as
"probably foreground".
*/
static void initMaskWithRect(Mat& mask, Size imgSize, Rect rect)
{
	mask.create(imgSize, CV_8UC1);
	mask.setTo(GC_BGD);
	// Clamp the user rectangle to the image bounds.
	if (rect.x < 0) rect.x = 0;
	if (rect.y < 0) rect.y = 0;
	rect.width = min(rect.width, imgSize.width - rect.x);
	rect.height = min(rect.height, imgSize.height - rect.y);
	mask(rect).setTo(Scalar(GC_PR_FGD));
}
/*
Initialize GMM background and foreground models using kmeans algorithm.
Pixels are split by the mask into two sample sets, each set is clustered
into compo_k groups, and each GMM is estimated from its clusters.
*/
static void initGMMs(const Mat & img, const Mat & mask, GMM & bgdGMM, GMM & fgdGMM)
{
const int kMeansItCount = 10; // number of kmeans iterations
const int kMeansType = KMEANS_PP_CENTERS; //Use kmeans++ center initialization by Arthur and Vassilvitskii
Mat bgdLabels, fgdLabels; // per-sample Gaussian-component index (k_n in the paper)
std::vector<Vec3f> bgdSamples, fgdSamples; // background / foreground pixel sample sets
Point p;
for (p.y = 0; p.y < img.rows; p.y++)
{
for (p.x = 0; p.x < img.cols; p.x++)
{
// Pixels marked GC_BGD or GC_PR_BGD become background samples,
// all others foreground samples.
if (mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD)
bgdSamples.push_back((Vec3f)img.at<Vec3b>(p));
else // GC_FGD | GC_PR_FGD
fgdSamples.push_back((Vec3f)img.at<Vec3b>(p));
}
}
CV_Assert(!bgdSamples.empty() && !fgdSamples.empty());
// kmeans input: one sample per row. Note the Mat header aliases the
// vector's storage (no copy), so the vector must outlive the call.
// kmeans output: bgdLabels holds each sample's cluster index in [0, compo_k).
Mat _bgdSamples((int)bgdSamples.size(), 3, CV_32FC1, &bgdSamples[0][0]);
kmeans(_bgdSamples, GMM::compo_k, bgdLabels,
TermCriteria(TermCriteria::COUNT, kMeansItCount, 0.0), 0, kMeansType);
Mat _fgdSamples((int)fgdSamples.size(), 3, CV_32FC1, &fgdSamples[0][0]);
kmeans(_fgdSamples, GMM::compo_k, fgdLabels,
TermCriteria(TermCriteria::COUNT, kMeansItCount, 0.0), 0, kMeansType);
// Component assignments are now fixed, so each GMM's parameters can be
// estimated from its samples.
bgdGMM.init_param();
for (int i = 0; i < (int)bgdSamples.size(); i++)
bgdGMM.add_pixel(bgdLabels.at<int>(i, 0), bgdSamples[i]);
bgdGMM.esti_param();
fgdGMM.init_param();
for (int i = 0; i < (int)fgdSamples.size(); i++)
fgdGMM.add_pixel(fgdLabels.at<int>(i, 0), fgdSamples[i]);
fgdGMM.esti_param();
}
/*
Assign GMMs components for each pixel.
*/
static void assignGMMsComponents(const Mat & img, const Mat & mask, const GMM & bgdGMM,
const GMM & fgdGMM, Mat & compIdxs)
{
Point p;
for (p.y = 0; p.y < img.rows; p.y++)
{
for (p.x = 0; p.x < img.cols; p.x++)
{
Vec3d color = img.at<Vec3b>(p);
//通过mask来判断该像素属于背景像素还是前景像素,再判断它属于前景或者背景GMM中的哪个高斯分量
compIdxs.at<int>(p) = mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD ?
bgdGMM.sle_compo(color) : fgdGMM.sle_compo(color);
}
}
}
// Iterative-minimisation step 2 of the paper: re-learn every Gaussian
// component's parameters from the pixels currently assigned to it.
/*
Learn GMMs parameters. compIdxs must hold a valid component index in
[0, GMM::compo_k) for every pixel (assignGMMsComponents guarantees this).
*/
static void learnGMMs(const Mat & img, const Mat & mask, const Mat & compIdxs, GMM & bgdGMM, GMM & fgdGMM)
{
	bgdGMM.init_param();
	fgdGMM.init_param();
	// Perf fix: the original scanned the whole image once per component
	// (compo_k full passes). A single raster pass dispatching on compIdxs
	// is O(N) and keeps the per-component accumulation order identical,
	// so the estimated parameters are bit-for-bit the same.
	Point p;
	for (p.y = 0; p.y < img.rows; p.y++)
	{
		for (p.x = 0; p.x < img.cols; p.x++)
		{
			const int ci = compIdxs.at<int>(p);
			if (mask.at<uchar>(p) == GC_BGD || mask.at<uchar>(p) == GC_PR_BGD)
				bgdGMM.add_pixel(ci, img.at<Vec3b>(p));
			else
				fgdGMM.add_pixel(ci, img.at<Vec3b>(p));
		}
	}
	bgdGMM.esti_param();
	fgdGMM.esti_param();
}
/*
Construct the s-t graph for the min-cut: one vertex per pixel, t-links
carrying the data term (GMM likelihoods / hard-constraint lambda) and
n-links carrying the precomputed smoothness weights.
The graph is allocated with new and returned through `graph`; ownership
passes to the caller (estimateSegmentation deletes it).
*/
static void constructGCGraph(const Mat & img, const Mat & mask, const GMM & bgdGMM, const GMM & fgdGMM, double lambda,
	const Mat & leftW, const Mat & upleftW, const Mat & upW, const Mat & uprightW,
	GraphType*& graph)
{
	int vtxCount = img.cols * img.rows; // one vertex per pixel
	// Upper bound on edge count (edges missing at the image border are not
	// subtracted; the maxflow library only needs an estimate).
	int edgeCount = 2 * (4 * vtxCount);
	graph = new GraphType(vtxCount, edgeCount);
	// Fix: removed an unused function-scope `double w;` that was shadowed
	// by the loop-local `w` declarations below.
	Point p;
	for (p.y = 0; p.y < img.rows; p.y++)
	{
		for (p.x = 0; p.x < img.cols; p.x++)
		{
			// Nodes are added in raster order, so vtxIdx == p.y * cols + p.x.
			int vtxIdx = graph->add_node();
			Vec3b color = img.at<Vec3b>(p);
			// t-links: capacities towards Source (foreground terminal) and
			// Sink (background terminal) — the data term of the Gibbs energy.
			double fromSource, toSink;
			if (mask.at<uchar>(p) == GC_PR_BGD || mask.at<uchar>(p) == GC_PR_FGD)
			{
				// Undecided pixel: negative log-likelihood under each GMM.
				// NOTE(review): prob_GMM can return 0, which makes these +inf;
				// the original code behaves the same way.
				fromSource = -log(bgdGMM.prob_GMM(color));
				toSink = -log(fgdGMM.prob_GMM(color));
			}
			else if (mask.at<uchar>(p) == GC_BGD)
			{
				// Hard background: free to cut from Source, lambda towards Sink
				// so the cut can never separate it from the background terminal.
				fromSource = 0;
				toSink = lambda;
			}
			else // GC_FGD
			{
				fromSource = lambda;
				toSink = 0;
			}
			graph->add_tweights(vtxIdx, fromSource, toSink);
			// n-links towards the four already-created neighbours — the
			// smoothness term of the Gibbs energy.
			if (p.x > 0) // left
			{
				double w = leftW.at<double>(p);
				graph->add_edge(vtxIdx, vtxIdx - 1, w, w);
			}
			if (p.x > 0 && p.y > 0) // upleft
			{
				double w = upleftW.at<double>(p);
				graph->add_edge(vtxIdx, vtxIdx - img.cols - 1, w, w);
			}
			if (p.y > 0) // up
			{
				double w = upW.at<double>(p);
				graph->add_edge(vtxIdx, vtxIdx - img.cols, w, w);
			}
			if (p.x < img.cols - 1 && p.y > 0) // upright
			{
				double w = uprightW.at<double>(p);
				graph->add_edge(vtxIdx, vtxIdx - img.cols + 1, w, w);
			}
		}
	}
}
/*
Estimate segmentation using MaxFlow algorithm: the min-cut induced by the
max-flow is the new segmentation. Appends the flow value (energy of this
iteration) to result. Takes ownership of `graph` and deletes it.
*/
static void estimateSegmentation(GraphType* graph, Mat& mask, std::vector<double>& result)
{
	const double flow = graph->maxflow();
	result.push_back(flow); // record this iteration's energy
	for (int r = 0; r < mask.rows; r++)
	{
		for (int c = 0; c < mask.cols; c++)
		{
			uchar& label = mask.at<uchar>(r, c);
			// Only "probable" labels are updated from the cut; pixels the
			// user hard-marked GC_BGD / GC_FGD are never overwritten.
			if (label != GC_PR_BGD && label != GC_PR_FGD)
				continue;
			const int vtx = r * mask.cols + c; // raster-order vertex index
			label = (graph->what_segment(vtx) == GraphType::SOURCE)
				? GC_PR_FGD : GC_PR_BGD;
		}
	}
	delete graph;
}
// Save an intermediate result: copy img through a binary foreground mask
// and write the composite to file_tmp.
// GC_FGD (1) and GC_PR_FGD (3) are the odd label values, so `mask & 1`
// selects exactly the (probable) foreground pixels.
void tmpImageSave(Mat img, Mat mask, std::string file_tmp) {
	// Fix: the original called binMask.create(...) and then immediately
	// reassigned binMask from `mask & 1`, discarding that allocation.
	Mat binMask = mask & 1;
	Mat timg;
	img.copyTo(timg, binMask);
	imwrite(file_tmp, timg);
}
// Top-level GrabCut driver, mirroring cv::grabCut's interface.
// NOTE(review): _bgdModel/_fgdModel are accepted for API compatibility but
// never read or written (the GMMs live in local variables), so GC_EVAL mode
// restarts from empty GMMs instead of continuing a previous call's models —
// confirm this is acceptable for callers before using GC_EVAL.
void mygrabCut(InputArray _img, InputOutputArray _mask, Rect rect,
InputOutputArray _bgdModel, InputOutputArray _fgdModel,
int iterCount, int mode)//, std::vector<double>& result)
{
Mat img = _img.getMat();
Mat& mask = _mask.getMatRef();
//Mat& bgdModel = _bgdModel.getMatRef();
//Mat& fgdModel = _fgdModel.getMatRef();
std::vector<double> energy; // max-flow value (energy) of every iteration
std::vector<double>& result=energy;
if (img.empty())
CV_Error(-5, "image is empty");
if (img.type() != CV_8UC3)
CV_Error(-5, "image mush have CV_8UC3 type");
GMM bgdGMM, fgdGMM;
Mat compIdxs(img.size(), CV_32SC1); // per-pixel GMM component index (k_n)
// Initialization phase: build the trimap and seed the GMMs with kmeans.
if (mode == GC_INIT_WITH_RECT || mode == GC_INIT_WITH_MASK)
{
if (mode == GC_INIT_WITH_RECT)
initMaskWithRect(mask, img.size(), rect);
else // flag == GC_INIT_WITH_MASK
checkMask(img, mask);
initGMMs(img, mask, bgdGMM, fgdGMM);
}
if (iterCount <= 0)
return;
if (mode == GC_EVAL)
checkMask(img, mask);
// gamma = 50 and lambda = 9 * gamma are the constants used by the paper
// (lambda acts as "infinite" capacity for user-fixed pixels).
const double gamma = 50;
const double lambda = 9 * gamma;
const double beta = calcBeta(img);
// n-link weights depend only on the image, so they are computed once.
Mat leftW, upleftW, upW, uprightW;
calcNWeights(img, leftW, upleftW, upW, uprightW, beta, gamma);
//checkMask(img, mask);
// Intermediate results are written here — assumes the "tmp_pic/"
// directory already exists (TODO confirm; imwrite fails otherwise).
std::string tmp_file = "tmp_pic/";
// Iterative minimisation: assign components, learn GMMs, build the graph,
// cut it, and update the mask from the cut.
for (int i = 0; i < iterCount; i++)
{
GraphType *graph=nullptr;
assignGMMsComponents(img, mask, bgdGMM, fgdGMM, compIdxs);
learnGMMs(img, mask, compIdxs, bgdGMM, fgdGMM);
constructGCGraph(img, mask, bgdGMM, fgdGMM, lambda, leftW, upleftW, upW, uprightW, graph);
estimateSegmentation(graph, mask,result); // also frees graph
tmpImageSave(img, mask, tmp_file + std::to_string(i) + std::string(".jpg"));
}
// Print the per-iteration energies (should be non-increasing).
for (std::vector<double>::iterator it = energy.begin(); it != energy.end(); it++) {
std::cout << (*it) << std::endl;
}
//checkMask(img, mask);
}
