1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
49 The code below implements keypoint detector, fern-based point classifier and a planar object detector.
52 1. Mustafa Özuysal, Michael Calonder, Vincent Lepetit, Pascal Fua,
53 "Fast KeyPoint Recognition Using Random Ferns,"
54 IEEE Transactions on Pattern Analysis and Machine Intelligence, 15 Jan. 2009.
56 2. Vincent Lepetit, Pascal Fua,
57 “Towards Recognizing Feature Points Using Classification Trees,”
58 Technical Report IC/2004/74, EPFL, 2004.
61 const int progressBarSize = 50;
63 //////////////////////////// Patch Generator //////////////////////////////////
65 static const double DEFAULT_BACKGROUND_MIN = 0;
66 static const double DEFAULT_BACKGROUND_MAX = 256;
67 static const double DEFAULT_NOISE_RANGE = 5;
68 static const double DEFAULT_LAMBDA_MIN = 0.6;
69 static const double DEFAULT_LAMBDA_MAX = 1.5;
70 static const double DEFAULT_THETA_MIN = -CV_PI;
71 static const double DEFAULT_THETA_MAX = CV_PI;
72 static const double DEFAULT_PHI_MIN = -CV_PI;
73 static const double DEFAULT_PHI_MAX = CV_PI;
75 PatchGenerator::PatchGenerator()
76 : backgroundMin(DEFAULT_BACKGROUND_MIN), backgroundMax(DEFAULT_BACKGROUND_MAX),
77 noiseRange(DEFAULT_NOISE_RANGE), randomBlur(true), lambdaMin(DEFAULT_LAMBDA_MIN),
78 lambdaMax(DEFAULT_LAMBDA_MAX), thetaMin(DEFAULT_THETA_MIN),
79 thetaMax(DEFAULT_THETA_MAX), phiMin(DEFAULT_PHI_MIN),
80 phiMax(DEFAULT_PHI_MAX)
85 PatchGenerator::PatchGenerator(double _backgroundMin, double _backgroundMax,
86 double _noiseRange, bool _randomBlur,
87 double _lambdaMin, double _lambdaMax,
88 double _thetaMin, double _thetaMax,
89 double _phiMin, double _phiMax )
90 : backgroundMin(_backgroundMin), backgroundMax(_backgroundMax),
91 noiseRange(_noiseRange), randomBlur(_randomBlur),
92 lambdaMin(_lambdaMin), lambdaMax(_lambdaMax),
93 thetaMin(_thetaMin), thetaMax(_thetaMax),
94 phiMin(_phiMin), phiMax(_phiMax)
99 void PatchGenerator::generateRandomTransform(Point2f srcCenter, Point2f dstCenter,
100 Mat& transform, RNG& rng, bool inverse) const
102 double lambda1 = rng.uniform(lambdaMin, lambdaMax);
103 double lambda2 = rng.uniform(lambdaMin, lambdaMax);
104 double theta = rng.uniform(thetaMin, thetaMax);
105 double phi = rng.uniform(phiMin, phiMax);
107 // Calculate random parameterized affine transformation A,
108 // A = T(patch center) * R(theta) * R(phi)' *
109 // S(lambda1, lambda2) * R(phi) * T(-pt)
110 double st = sin(theta);
111 double ct = cos(theta);
112 double sp = sin(phi);
113 double cp = cos(phi);
117 double A = lambda1*c2p + lambda2*s2p;
118 double B = (lambda2 - lambda1)*sp*cp;
119 double C = lambda1*s2p + lambda2*c2p;
121 double Ax_plus_By = A*srcCenter.x + B*srcCenter.y;
122 double Bx_plus_Cy = B*srcCenter.x + C*srcCenter.y;
124 transform.create(2, 3, CV_64F);
125 Mat_<double>& T = (Mat_<double>&)transform;
126 T(0,0) = A*ct - B*st;
127 T(0,1) = B*ct - C*st;
128 T(0,2) = -ct*Ax_plus_By + st*Bx_plus_Cy + dstCenter.x;
129 T(1,0) = A*st + B*ct;
130 T(1,1) = B*st + C*ct;
131 T(1,2) = -st*Ax_plus_By - ct*Bx_plus_Cy + dstCenter.y;
134 invertAffineTransform(T, T);
138 void PatchGenerator::operator ()(const Mat& image, Point2f pt, Mat& patch, Size patchSize, RNG& rng) const
141 Mat_<double> T(2, 3, buffer);
143 generateRandomTransform(pt, Point2f((patchSize.width-1)*0.5f, (patchSize.height-1)*0.5f), T, rng);
144 (*this)(image, T, patch, patchSize, rng);
148 void PatchGenerator::operator ()(const Mat& image, const Mat& T,
149 Mat& patch, Size patchSize, RNG& rng) const
151 patch.create( patchSize, image.type() );
152 if( backgroundMin != backgroundMax )
154 rng.fill(patch, RNG::UNIFORM, Scalar::all(backgroundMin), Scalar::all(backgroundMax));
155 warpAffine(image, patch, T, patchSize, INTER_LINEAR, BORDER_TRANSPARENT);
158 warpAffine(image, patch, T, patchSize, INTER_LINEAR, BORDER_CONSTANT, Scalar::all(backgroundMin));
160 int ksize = randomBlur ? (unsigned)rng % 9 - 5 : 0;
164 GaussianBlur(patch, patch, Size(ksize, ksize), 0, 0);
169 AutoBuffer<uchar> _noiseBuf( patchSize.width*patchSize.height*image.elemSize() );
170 Mat noise(patchSize, image.type(), (uchar*)_noiseBuf);
171 int delta = image.depth() == CV_8U ? 128 : image.depth() == CV_16U ? 32768 : 0;
172 rng.fill(noise, RNG::NORMAL, Scalar::all(delta), Scalar::all(noiseRange));
173 if( backgroundMin != backgroundMax )
174 addWeighted(patch, 1, noise, 1, -delta, patch);
177 for( int i = 0; i < patchSize.height; i++ )
179 uchar* prow = patch.ptr<uchar>(i);
180 const uchar* nrow = noise.ptr<uchar>(i);
181 for( int j = 0; j < patchSize.width; j++ )
182 if( prow[j] != backgroundMin )
183 prow[j] = saturate_cast<uchar>(prow[j] + nrow[j] - delta);
189 void PatchGenerator::warpWholeImage(const Mat& image, Mat& _T, Mat& buf,
190 Mat& warped, int border, RNG& rng) const
193 Rect roi(INT_MAX, INT_MAX, INT_MIN, INT_MIN);
195 for( int k = 0; k < 4; k++ )
198 pt0.x = (float)(k == 0 || k == 3 ? 0 : image.cols);
199 pt0.y = (float)(k < 2 ? 0 : image.rows);
200 pt1.x = (float)(T(0,0)*pt0.x + T(0,1)*pt0.y + T(0,2));
201 pt1.y = (float)(T(1,0)*pt0.x + T(1,1)*pt0.y + T(1,2));
203 roi.x = std::min(roi.x, cvFloor(pt1.x));
204 roi.y = std::min(roi.y, cvFloor(pt1.y));
205 roi.width = std::max(roi.width, cvCeil(pt1.x));
206 roi.height = std::max(roi.height, cvCeil(pt1.y));
209 roi.width -= roi.x - 1;
210 roi.height -= roi.y - 1;
211 int dx = border - roi.x;
212 int dy = border - roi.y;
214 if( (roi.width+border*2)*(roi.height+border*2) > buf.cols )
215 buf.create(1, (roi.width+border*2)*(roi.height+border*2), image.type());
217 warped = Mat(roi.height + border*2, roi.width + border*2,
218 image.type(), buf.data);
222 (*this)(image, T, warped, warped.size(), rng);
224 if( T.data != _T.data )
225 T.convertTo(_T, _T.type());
229 /////////////////////////////////////// LDetector //////////////////////////////////////////////
231 LDetector::LDetector() : radius(7), threshold(20), nOctaves(3), nViews(1000),
232 verbose(false), baseFeatureSize(32), clusteringDistance(2)
236 LDetector::LDetector(int _radius, int _threshold, int _nOctaves, int _nViews,
237 double _baseFeatureSize, double _clusteringDistance)
238 : radius(_radius), threshold(_threshold), nOctaves(_nOctaves), nViews(_nViews),
239 verbose(false), baseFeatureSize(_baseFeatureSize), clusteringDistance(_clusteringDistance)
243 static void getDiscreteCircle(int R, vector<Point>& circle, vector<int>& filledHCircle)
248 x = cvRound(std::sqrt((double)R*R - y*y));
251 circle.push_back(Point(x,y));
256 int i, n8 = (int)circle.size() - (x == y), n8_ = n8 - (x != y), n4 = n8 + n8_, n = n4*4;
260 for( i = 0; i < n8; i++ )
263 circle[i+n4] = Point(-p.y, p.x);
264 circle[i+n4*2] = Point(-p.x, -p.y);
265 circle[i+n4*3] = Point(p.y, -p.x);
268 for( i = n8; i < n4; i++ )
270 Point p = circle[n4 - i], q = Point(p.y, p.x);
272 circle[i+n4] = Point(-q.y, q.x);
273 circle[i+n4*2] = Point(-q.x, -q.y);
274 circle[i+n4*3] = Point(q.y, -q.x);
277 // the filled upper half of the circle is encoded as sequence of integers,
278 // i-th element is the coordinate of right-most circle point in each horizontal line y=i.
279 // the left-most point will be -filledHCircle[i].
280 for( i = 0, y = -1; i < n4; i++ )
285 filledHCircle.push_back(p.x);
294 struct CmpKeypointScores
296 bool operator ()(const KeyPoint& a, const KeyPoint& b) const { return std::abs(a.response) > std::abs(b.response); }
300 void LDetector::getMostStable2D(const Mat& image, vector<KeyPoint>& keypoints,
301 int maxPoints, const PatchGenerator& _patchGenerator) const
303 PatchGenerator patchGenerator = _patchGenerator;
304 patchGenerator.backgroundMin = patchGenerator.backgroundMax = 128;
307 Mat _M(2, 3, CV_64F), _iM(2, 3, CV_64F);
308 double *M = (double*)_M.data, *iM = (double*)_iM.data;
311 vector<KeyPoint> tempKeypoints;
312 double d2 = clusteringDistance*clusteringDistance;
315 // TODO: this loop can be run in parallel, for that we need
316 // a separate accumulator keypoint lists for different threads.
317 for( i = 0; i < nViews; i++ )
319 // 1. generate random transform
320 // 2. map the source image corners and compute the ROI in canvas
321 // 3. select the ROI in canvas, adjust the transformation matrix
322 // 4. apply the transformation
323 // 5. run keypoint detector in pyramids
324 // 6. map each point back and update the lists of most stable points
326 if(verbose && (i+1)*progressBarSize/nViews != i*progressBarSize/nViews)
330 patchGenerator.generateRandomTransform(Point2f(), Point2f(), _M, rng);
333 // identity transformation
335 M[1] = M[3] = M[2] = M[5] = 0;
338 patchGenerator.warpWholeImage(image, _M, warpbuf, warped, cvCeil(baseFeatureSize*0.5+radius), rng);
339 (*this)(warped, tempKeypoints, maxPoints*3);
340 invertAffineTransform(_M, _iM);
342 int j, sz0 = (int)tempKeypoints.size(), sz1;
343 for( j = 0; j < sz0; j++ )
345 KeyPoint kpt1 = tempKeypoints[j];
346 KeyPoint kpt0((float)(iM[0]*kpt1.pt.x + iM[1]*kpt1.pt.y + iM[2]),
347 (float)(iM[3]*kpt1.pt.x + iM[4]*kpt1.pt.y + iM[5]),
348 kpt1.size, -1.f, 1.f, kpt1.octave);
349 float r = kpt1.size*0.5f;
350 if( kpt0.pt.x < r || kpt0.pt.x >= image.cols - r ||
351 kpt0.pt.y < r || kpt0.pt.y >= image.rows - r )
354 sz1 = (int)keypoints.size();
355 for( k = 0; k < sz1; k++ )
357 KeyPoint kpt = keypoints[k];
358 if( kpt.octave != kpt0.octave )
360 double dx = kpt.pt.x - kpt0.pt.x, dy = kpt.pt.y - kpt0.pt.y;
361 if( dx*dx + dy*dy <= d2*(1 << kpt.octave*2) )
363 keypoints[k] = KeyPoint((kpt.pt.x*kpt.response + kpt0.pt.x)/(kpt.response+1),
364 (kpt.pt.y*kpt.response + kpt0.pt.y)/(kpt.response+1),
365 kpt.size, -1.f, kpt.response + 1, kpt.octave);
370 keypoints.push_back(kpt0);
377 if( (int)keypoints.size() > maxPoints )
379 sort(keypoints, CmpKeypointScores());
380 keypoints.resize(maxPoints);
385 static inline int computeLResponse(const uchar* ptr, const int* cdata, int csize)
387 int i, csize2 = csize/2, sum = -ptr[0]*csize;
388 for( i = 0; i < csize2; i++ )
391 sum += ptr[ofs] + ptr[-ofs];
397 static Point2f adjustCorner(const float* fval, float& fvaln)
399 double bx = (fval[3] - fval[5])*0.5;
400 double by = (fval[2] - fval[7])*0.5;
401 double Axx = fval[3] - fval[4]*2 + fval[5];
402 double Axy = (fval[0] - fval[2] - fval[6] + fval[8])*0.25;
403 double Ayy = fval[1] - fval[4]*2 + fval[7];
404 double D = Axx*Ayy - Axy*Axy;
405 D = D != 0 ? 1./D : 0;
406 double dx = (bx*Ayy - by*Axy)*D;
407 double dy = (by*Axx - bx*Axy)*D;
408 dx = std::min(std::max(dx, -1.), 1.);
409 dy = std::min(std::max(dy, -1.), 1.);
410 fvaln = (float)(fval[4] + (bx*dx + by*dy)*0.5);
411 if(fvaln*fval[4] < 0 || std::abs(fvaln) < std::abs(fval[4]))
414 return Point2f((float)dx, (float)dy);
417 void LDetector::operator()(const Mat& image, vector<KeyPoint>& keypoints, int maxCount, bool scaleCoords) const
420 buildPyramid(image, pyr, std::max(nOctaves-1, 0));
421 (*this)(pyr, keypoints, maxCount, scaleCoords);
424 void LDetector::operator()(const vector<Mat>& pyr, vector<KeyPoint>& keypoints, int maxCount, bool scaleCoords) const
426 const int lthreshold = 3;
427 int L, x, y, i, j, k, tau = lthreshold;
428 Mat scoreBuf(pyr[0].size(), CV_16S), maskBuf(pyr[0].size(), CV_8U);
429 int scoreElSize = scoreBuf.elemSize();
430 vector<Point> circle0;
431 vector<int> fhcircle0, circle, fcircle_s, fcircle;
432 getDiscreteCircle(radius, circle0, fhcircle0);
433 CV_Assert(fhcircle0.size() == (size_t)(radius+1) && circle0.size() % 2 == 0);
436 for( L = 0; L < nOctaves; L++ )
438 // Pyramidal keypoint detector body:
439 // 1. build next pyramid layer
440 // 2. scan points, check the circular neighborhood, compute the score
441 // 3. do non-maxima suppression
442 // 4. adjust the corners (sub-pix)
443 double cscale = scaleCoords ? 1 << L : 1;
444 Size layerSize = pyr[L].size();
445 if( layerSize.width < radius*2 + 3 || layerSize.height < radius*2 + 3 )
447 Mat scoreLayer(layerSize, scoreBuf.type(), scoreBuf.data);
448 Mat maskLayer(layerSize, maskBuf.type(), maskBuf.data);
449 const Mat& pyrLayer = pyr[L];
450 int sstep = scoreLayer.step/sizeof(short);
451 int mstep = maskLayer.step;
453 int csize = (int)circle0.size(), csize2 = csize/2;
454 circle.resize(csize*3);
455 for( i = 0; i < csize; i++ )
456 circle[i] = circle[i+csize] = circle[i+csize*2] = (-circle0[i].y)*pyrLayer.step + circle0[i].x;
459 for( i = -radius; i <= radius; i++ )
461 x = fhcircle0[std::abs(i)];
462 for( j = -x; j <= x; j++ )
464 fcircle_s.push_back(i*sstep + j);
465 fcircle.push_back(i*pyrLayer.step + j);
468 int nsize = (int)fcircle.size();
469 const int* cdata = &circle[0];
470 const int* ndata = &fcircle[0];
471 const int* ndata_s = &fcircle_s[0];
473 for( y = 0; y < radius; y++ )
475 memset( scoreLayer.ptr<short>(y), 0, layerSize.width*scoreElSize );
476 memset( scoreLayer.ptr<short>(layerSize.height-y-1), 0, layerSize.width*scoreElSize );
477 memset( maskLayer.ptr<uchar>(y), 0, layerSize.width );
478 memset( maskLayer.ptr<uchar>(layerSize.height-y-1), 0, layerSize.width );
481 int vradius = radius*pyrLayer.step;
483 for( y = radius; y < layerSize.height - radius; y++ )
485 const uchar* img = pyrLayer.ptr<uchar>(y) + radius;
486 short* scores = scoreLayer.ptr<short>(y);
487 uchar* mask = maskLayer.ptr<uchar>(y);
489 for( x = 0; x < radius; x++ )
491 scores[x] = scores[layerSize.width - 1 - x] = 0;
492 mask[x] = mask[layerSize.width - 1 - x] = 0;
495 for( x = radius; x < layerSize.width - radius; x++, img++ )
498 if( (std::abs(val0 - img[radius]) < tau && std::abs(val0 - img[-radius]) < tau) ||
499 (std::abs(val0 - img[vradius]) < tau && std::abs(val0 - img[-vradius]) < tau))
506 for( k = 0; k < csize; k++ )
508 if( std::abs(val0 - img[cdata[k]]) < tau &&
509 (std::abs(val0 - img[cdata[k + csize2]]) < tau ||
510 std::abs(val0 - img[cdata[k + csize2 - 1]]) < tau ||
511 std::abs(val0 - img[cdata[k + csize2 + 1]]) < tau ||
512 std::abs(val0 - img[cdata[k + csize2 - 2]]) < tau ||
513 std::abs(val0 - img[cdata[k + csize2 + 2]]) < tau/* ||
514 std::abs(val0 - img[cdata[k + csize2 - 3]]) < tau ||
515 std::abs(val0 - img[cdata[k + csize2 + 3]]) < tau*/) )
526 scores[x] = (short)computeLResponse(img, cdata, csize);
532 for( y = radius+1; y < layerSize.height - radius-1; y++ )
534 const uchar* img = pyrLayer.ptr<uchar>(y) + radius+1;
535 short* scores = scoreLayer.ptr<short>(y) + radius+1;
536 const uchar* mask = maskLayer.ptr<uchar>(y) + radius+1;
538 for( x = radius+1; x < layerSize.width - radius-1; x++, img++, scores++, mask++ )
541 if( !*mask || std::abs(val0) < lthreshold ||
542 (mask[-1] + mask[1] + mask[-mstep-1] + mask[-mstep] + mask[-mstep+1]+
543 mask[mstep-1] + mask[mstep] + mask[mstep+1] < 3))
545 bool recomputeZeroScores = radius*2 < y && y < layerSize.height - radius*2 &&
546 radius*2 < x && x < layerSize.width - radius*2;
550 for( k = 0; k < nsize; k++ )
552 int val = scores[ndata_s[k]];
553 if( val == 0 && recomputeZeroScores )
554 scores[ndata_s[k]] = (short)(val =
555 computeLResponse(img + ndata[k], cdata, csize));
562 for( k = 0; k < nsize; k++ )
564 int val = scores[ndata_s[k]];
565 if( val == 0 && recomputeZeroScores )
566 scores[ndata_s[k]] = (short)(val =
567 computeLResponse(img + ndata[k], cdata, csize));
574 float fval[9], fvaln = 0;
575 for( int i1 = -1; i1 <= 1; i1++ )
576 for( int j1 = -1; j1 <= 1; j1++ )
578 fval[(i1+1)*3 + j1 + 1] = (float)(scores[sstep*i1+j1] ? scores[sstep*i1+j1] :
579 computeLResponse(img + pyrLayer.step*i1 + j1, cdata, csize));
581 Point2f pt = adjustCorner(fval, fvaln);
584 keypoints.push_back(KeyPoint((float)(pt.x*cscale), (float)(pt.y*cscale),
585 (float)(baseFeatureSize*cscale), -1, fvaln, L));
590 if( maxCount > 0 && keypoints.size() > (size_t)maxCount )
592 sort(keypoints, CmpKeypointScores());
593 keypoints.resize(maxCount);
597 void LDetector::read(const FileNode& objnode)
599 radius = (int)objnode["radius"];
600 threshold = (int)objnode["threshold"];
601 nOctaves = (int)objnode["noctaves"];
602 nViews = (int)objnode["nviews"];
603 baseFeatureSize = (int)objnode["base-feature-size"];
604 clusteringDistance = (int)objnode["clustering-distance"];
607 void LDetector::write(FileStorage& fs, const String& name) const
609 WriteStructContext ws(fs, name, CV_NODE_MAP);
611 fs << "radius" << radius
612 << "threshold" << threshold
613 << "noctaves" << nOctaves
614 << "nviews" << nViews
615 << "base-feature-size" << baseFeatureSize
616 << "clustering-distance" << clusteringDistance;
619 void LDetector::setVerbose(bool _verbose)
624 /////////////////////////////////////// FernClassifier ////////////////////////////////////////////
626 FernClassifier::FernClassifier()
633 FernClassifier::FernClassifier(const FileNode& node)
640 FernClassifier::~FernClassifier()
645 int FernClassifier::getClassCount() const
651 int FernClassifier::getStructCount() const
657 int FernClassifier::getStructSize() const
663 int FernClassifier::getSignatureSize() const
665 return signatureSize;
669 int FernClassifier::getCompressionMethod() const
671 return compressionMethod;
675 Size FernClassifier::getPatchSize() const
681 FernClassifier::FernClassifier(const vector<Point2f>& points,
682 const vector<Ptr<Mat> >& refimgs,
683 const vector<int>& labels,
684 int _nclasses, int _patchSize,
685 int _signatureSize, int _nstructs,
686 int _structSize, int _nviews, int _compressionMethod,
687 const PatchGenerator& patchGenerator)
691 train(points, refimgs, labels, _nclasses, _patchSize,
692 _signatureSize, _nstructs, _structSize, _nviews,
693 _compressionMethod, patchGenerator);
697 void FernClassifier::write(FileStorage& fs, const String& objname) const
699 WriteStructContext ws(fs, objname, CV_NODE_MAP);
701 cv::write(fs, "nstructs", nstructs);
702 cv::write(fs, "struct-size", structSize);
703 cv::write(fs, "nclasses", nclasses);
704 cv::write(fs, "signature-size", signatureSize);
705 cv::write(fs, "compression-method", compressionMethod);
706 cv::write(fs, "patch-size", patchSize.width);
708 WriteStructContext wsf(fs, "features", CV_NODE_SEQ + CV_NODE_FLOW);
709 int i, nfeatures = (int)features.size();
710 for( i = 0; i < nfeatures; i++ )
712 cv::write(fs, features[i].y1*patchSize.width + features[i].x1);
713 cv::write(fs, features[i].y2*patchSize.width + features[i].x2);
717 WriteStructContext wsp(fs, "posteriors", CV_NODE_SEQ + CV_NODE_FLOW);
718 cv::write(fs, posteriors);
723 void FernClassifier::read(const FileNode& objnode)
727 nstructs = (int)objnode["nstructs"];
728 structSize = (int)objnode["struct-size"];
729 nclasses = (int)objnode["nclasses"];
730 signatureSize = (int)objnode["signature-size"];
731 compressionMethod = (int)objnode["compression-method"];
732 patchSize.width = patchSize.height = (int)objnode["patch-size"];
733 leavesPerStruct = 1 << structSize;
735 FileNode _nodes = objnode["features"];
736 int i, nfeatures = structSize*nstructs;
737 features.resize(nfeatures);
738 FileNodeIterator it = _nodes.begin(), it_end = _nodes.end();
739 for( i = 0; i < nfeatures && it != it_end; i++ )
743 features[i] = Feature(ofs1%patchSize.width, ofs1/patchSize.width,
744 ofs2%patchSize.width, ofs2/patchSize.width);
747 FileNode _posteriors = objnode["posteriors"];
748 int psz = leavesPerStruct*nstructs*signatureSize;
749 posteriors.reserve(psz);
750 _posteriors >> posteriors;
754 void FernClassifier::clear()
756 signatureSize = nclasses = nstructs = structSize = compressionMethod = leavesPerStruct = 0;
757 vector<Feature>().swap(features);
758 vector<float>().swap(posteriors);
762 int FernClassifier::getLeaf(int fern, const Mat& _patch) const
764 assert( 0 <= fern && fern < nstructs );
765 size_t fofs = fern*structSize, idx = 0;
766 const Mat_<uchar>& patch = (const Mat_<uchar>&)_patch;
768 for( int i = 0; i < structSize; i++ )
770 const Feature& f = features[fofs + i];
771 idx = (idx << 1) + f(patch);
774 return fern*leavesPerStruct + idx;
778 void FernClassifier::prepare(int _nclasses, int _patchSize, int _signatureSize,
779 int _nstructs, int _structSize,
780 int _nviews, int _compressionMethod)
784 CV_Assert( _nclasses > 1 && _patchSize >= 5 && _nstructs > 0 &&
785 _nviews > 0 && _structSize > 0 &&
786 (_compressionMethod == COMPRESSION_NONE ||
787 _compressionMethod == COMPRESSION_RANDOM_PROJ ||
788 _compressionMethod == COMPRESSION_PCA) );
790 nclasses = _nclasses;
791 patchSize = Size(_patchSize, _patchSize);
792 nstructs = _nstructs;
793 structSize = _structSize;
794 signatureSize = std::min(_signatureSize, nclasses);
795 compressionMethod = signatureSize == nclasses ? COMPRESSION_NONE : _compressionMethod;
797 leavesPerStruct = 1 << structSize;
799 int i, nfeatures = structSize*nstructs;
801 features = vector<Feature>( nfeatures );
802 posteriors = vector<float>( leavesPerStruct*nstructs*nclasses, 1.f );
803 classCounters = vector<int>( nclasses, leavesPerStruct );
805 CV_Assert( patchSize.width <= 256 && patchSize.height <= 256 );
808 for( i = 0; i < nfeatures; i++ )
810 int x1 = (unsigned)rng % patchSize.width;
811 int y1 = (unsigned)rng % patchSize.height;
812 int x2 = (unsigned)rng % patchSize.width;
813 int y2 = (unsigned)rng % patchSize.height;
814 features[i] = Feature(x1, y1, x2, y2);
819 void FernClassifier::train(const vector<Point2f>& points,
820 const vector<Ptr<Mat> >& refimgs,
821 const vector<int>& labels,
822 int _nclasses, int _patchSize,
823 int _signatureSize, int _nstructs,
824 int _structSize, int _nviews, int _compressionMethod,
825 const PatchGenerator& patchGenerator)
827 _nclasses = _nclasses > 0 ? _nclasses : (int)points.size();
828 CV_Assert( labels.empty() || labels.size() == points.size() );
830 prepare(_nclasses, _patchSize, _signatureSize, _nstructs,
831 _structSize, _nviews, _compressionMethod);
833 // pass all the views of all the samples through the generated trees and accumulate
834 // the statistics (posterior probabilities) in leaves.
836 int i, j, nsamples = (int)points.size();
839 for( i = 0; i < nsamples; i++ )
841 Point2f pt = points[i];
842 const Mat& src = *refimgs[i];
843 int classId = labels.empty() ? i : labels[i];
844 if( verbose && (i+1)*progressBarSize/nsamples != i*progressBarSize/nsamples )
846 CV_Assert( 0 <= classId && classId < nclasses );
847 classCounters[classId] += _nviews;
848 for( j = 0; j < _nviews; j++ )
850 patchGenerator(src, pt, patch, patchSize, rng);
851 for( int f = 0; f < nstructs; f++ )
852 posteriors[getLeaf(f, patch)*nclasses + classId]++;
862 void FernClassifier::trainFromSingleView(const Mat& image,
863 const vector<KeyPoint>& keypoints,
864 int _patchSize, int _signatureSize,
865 int _nstructs, int _structSize,
866 int _nviews, int _compressionMethod,
867 const PatchGenerator& patchGenerator)
869 prepare((int)keypoints.size(), _patchSize, _signatureSize, _nstructs,
870 _structSize, _nviews, _compressionMethod);
871 int i, j, k, nsamples = (int)keypoints.size(), maxOctave = 0;
872 for( i = 0; i < nsamples; i++ )
874 classCounters[i] = _nviews;
875 maxOctave = std::max(maxOctave, keypoints[i].octave);
878 double maxScale = patchGenerator.lambdaMax*2;
879 Mat canvas(cvRound(std::max(image.cols,image.rows)*maxScale + patchSize.width*2 + 10),
880 cvRound(std::max(image.cols,image.rows)*maxScale + patchSize.width*2 + 10), image.type());
882 vector<Mat> pyrbuf(maxOctave+1), pyr(maxOctave+1);
883 Point2f center0((image.cols-1)*0.5f, (image.rows-1)*0.5f),
884 center1((canvas.cols - 1)*0.5f, (canvas.rows - 1)*0.5f);
885 Mat _M(2, 3, CV_64F);
886 double *M = (double*)_M.data;
889 Mat patch(patchSize, CV_8U);
891 for( i = 0; i < _nviews; i++ )
893 patchGenerator.generateRandomTransform(center0, center1, _M, rng);
895 CV_Assert(_M.type() == CV_64F);
896 Rect roi(INT_MAX, INT_MAX, INT_MIN, INT_MIN);
898 for( k = 0; k < 4; k++ )
901 pt0.x = (float)(k == 0 || k == 3 ? 0 : image.cols);
902 pt0.y = (float)(k < 2 ? 0 : image.rows);
903 pt1.x = (float)(M[0]*pt0.x + M[1]*pt0.y + M[2]);
904 pt1.y = (float)(M[3]*pt0.x + M[4]*pt0.y + M[5]);
906 roi.x = std::min(roi.x, cvFloor(pt1.x));
907 roi.y = std::min(roi.y, cvFloor(pt1.y));
908 roi.width = std::max(roi.width, cvCeil(pt1.x));
909 roi.height = std::max(roi.height, cvCeil(pt1.y));
912 roi.width -= roi.x + 1;
913 roi.height -= roi.y + 1;
915 Mat canvas_roi(canvas, roi);
919 Size size = canvas_roi.size();
920 rng.fill(canvas_roi, RNG::UNIFORM, Scalar::all(0), Scalar::all(256));
921 warpAffine( image, canvas_roi, _M, size, INTER_LINEAR, BORDER_TRANSPARENT);
924 for( j = 1; j <= maxOctave; j++ )
926 size = Size((size.width+1)/2, (size.height+1)/2);
927 if( pyrbuf[j].cols < size.width*size.height )
928 pyrbuf[j].create(1, size.width*size.height, image.type());
929 pyr[j] = Mat(size, image.type(), pyrbuf[j].data);
930 pyrDown(pyr[j-1], pyr[j]);
933 if( patchGenerator.noiseRange > 0 )
935 const int noiseDelta = 128;
936 if( noisebuf.cols < pyr[0].cols*pyr[0].rows )
937 noisebuf.create(1, pyr[0].cols*pyr[0].rows, image.type());
938 for( j = 0; j <= maxOctave; j++ )
940 Mat noise(pyr[j].size(), image.type(), noisebuf.data);
941 rng.fill(noise, RNG::UNIFORM, Scalar::all(-patchGenerator.noiseRange + noiseDelta),
942 Scalar::all(patchGenerator.noiseRange + noiseDelta));
943 addWeighted(pyr[j], 1, noise, 1, -noiseDelta, pyr[j]);
947 for( j = 0; j < nsamples; j++ )
949 KeyPoint kpt = keypoints[j];
950 float scale = 1.f/(1 << kpt.octave);
951 Point2f pt((float)((M[0]*kpt.pt.x + M[1]*kpt.pt.y + M[2])*scale),
952 (float)((M[3]*kpt.pt.x + M[4]*kpt.pt.y + M[5])*scale));
953 getRectSubPix(pyr[kpt.octave], patchSize, pt, patch, patch.type());
954 for( int f = 0; f < nstructs; f++ )
955 posteriors[getLeaf(f, patch)*nclasses + j]++;
958 if( verbose && (i+1)*progressBarSize/_nviews != i*progressBarSize/_nviews )
968 int FernClassifier::operator()(const Mat& img, Point2f pt, vector<float>& signature) const
971 getRectSubPix(img, patchSize, pt, patch, img.type());
972 return (*this)(patch, signature);
976 int FernClassifier::operator()(const Mat& patch, vector<float>& signature) const
978 if( posteriors.empty() )
979 CV_Error(CV_StsNullPtr,
980 "The descriptor has not been trained or "
981 "the floating-point posteriors have been deleted");
982 CV_Assert(patch.size() == patchSize);
984 int i, j, sz = signatureSize;
985 signature.resize(sz);
986 float* s = &signature[0];
988 for( j = 0; j < sz; j++ )
991 for( i = 0; i < nstructs; i++ )
993 int lf = getLeaf(i, patch);
994 const float* ldata = &posteriors[lf*signatureSize];
995 for( j = 0; j <= sz - 4; j += 4 )
997 float t0 = s[j] + ldata[j];
998 float t1 = s[j+1] + ldata[j+1];
999 s[j] = t0; s[j+1] = t1;
1000 t0 = s[j+2] + ldata[j+2];
1001 t1 = s[j+3] + ldata[j+3];
1002 s[j+2] = t0; s[j+3] = t1;
1004 for( ; j < sz; j++ )
1009 if( signatureSize == nclasses && compressionMethod == COMPRESSION_NONE )
1011 for( i = 1; i < nclasses; i++ )
1019 void FernClassifier::finalize(RNG&)
1021 int i, j, k, n = nclasses;
1022 vector<double> invClassCounters(n);
1023 Mat_<double> _temp(1, n);
1024 double* temp = &_temp(0,0);
1026 for( i = 0; i < n; i++ )
1027 invClassCounters[i] = 1./classCounters[i];
1029 for( i = 0; i < nstructs; i++ )
1031 for( j = 0; j < leavesPerStruct; j++ )
1033 float* P = &posteriors[(i*leavesPerStruct + j)*nclasses];
1035 for( k = 0; k < n; k++ )
1036 sum += P[k]*invClassCounters[k];
1038 for( k = 0; k < n; k++ )
1039 temp[k] = P[k]*invClassCounters[k]*sum;
1041 for( k = 0; k < n; k++ )
1042 P[k] = (float)temp[k];
1047 // do the first pass over the data.
1048 if( compressionMethod == COMPRESSION_RANDOM_PROJ )
1050 // In case of random projection
1051 // we generate a random m x n matrix consisting of -1's and 1's
1052 // (called Bernoulli matrix) and multiply it by each vector
1053 // of posterior probabilities.
1054 // the product is stored back into the same input vector.
1056 Mat_<uchar> csmatrix;
1059 // generate random Bernoulli matrix:
1060 // -1's are replaced with 0's and 1's stay 1's.
1061 csmatrix.create(m, n);
1062 rng.fill(csmatrix, RNG::UNIFORM, Scalar::all(0), Scalar::all(2));
1064 vector<float> dst(m);
1066 for( i = 0; i < totalLeaves; i++ )
1068 int S = sampleCounters[i];
1072 float scale = 1.f/(S*(m < n ? std::sqrt((float)m) : 1.f));
1073 const int* leaf = (const int*)&posteriors[i*n];
1074 float* out_leaf = (float*)&posteriors[i*m];
1076 for( j = 0; j < m; j++ )
1081 const uchar* csrow = csmatrix.ptr(j);
1082 // Because -1's in the Bernoulli matrix are encoded as 0's,
1083 // the real dot product value will be
1084 // A - B, where A is the sum of leaf[j]'s for which csrow[j]==1 and
1085 // B is the sum of leaf[j]'s for which csrow[j]==0.
1086 // since A + B = S, then A - B = A - (S - A) = 2*A - S.
1088 for( k = 0; k < n; k++ )
1089 A += leaf[k] & -(int)csrow[k];
1090 val = (A*2 - S)*scale;
1093 val = leaf[j]*scale;
1097 // put the vector back (since it's shorter than the original, we can do it in-place)
1098 for( j = 0; j < m; j++ )
1099 out_leaf[j] = dst[j];
1102 else if( compressionMethod == COMPRESSION_PCA )
1104 // In case of PCA we do 3 passes over the data:
1105 // first, we compute the mean vector
1106 // second, we compute the covariation matrix
1107 // then we do eigen decomposition of the matrix and construct the PCA
1108 // projection matrix
1109 // and on the third pass we actually do PCA compression.
1111 int nonEmptyLeaves = 0;
1112 Mat_<double> _mean(1, n), _vec(1, n), _dvec(m, 1),
1113 _cov(n, n), _evals(n, 1), _evects(n, n);
1115 double* mean = &_mean(0,0);
1116 double* vec = &_vec(0,0);
1117 double* dvec = &_dvec(0,0);
1119 for( i = 0; i < totalLeaves; i++ )
1121 int S = sampleCounters[i];
1125 const int* leaf = (const int*)&posteriors[0] + i*n;
1126 float* out_leaf = (float*)&posteriors[0] + i*n;
1128 for( j = 0; j < n; j++ )
1130 float t = leaf[j]*invS;
1137 CV_Assert( nonEmptyLeaves >= ntrees );
1138 _mean *= 1./nonEmptyLeaves;
1140 for( i = 0; i < totalLeaves; i++ )
1142 int S = sampleCounters[i];
1145 const float* leaf = (const float*)&posteriors[0] + i*n;
1146 for( j = 0; j < n; j++ )
1147 vec[j] = leaf[j] - mean[j];
1148 gemm(_vec, _vec, 1, _cov, 1, _cov, GEMM_1_T);
1151 _cov *= 1./nonEmptyLeaves;
1152 eigen(_cov, _evals, _evects);
1153 // use the first m eigenvectors (stored as rows of the eigenvector matrix)
1154 // as the projection matrix in PCA
1155 _evects = _evects(Range(0, m), Range::all());
1157 for( i = 0; i < totalLeaves; i++ )
1159 int S = sampleCounters[i];
1162 const float* leaf = (const float*)&posteriors[0] + i*n;
1163 float* out_leaf = (float*)&posteriors[0] + i*m;
1165 for( j = 0; j < n; j++ )
1166 vec[j] = leaf[j] - mean[j];
1167 gemm(_evects, _vec, 1, Mat(), 0, _dvec, GEMM_2_T);
1169 for( j = 0; j < m; j++ )
1170 out_leaf[j] = (float)dvec[j];
1174 CV_Error( CV_StsBadArg,
1175 "Unknown compression method; use COMPRESSION_RANDOM_PROJ or COMPRESSION_PCA" );
1177 // and shrink the vector
1178 posteriors.resize(totalLeaves*m);
1182 void FernClassifier::setVerbose(bool _verbose)
1187 ////////////////////////////////////// Planar Object Detector ////////////////////////////////////
1189 PlanarObjectDetector::PlanarObjectDetector()
1193 PlanarObjectDetector::PlanarObjectDetector(const FileNode& node)
1198 PlanarObjectDetector::PlanarObjectDetector(const vector<Mat>& pyr, int npoints,
1199 int patchSize, int nstructs, int structSize,
1200 int nviews, const LDetector& detector,
1201 const PatchGenerator& patchGenerator)
1203 train(pyr, npoints, patchSize, nstructs,
1204 structSize, nviews, detector, patchGenerator);
1207 PlanarObjectDetector::~PlanarObjectDetector()
1211 vector<KeyPoint> PlanarObjectDetector::getModelPoints() const
1216 void PlanarObjectDetector::train(const vector<Mat>& pyr, int npoints,
1217 int patchSize, int nstructs, int structSize,
1218 int nviews, const LDetector& detector,
1219 const PatchGenerator& patchGenerator)
1221 modelROI = Rect(0, 0, pyr[0].cols, pyr[0].rows);
1222 ldetector = detector;
1223 ldetector.setVerbose(verbose);
1224 ldetector.getMostStable2D(pyr[0], modelPoints, npoints, patchGenerator);
1226 npoints = modelPoints.size();
1227 fernClassifier.setVerbose(verbose);
1228 fernClassifier.trainFromSingleView(pyr[0], modelPoints,
1229 patchSize, (int)modelPoints.size(), nstructs, structSize, nviews,
1230 FernClassifier::COMPRESSION_NONE, patchGenerator);
1233 void PlanarObjectDetector::train(const vector<Mat>& pyr, const vector<KeyPoint>& keypoints,
1234 int patchSize, int nstructs, int structSize,
1235 int nviews, const LDetector& detector,
1236 const PatchGenerator& patchGenerator)
1238 modelROI = Rect(0, 0, pyr[0].cols, pyr[0].rows);
1239 ldetector = detector;
1240 ldetector.setVerbose(verbose);
1241 modelPoints.resize(keypoints.size());
1242 std::copy(keypoints.begin(), keypoints.end(), modelPoints.begin());
1244 fernClassifier.setVerbose(verbose);
1245 fernClassifier.trainFromSingleView(pyr[0], modelPoints,
1246 patchSize, (int)modelPoints.size(), nstructs, structSize, nviews,
1247 FernClassifier::COMPRESSION_NONE, patchGenerator);
1250 void PlanarObjectDetector::read(const FileNode& node)
1252 FileNodeIterator it = node["model-roi"].begin(), it_end;
1253 it >> modelROI.x >> modelROI.y >> modelROI.width >> modelROI.height;
1254 ldetector.read(node["detector"]);
1255 fernClassifier.read(node["fern-classifier"]);
1256 cv::read(node["model-points"], modelPoints);
1257 CV_Assert(modelPoints.size() == (size_t)fernClassifier.getClassCount());
1261 void PlanarObjectDetector::write(FileStorage& fs, const String& objname) const
1263 WriteStructContext ws(fs, objname, CV_NODE_MAP);
1266 WriteStructContext wsroi(fs, "model-roi", CV_NODE_SEQ + CV_NODE_FLOW);
1267 cv::write(fs, modelROI.x);
1268 cv::write(fs, modelROI.y);
1269 cv::write(fs, modelROI.width);
1270 cv::write(fs, modelROI.height);
1272 ldetector.write(fs, "detector");
1273 cv::write(fs, "model-points", modelPoints);
1274 fernClassifier.write(fs, "fern-classifier");
1278 bool PlanarObjectDetector::operator()(const Mat& image, Mat& H, vector<Point2f>& corners) const
1281 buildPyramid(image, pyr, ldetector.nOctaves - 1);
1282 vector<KeyPoint> keypoints;
1283 ldetector(pyr, keypoints);
1285 return (*this)(pyr, keypoints, H, corners);
1288 bool PlanarObjectDetector::operator()(const vector<Mat>& pyr, const vector<KeyPoint>& keypoints,
1289 Mat& _H, vector<Point2f>& corners, vector<int>* pairs) const
1291 int i, j, m = (int)modelPoints.size(), n = (int)keypoints.size();
1292 vector<int> bestMatches(m, -1);
1293 vector<float> maxLogProb(m, -FLT_MAX);
1294 vector<float> signature;
1295 vector<Point2f> fromPt, toPt;
1297 for( i = 0; i < n; i++ )
1299 KeyPoint kpt = keypoints[i];
1300 CV_Assert(0 <= kpt.octave && kpt.octave < (int)pyr.size());
1301 kpt.pt.x /= (float)(1 << kpt.octave);
1302 kpt.pt.y /= (float)(1 << kpt.octave);
1303 int k = fernClassifier(pyr[kpt.octave], kpt.pt, signature);
1304 if( k >= 0 && (bestMatches[k] < 0 || signature[k] > maxLogProb[k]) )
1306 maxLogProb[k] = signature[k];
1314 for( i = 0; i < m; i++ )
1315 if( bestMatches[i] >= 0 )
1317 fromPt.push_back(modelPoints[i].pt);
1318 toPt.push_back(keypoints[bestMatches[i]].pt);
1321 if( fromPt.size() < 4 )
1325 _H = findHomography(fromPt, toPt, mask, RANSAC, 10);
1328 const Mat_<double>& H = _H;
1330 for( i = 0; i < 4; i++ )
1332 Point2f pt((float)(modelROI.x + (i == 0 || i == 3 ? 0 : modelROI.width)),
1333 (float)(modelROI.y + (i <= 1 ? 0 : modelROI.height)));
1334 double w = 1./(H(2,0)*pt.x + H(2,1)*pt.y + H(2,2));
1335 corners[i] = Point2f((float)((H(0,0)*pt.x + H(0,1)*pt.y + H(0,2))*w),
1336 (float)((H(1,0)*pt.x + H(1,1)*pt.y + H(1,2))*w));
1342 for( i = j = 0; i < m; i++ )
1343 if( bestMatches[i] >= 0 && mask[j++] )
1345 pairs->push_back(i);
1346 pairs->push_back(bestMatches[i]);
1350 return _H.data != 0;
1354 void PlanarObjectDetector::setVerbose(bool _verbose)