/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv
{
// default cast operators: fixed-point with rounding (integer types)
// and plain scaling (floating-point types)
template<typename T, int shift> struct FixPtCast
{
    typedef int type1;
    typedef T rtype;
    rtype operator ()(type1 arg) const { return (T)((arg + (1 << (shift-1))) >> shift); }
};

template<typename T, int shift> struct FltCast
{
    typedef T type1;
    typedef T rtype;
    rtype operator ()(type1 arg) const { return arg*(T)(1./(1 << shift)); }
};

// fallback "vector operation": processes nothing and lets the scalar loop handle the whole row
struct NoVec
{
    int operator()(const uchar**, uchar*, int, int) const { return 0; }
};
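
/* A worked example (illustrative only, not part of the library): with shift = 8,
   FixPtCast<uchar, 8> divides by 256 with round-to-nearest:

       FixPtCast<uchar, 8> cast;
       uchar v = cast(25728);   // (25728 + 128) >> 8 == 101, i.e. 100.5 rounds up

   256 is the weight sum of the separable {1,4,6,4,1} pyrDown kernel applied in
   both directions (16 * 16), so the cast also normalizes the filter. */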
#if CV_SSE2

struct PyrDownVec_32s8u
{
    int operator()(const uchar** _src, uchar* dst, int, int width) const
    {
        int x = 0;
        const int** src = (const int**)_src;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128i delta = _mm_set1_epi16(128);

        for( ; x <= width - 16; x += 16 )
        {
            __m128i r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 4)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 4)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 4)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 4)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 4)));
            // r0 + 4*r1 + 6*r2 + 4*r3 + r4, computed as (r0 + r4 + 2*r2) + 4*(r1 + r2 + r3)
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 12)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 12)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 12)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 12)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 12)));
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t1 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            // add the rounding bias, divide by 256 and pack to 8 bits with saturation
            t0 = _mm_srli_epi16(_mm_add_epi16(t0, delta), 8);
            t1 = _mm_srli_epi16(_mm_add_epi16(t1, delta), 8);
            _mm_storeu_si128((__m128i*)(dst + x), _mm_packus_epi16(t0, t1));
        }

        for( ; x <= width - 4; x += 4 )
        {
            __m128i r0, r1, r2, r3, r4, z = _mm_setzero_si128();
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), z);
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), z);
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), z);
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), z);
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), z);
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            r0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_srli_epi16(_mm_add_epi16(r0, delta), 8);
            *(int*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
        }

        return x;
    }
};
struct PyrDownVec_32f
{
    int operator()(const uchar** _src, uchar* _dst, int, int width) const
    {
        int x = 0;
        const float** src = (const float**)_src;
        float* dst = (float*)_dst;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128 _4 = _mm_set1_ps(4.f), _scale = _mm_set1_ps(1.f/256);
        for( ; x <= width - 8; x += 8 )
        {
            __m128 r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_load_ps(row0 + x);
            r1 = _mm_load_ps(row1 + x);
            r2 = _mm_load_ps(row2 + x);
            r3 = _mm_load_ps(row3 + x);
            r4 = _mm_load_ps(row4 + x);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t0 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            r0 = _mm_load_ps(row0 + x + 4);
            r1 = _mm_load_ps(row1 + x + 4);
            r2 = _mm_load_ps(row2 + x + 4);
            r3 = _mm_load_ps(row3 + x + 4);
            r4 = _mm_load_ps(row4 + x + 4);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t1 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            t0 = _mm_mul_ps(t0, _scale);
            t1 = _mm_mul_ps(t1, _scale);

            _mm_storeu_ps(dst + x, t0);
            _mm_storeu_ps(dst + x + 4, t1);
        }

        return x;
    }
};

#else

typedef NoVec PyrDownVec_32s8u;
typedef NoVec PyrDownVec_32f;

#endif
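
/* For reference, both vector routines above compute the same 5-tap vertical
   reduction as the scalar tail loop in pyrDown_ below (a sketch, not library code):

       dst[x] = (row0[x] + 4*row1[x] + 6*row2[x] + 4*row3[x] + row4[x]) * (1/256)

   with round-to-nearest in the 8-bit path (the 'delta' bias of 128). Each routine
   returns how many elements it handled, so the scalar loop finishes the row. */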
template<class CastOp, class VecOp> void
pyrDown_( const Mat& _src, Mat& _dst )
{
    const int PD_SZ = 5;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize(dsize.width*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
    AutoBuffer<int> _tabM(dsize.width*cn);
    int* tabM = _tabM;
    WT* rows[PD_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );
    int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

    // precompute source indices for the left and right image margins
    for( x = 0; x <= PD_SZ+1; x++ )
    {
        int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, BORDER_REFLECT_101)*cn;
        int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, BORDER_REFLECT_101)*cn;
        for( k = 0; k < cn; k++ )
        {
            tabL[x*cn + k] = sx0 + k;
            tabR[x*cn + k] = sx1 + k;
        }
    }

    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < dsize.width; x++ )
        tabM[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < dsize.height; y++ )
    {
        T* dst = (T*)(_dst.data + _dst.step*y);
        WT *row0, *row1, *row2, *row3, *row4;

        // fill the ring buffer (horizontal convolution and decimation)
        for( ; sy <= y*2 + 2; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, BORDER_REFLECT_101);
            const T* src = (const T*)(_src.data + _src.step*_sy);
            int limit = cn;
            const int* tab = tabL;

            for( x = 0;; )
            {
                // border columns use the precomputed index tables
                for( ; x < limit; x++ )
                {
                    row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                        src[tab[x]] + src[tab[x+cn*4]];
                }

                if( x == dsize.width )
                    break;

                if( cn == 1 )
                {
                    for( ; x < width0; x++ )
                        row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                            src[x*2 - 2] + src[x*2 + 2];
                }
                else if( cn == 3 )
                {
                    for( ; x < width0; x += 3 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                        WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                        WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                        row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                    }
                }
                else if( cn == 4 )
                {
                    for( ; x < width0; x += 4 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                        WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                        row[x] = t0; row[x+1] = t1;
                        t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                        t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                        row[x+2] = t0; row[x+3] = t1;
                    }
                }
                else
                {
                    for( ; x < width0; x++ )
                    {
                        int sx = tabM[x];
                        row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                            src[sx - cn*2] + src[sx + cn*2];
                    }
                }

                limit = dsize.width;
                tab = tabR - x;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        for( k = 0; k < PD_SZ; k++ )
            rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

        x = vecOp((const uchar**)rows, (uchar*)dst, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
            dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
    }
}
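
/* The full 5x5 pyrDown kernel is the outer product of {1,4,6,4,1}/16 with itself;
   a quick way to inspect it (a standalone sketch, not part of this file):

       cv::Mat k = (cv::Mat_<float>(5, 1) << 1, 4, 6, 4, 1) / 16.f;
       cv::Mat k5x5 = k * k.t();   // entries sum to 1
*/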
template<class CastOp, class VecOp> void
pyrUp_( const Mat& _src, Mat& _dst )
{
    const int PU_SZ = 3;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    AutoBuffer<int> _dtab(ssize.width*cn);
    int* dtab = _dtab;
    WT* rows[PU_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
               std::abs(dsize.height - ssize.height*2) == dsize.height % 2);
    int k, x, sy0 = -PU_SZ/2, sy = sy0, width0 = ssize.width - 1;

    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < ssize.width; x++ )
        dtab[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < ssize.height; y++ )
    {
        T* dst0 = (T*)(_dst.data + _dst.step*y*2);
        T* dst1 = (T*)(_dst.data + _dst.step*(y*2+1));
        WT *row0, *row1, *row2;

        if( y*2+1 >= dsize.height )
            dst1 = dst0;

        // fill the ring buffer (horizontal convolution and up-sampling)
        for( ; sy <= y + 1; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, BORDER_REFLECT_101);
            const T* src = (const T*)(_src.data + _src.step*_sy);

            if( ssize.width == cn )
            {
                for( x = 0; x < cn; x++ )
                    row[x] = row[x + cn] = src[x]*8;
                continue;
            }

            // left and right border columns
            for( x = 0; x < cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x]*6 + src[x + cn]*2;
                WT t1 = (src[x] + src[x + cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
                dx = dtab[ssize.width - cn + x];
                int sx = ssize.width - cn + x;
                t0 = src[sx - cn] + src[sx]*7;
                t1 = src[sx]*8;
                row[dx] = t0; row[dx + cn] = t1;
            }

            // interior columns
            for( x = cn; x < ssize.width - cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
                WT t1 = (src[x] + src[x+cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
            }
        }

        // do vertical convolution and up-sampling and write the result to the destination image
        for( k = 0; k < PU_SZ; k++ )
            rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2];

        x = vecOp((const uchar**)rows, (uchar*)dst0, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
        {
            T t1 = castOp((row1[x] + row2[x])*4);
            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
            dst1[x] = t1; dst0[x] = t0;
        }
    }
}
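
/* Note on normalization: the even/odd output rows above are
       dst0[x] = row0[x] + 6*row1[x] + row2[x]   (weights 1,6,1 -> sum 8)
       dst1[x] = 4*(row1[x] + row2[x])           (weights 4,4   -> sum 8)
   and each horizontal pass also sums to 8, so the cast operators used by
   pyrUp() below divide by 64 (shift 6). */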
typedef void (*PyrFunc)(const Mat&, Mat&);

void pyrDown( const Mat& _src, Mat& _dst, const Size& _dsz )
{
    Size dsz = _dsz == Size() ? Size((_src.cols + 1)/2, (_src.rows + 1)/2) : _dsz;
    _dst.create( dsz, _src.type() );
    int depth = _src.depth();
    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrDown_<FixPtCast<uchar, 8>, PyrDownVec_32s8u>;
    else if( depth == CV_16U )
        func = pyrDown_<FixPtCast<ushort, 8>, NoVec>;
    else if( depth == CV_32F )
        func = pyrDown_<FltCast<float, 8>, PyrDownVec_32f>;
    else if( depth == CV_64F )
        func = pyrDown_<FltCast<double, 8>, NoVec>;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( _src, _dst );
}

void pyrUp( const Mat& _src, Mat& _dst, const Size& _dsz )
{
    Size dsz = _dsz == Size() ? Size(_src.cols*2, _src.rows*2) : _dsz;
    _dst.create( dsz, _src.type() );
    int depth = _src.depth();
    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrUp_<FixPtCast<uchar, 6>, NoVec>;
    else if( depth == CV_16U )
        func = pyrUp_<FixPtCast<ushort, 6>, NoVec>;
    else if( depth == CV_32F )
        func = pyrUp_<FltCast<float, 6>, NoVec>;
    else if( depth == CV_64F )
        func = pyrUp_<FltCast<double, 6>, NoVec>;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( _src, _dst );
}

void buildPyramid( const Mat& _src, vector<Mat>& _dst, int maxlevel )
{
    _dst.resize( maxlevel + 1 );
    _dst[0] = _src;
    for( int i = 1; i <= maxlevel; i++ )
        pyrDown( _dst[i-1], _dst[i] );
}

}
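
/* Usage sketch for the C++ API above (illustrative; assumes an image loaded
   elsewhere, e.g. via cv::imread from highgui):

       cv::Mat img = cv::imread("image.png"), half, back;
       cv::pyrDown(img, half);                 // Gaussian blur + 2x downsample
       cv::pyrUp(half, back, img.size());      // 2x upsample (lossy round trip)

       std::vector<cv::Mat> pyr;
       cv::buildPyramid(img, pyr, 4);          // pyr[0] is img, pyr[4] the coarsest
*/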
CV_IMPL void cvPyrDown( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrDown( src, dst, dst.size() );
}

CV_IMPL void cvPyrUp( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrUp( src, dst, dst.size() );
}
CV_IMPL void
cvReleasePyramid( CvMat*** _pyramid, int extra_layers )
{
    CV_FUNCNAME( "cvReleasePyramid" );

    __BEGIN__;

    CvMat** pyramid;
    int i;

    if( !_pyramid )
        CV_ERROR( CV_StsNullPtr, "" );

    pyramid = *_pyramid;

    if( pyramid )
    {
        for( i = 0; i <= extra_layers; i++ )
            cvReleaseMat( &pyramid[i] );
    }

    cvFree( _pyramid );

    __END__;
}
CV_IMPL CvMat**
cvCreatePyramid( const CvArr* srcarr, int extra_layers, double rate,
                 const CvSize* layer_sizes, CvArr* bufarr,
                 int calc, int filter )
{
    CvMat** pyramid = 0;
    const float eps = 0.1f;

    CV_FUNCNAME( "cvCreatePyramid" );

    __BEGIN__;

    int i, elem_size, layer_step;
    CvMat stub, *src;
    CvSize size, layer_size;
    uchar* ptr = 0;

    CV_CALL( src = cvGetMat( srcarr, &stub ));

    if( extra_layers < 0 )
        CV_ERROR( CV_StsOutOfRange, "The number of extra layers must be non-negative" );

    elem_size = CV_ELEM_SIZE(src->type);
    size = cvGetMatSize(src);

    if( bufarr )
    {
        CvMat bstub, *buf;
        int bufsize = 0;

        CV_CALL( buf = cvGetMat( bufarr, &bstub ));
        bufsize = buf->rows*buf->cols*CV_ELEM_SIZE(buf->type);
        layer_size = size;
        for( i = 1; i <= extra_layers; i++ )
        {
            if( !layer_sizes )
            {
                layer_size.width = cvRound(layer_size.width*rate+eps);
                layer_size.height = cvRound(layer_size.height*rate+eps);
            }
            else
                layer_size = layer_sizes[i-1];
            layer_step = layer_size.width*elem_size;
            bufsize -= layer_step*layer_size.height;
        }

        if( bufsize < 0 )
            CV_ERROR( CV_StsOutOfRange, "The buffer is too small to fit the pyramid" );
        ptr = buf->data.ptr;
    }

    CV_CALL( pyramid = (CvMat**)cvAlloc( (extra_layers+1)*sizeof(pyramid[0]) ));
    memset( pyramid, 0, (extra_layers+1)*sizeof(pyramid[0]) );

    pyramid[0] = cvCreateMatHeader( size.height, size.width, src->type );
    cvSetData( pyramid[0], src->data.ptr, src->step );

    layer_size = size;
    for( i = 1; i <= extra_layers; i++ )
    {
        if( !layer_sizes )
        {
            layer_size.width = cvRound(layer_size.width*rate + eps);
            layer_size.height = cvRound(layer_size.height*rate + eps);
        }
        else
            layer_size = layer_sizes[i-1]; // same indexing as the size check above

        if( bufarr )
        {
            pyramid[i] = cvCreateMatHeader( layer_size.height, layer_size.width, src->type );
            layer_step = layer_size.width*elem_size;
            cvSetData( pyramid[i], ptr, layer_step );
            ptr += layer_step*layer_size.height;
        }
        else
            pyramid[i] = cvCreateMat( layer_size.height, layer_size.width, src->type );

        if( calc )
            cvPyrDown( pyramid[i-1], pyramid[i], filter );
            //cvResize( pyramid[i-1], pyramid[i], CV_INTER_LINEAR );
    }

    __END__;

    if( cvGetErrStatus() < 0 )
        cvReleasePyramid( &pyramid, extra_layers );

    return pyramid;
}
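
/* Usage sketch for the legacy C API above (illustrative only):

       IplImage* img = cvLoadImage("image.png", 1);
       // 3 extra layers, each half the previous size, computed with cvPyrDown
       CvMat** pyr = cvCreatePyramid(img, 3, 0.5, 0, 0, 1, CV_GAUSSIAN_5x5);
       // ... use pyr[0] (the input) through pyr[3] (the coarsest layer) ...
       cvReleasePyramid(&pyr, 3);
       cvReleaseImage(&img);
*/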