vcs.maemo.org Git - opencv/blob - cv/src/cvsmooth.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "_cv.h"
  43
  44 /*
  45  * This file includes the code, contributed by Simon Perreault
  46  * (the function icvMedianBlur_8u_CnR_O1)
  47  *
  48  * Constant-time median filtering -- http://nomis80.org/ctmf.html
  49  * Copyright (C) 2006 Simon Perreault
  50  *
  51  * Contact:
  52  *  Laboratoire de vision et systemes numeriques
  53  *  Pavillon Adrien-Pouliot
  54  *  Universite Laval
  55  *  Sainte-Foy, Quebec, Canada
  56  *  G1K 7P4
  57  *
  58  *  perreaul@gel.ulaval.ca
  59  */
  60
  61 // uncomment the line below to force SSE2 mode
  62 //#define CV_SSE2 1
  63
  64 /****************************************************************************************\
  65                                          Box Filter
  66 \****************************************************************************************/
  67
  68 static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
  69 static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
  70 static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
  71                              int count, void* params );
  72 static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
  73                              int count, void* params );
  74 static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
  75                              int count, void* params );
  76 static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
  77                               int count, void* params );
  78
  79 CvBoxFilter::CvBoxFilter()
  80 {
  81     min_depth = CV_32S;
  82     sum = 0;
  83     sum_count = 0;
  84     normalized = false;
  85 }
  86
  87
  88 CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
  89                           bool _normalized, CvSize _ksize,
  90                           CvPoint _anchor, int _border_mode,
  91                           CvScalar _border_value )
  92 {
  93     min_depth = CV_32S;
  94     sum = 0;
  95     sum_count = 0;
  96     normalized = false;
  97     init( _max_width, _src_type, _dst_type, _normalized,
  98           _ksize, _anchor, _border_mode, _border_value );
  99 }
 100
 101
 102 CvBoxFilter::~CvBoxFilter()
 103 {
 104     clear();
 105 }
 106
 107
 108 void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
 109                         bool _normalized, CvSize _ksize,
 110                         CvPoint _anchor, int _border_mode,
 111                         CvScalar _border_value )
 112 {
 113     CV_FUNCNAME( "CvBoxFilter::init" );
 114
 115     __BEGIN__;
 116
 117     sum = 0;
 118     normalized = _normalized;
 119
 120     if( normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type) ||
 121         !normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type))
 122         CV_ERROR( CV_StsUnmatchedFormats,
 123         "In case of normalized box filter input and output must have the same type.\n"
 124         "In case of unnormalized box filter the number of input and output channels must be the same" );
 125
 126     min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
 127
 128     CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
 129                              _anchor, _border_mode, _border_value );
 130
 131     scale = normalized ? 1./(ksize.width*ksize.height) : 1;
 132
 133     if( CV_MAT_DEPTH(src_type) == CV_8U )
 134         x_func = (CvRowFilterFunc)icvSumRow_8u32s;
 135     else if( CV_MAT_DEPTH(src_type) == CV_32F )
 136         x_func = (CvRowFilterFunc)icvSumRow_32f64f;
 137     else
 138         CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
 139
 140     if( CV_MAT_DEPTH(dst_type) == CV_8U )
 141     {
 142         if( !normalized )
 143             CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
 144         y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
 145     }
 146     else if( CV_MAT_DEPTH(dst_type) == CV_16S )
 147     {
 148         if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
 149             CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
 150         y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
 151     }
 152         else if( CV_MAT_DEPTH(dst_type) == CV_32S )
 153         {
 154                 if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
 155                         CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
 156
 157                 y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
 158         }
 159     else if( CV_MAT_DEPTH(dst_type) == CV_32F )
 160     {
 161         if( CV_MAT_DEPTH(src_type) != CV_32F )
 162             CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
 163         y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
 164     }
 165         else{
 166                 CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
 167         }
 168
 169     __END__;
 170 }
 171
 172
 173 void CvBoxFilter::start_process( CvSlice x_range, int width )
 174 {
 175     CvBaseImageFilter::start_process( x_range, width );
 176     int i, psz = CV_ELEM_SIZE(work_type);
 177     uchar* s;
 178     buf_end -= buf_step;
 179     buf_max_count--;
 180     assert( buf_max_count >= max_ky*2 + 1 );
 181     s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
 182     sum_count = 0;
 183
 184     width *= psz;
 185     for( i = 0; i < width; i++ )
 186         s[i] = (uchar)0;
 187 }
 188
 189
 190 static void
 191 icvSumRow_8u32s( const uchar* src, int* dst, void* params )
 192 {
 193     const CvBoxFilter* state = (const CvBoxFilter*)params;
 194     int ksize = state->get_kernel_size().width;
 195     int width = state->get_width();
 196     int cn = CV_MAT_CN(state->get_src_type());
 197     int i, k;
 198
 199     width = (width - 1)*cn; ksize *= cn;
 200
 201     for( k = 0; k < cn; k++, src++, dst++ )
 202     {
 203         int s = 0;
 204         for( i = 0; i < ksize; i += cn )
 205             s += src[i];
 206         dst[0] = s;
 207         for( i = 0; i < width; i += cn )
 208         {
 209             s += src[i+ksize] - src[i];
 210             dst[i+cn] = s;
 211         }
 212     }
 213 }
 214
 215
 216 static void
 217 icvSumRow_32f64f( const float* src, double* dst, void* params )
 218 {
 219     const CvBoxFilter* state = (const CvBoxFilter*)params;
 220     int ksize = state->get_kernel_size().width;
 221     int width = state->get_width();
 222     int cn = CV_MAT_CN(state->get_src_type());
 223     int i, k;
 224
 225     width = (width - 1)*cn; ksize *= cn;
 226
 227     for( k = 0; k < cn; k++, src++, dst++ )
 228     {
 229         double s = 0;
 230         for( i = 0; i < ksize; i += cn )
 231             s += src[i];
 232         dst[0] = s;
 233         for( i = 0; i < width; i += cn )
 234         {
 235             s += (double)src[i+ksize] - src[i];
 236             dst[i+cn] = s;
 237         }
 238     }
 239 }
 240
 241
 242 static void
 243 icvSumCol_32s8u( const int** src, uchar* dst,
 244                  int dst_step, int count, void* params )
 245 {
 246 #define BLUR_SHIFT 24
 247     CvBoxFilter* state = (CvBoxFilter*)params;
 248     int ksize = state->get_kernel_size().height;
 249     int i, width = state->get_width();
 250     int cn = CV_MAT_CN(state->get_src_type());
 251     double scale = state->get_scale();
 252     int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
 253     int* sum = (int*)state->get_sum_buf();
 254     int* _sum_count = state->get_sum_count_ptr();
 255     int sum_count = *_sum_count;
 256
 257     width *= cn;
 258     src += sum_count;
 259     count += ksize - 1 - sum_count;
 260
 261     for( ; count--; src++ )
 262     {
 263         const int* sp = src[0];
 264         if( sum_count+1 < ksize )
 265         {
 266             for( i = 0; i <= width - 2; i += 2 )
 267             {
 268                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 269                 sum[i] = s0; sum[i+1] = s1;
 270             }
 271
 272             for( ; i < width; i++ )
 273                 sum[i] += sp[i];
 274
 275             sum_count++;
 276         }
 277         else
 278         {
 279             const int* sm = src[-ksize+1];
 280             for( i = 0; i <= width - 2; i += 2 )
 281             {
 282                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 283                 int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
 284                 s0 -= sm[i]; s1 -= sm[i+1];
 285                 sum[i] = s0; sum[i+1] = s1;
 286                 dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
 287             }
 288
 289             for( ; i < width; i++ )
 290             {
 291                 int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
 292                 sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
 293             }
 294             dst += dst_step;
 295         }
 296     }
 297
 298     *_sum_count = sum_count;
 299 #undef BLUR_SHIFT
 300 }
 301
 302
 303 static void
 304 icvSumCol_32s16s( const int** src, short* dst,
 305                   int dst_step, int count, void* params )
 306 {
 307     CvBoxFilter* state = (CvBoxFilter*)params;
 308     int ksize = state->get_kernel_size().height;
 309     int ktotal = ksize*state->get_kernel_size().width;
 310     int i, width = state->get_width();
 311     int cn = CV_MAT_CN(state->get_src_type());
 312     int* sum = (int*)state->get_sum_buf();
 313     int* _sum_count = state->get_sum_count_ptr();
 314     int sum_count = *_sum_count;
 315
 316     dst_step /= sizeof(dst[0]);
 317     width *= cn;
 318     src += sum_count;
 319     count += ksize - 1 - sum_count;
 320
 321     for( ; count--; src++ )
 322     {
 323         const int* sp = src[0];
 324         if( sum_count+1 < ksize )
 325         {
 326             for( i = 0; i <= width - 2; i += 2 )
 327             {
 328                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 329                 sum[i] = s0; sum[i+1] = s1;
 330             }
 331
 332             for( ; i < width; i++ )
 333                 sum[i] += sp[i];
 334
 335             sum_count++;
 336         }
 337         else if( ktotal < 128 )
 338         {
 339             const int* sm = src[-ksize+1];
 340             for( i = 0; i <= width - 2; i += 2 )
 341             {
 342                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 343                 dst[i] = (short)s0; dst[i+1] = (short)s1;
 344                 s0 -= sm[i]; s1 -= sm[i+1];
 345                 sum[i] = s0; sum[i+1] = s1;
 346             }
 347
 348             for( ; i < width; i++ )
 349             {
 350                 int s0 = sum[i] + sp[i];
 351                 dst[i] = (short)s0;
 352                 sum[i] = s0 - sm[i];
 353             }
 354             dst += dst_step;
 355         }
 356         else
 357         {
 358             const int* sm = src[-ksize+1];
 359             for( i = 0; i <= width - 2; i += 2 )
 360             {
 361                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 362                 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
 363                 s0 -= sm[i]; s1 -= sm[i+1];
 364                 sum[i] = s0; sum[i+1] = s1;
 365             }
 366
 367             for( ; i < width; i++ )
 368             {
 369                 int s0 = sum[i] + sp[i];
 370                 dst[i] = CV_CAST_16S(s0);
 371                 sum[i] = s0 - sm[i];
 372             }
 373             dst += dst_step;
 374         }
 375     }
 376
 377     *_sum_count = sum_count;
 378 }
 379
 380 static void
 381 icvSumCol_32s32s( const int** src, int * dst,
 382                   int dst_step, int count, void* params )
 383 {
 384     CvBoxFilter* state = (CvBoxFilter*)params;
 385     int ksize = state->get_kernel_size().height;
 386     int i, width = state->get_width();
 387     int cn = CV_MAT_CN(state->get_src_type());
 388     int* sum = (int*)state->get_sum_buf();
 389     int* _sum_count = state->get_sum_count_ptr();
 390     int sum_count = *_sum_count;
 391
 392     dst_step /= sizeof(dst[0]);
 393     width *= cn;
 394     src += sum_count;
 395     count += ksize - 1 - sum_count;
 396
 397     for( ; count--; src++ )
 398     {
 399         const int* sp = src[0];
 400         if( sum_count+1 < ksize )
 401         {
 402             for( i = 0; i <= width - 2; i += 2 )
 403             {
 404                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 405                 sum[i] = s0; sum[i+1] = s1;
 406             }
 407
 408             for( ; i < width; i++ )
 409                 sum[i] += sp[i];
 410
 411             sum_count++;
 412         }
 413         else
 414         {
 415             const int* sm = src[-ksize+1];
 416             for( i = 0; i <= width - 2; i += 2 )
 417             {
 418                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 419                 dst[i] = s0; dst[i+1] = s1;
 420                 s0 -= sm[i]; s1 -= sm[i+1];
 421                 sum[i] = s0; sum[i+1] = s1;
 422             }
 423
 424             for( ; i < width; i++ )
 425             {
 426                 int s0 = sum[i] + sp[i];
 427                 dst[i] = s0;
 428                 sum[i] = s0 - sm[i];
 429             }
 430             dst += dst_step;
 431         }
 432     }
 433
 434     *_sum_count = sum_count;
 435 }
 436
 437
 438 static void
 439 icvSumCol_64f32f( const double** src, float* dst,
 440                   int dst_step, int count, void* params )
 441 {
 442     CvBoxFilter* state = (CvBoxFilter*)params;
 443     int ksize = state->get_kernel_size().height;
 444     int i, width = state->get_width();
 445     int cn = CV_MAT_CN(state->get_src_type());
 446     double scale = state->get_scale();
 447     bool normalized = state->is_normalized();
 448     double* sum = (double*)state->get_sum_buf();
 449     int* _sum_count = state->get_sum_count_ptr();
 450     int sum_count = *_sum_count;
 451
 452     dst_step /= sizeof(dst[0]);
 453     width *= cn;
 454     src += sum_count;
 455     count += ksize - 1 - sum_count;
 456
 457     for( ; count--; src++ )
 458     {
 459         const double* sp = src[0];
 460         if( sum_count+1 < ksize )
 461         {
 462             for( i = 0; i <= width - 2; i += 2 )
 463             {
 464                 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 465                 sum[i] = s0; sum[i+1] = s1;
 466             }
 467
 468             for( ; i < width; i++ )
 469                 sum[i] += sp[i];
 470
 471             sum_count++;
 472         }
 473         else
 474         {
 475             const double* sm = src[-ksize+1];
 476             if( normalized )
 477                 for( i = 0; i <= width - 2; i += 2 )
 478                 {
 479                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 480                     double t0 = s0*scale, t1 = s1*scale;
 481                     s0 -= sm[i]; s1 -= sm[i+1];
 482                     dst[i] = (float)t0; dst[i+1] = (float)t1;
 483                     sum[i] = s0; sum[i+1] = s1;
 484                 }
 485             else
 486                 for( i = 0; i <= width - 2; i += 2 )
 487                 {
 488                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
 489                     dst[i] = (float)s0; dst[i+1] = (float)s1;
 490                     s0 -= sm[i]; s1 -= sm[i+1];
 491                     sum[i] = s0; sum[i+1] = s1;
 492                 }
 493
 494             for( ; i < width; i++ )
 495             {
 496                 double s0 = sum[i] + sp[i], t0 = s0*scale;
 497                 sum[i] = s0 - sm[i]; dst[i] = (float)t0;
 498             }
 499             dst += dst_step;
 500         }
 501     }
 502
 503     *_sum_count = sum_count;
 504 }
 505
 506
 507 /****************************************************************************************\
 508                                       Median Filter
 509 \****************************************************************************************/
 510
 511 #define CV_MINMAX_8U(a,b) \
 512     (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
 513
 514 #if CV_SSE2 && !defined __SSE2__
 515 #define __SSE2__ 1
 516 #include "emmintrin.h"
 517 #endif
 518
 519 #if defined(__VEC__) || defined(__ALTIVEC__)
 520 #include <altivec.h>
 521 #undef bool
 522 #endif
 523
 524 #if defined(__GNUC__)
 525 #define align(x) __attribute__ ((aligned (x)))
 526 #elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
 527 #define align(x) __declspec(align(x))
 528 #else
 529 #define align(x)
 530 #endif
 531
 532 #if _MSC_VER >= 1200
 533 #pragma warning( disable: 4244 )
 534 #endif
 535
 536 /**
 537  * This structure represents a two-tier histogram. The first tier (known as the
 538  * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
 539  * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
 540  * coarse bucket designated by the 4 MSBs of the fine bucket value.
 541  *
 542  * The structure is aligned on 16 bits, which is a prerequisite for SIMD
 543  * instructions. Each bucket is 16 bit wide, which means that extra care must be
 544  * taken to prevent overflow.
 545  */
 546 typedef struct align(16)
 547 {
 548     ushort coarse[16];
 549     ushort fine[16][16];
 550 } Histogram;
 551
 552 /**
 553  * HOP is short for Histogram OPeration. This macro makes an operation \a op on
 554  * histogram \a h for pixel value \a x. It takes care of handling both levels.
 555  */
 556 #define HOP(h,x,op) \
 557     h.coarse[x>>4] op; \
 558     *((ushort*) h.fine + x) op;
 559
 560 #define COP(c,j,x,op) \
 561     h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
 562     h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
 563
 564 #if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
 565 #define MEDIAN_HAVE_SIMD 1
 566 #else
 567 #define MEDIAN_HAVE_SIMD 0
 568 #endif
 569
 570 /**
 571  * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
 572  * SSE2, MMX or Altivec, if available.
 573  */
 574 #if defined(__SSE2__)
 575 static inline void histogram_add( const ushort x[16], ushort y[16] )
 576 {
 577     _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
 578         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
 579     _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
 580         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
 581 }
 582 #elif defined(__MMX__)
 583 static inline void histogram_add( const ushort x[16], ushort y[16] )
 584 {
 585     *(__m64*) &y[0]  = _mm_add_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
 586     *(__m64*) &y[4]  = _mm_add_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
 587     *(__m64*) &y[8]  = _mm_add_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
 588     *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
 589 }
 590 #elif defined(__ALTIVEC__)
 591 static inline void histogram_add( const ushort x[16], ushort y[16] )
 592 {
 593     *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
 594     *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
 595 }
 596 #else
 597 static inline void histogram_add( const ushort x[16], ushort y[16] )
 598 {
 599     int i;
 600     for( i = 0; i < 16; ++i )
 601         y[i] = (ushort)(y[i] + x[i]);
 602 }
 603 #endif
 604
 605 /**
 606  * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
 607  * of SSE2, MMX or Altivec, if available.
 608  */
 609 #if defined(__SSE2__)
 610 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 611 {
 612     _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
 613         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
 614     _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
 615         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
 616 }
 617 #elif defined(__MMX__)
 618 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 619 {
 620     *(__m64*) &y[0]  = _mm_sub_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
 621     *(__m64*) &y[4]  = _mm_sub_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
 622     *(__m64*) &y[8]  = _mm_sub_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
 623     *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
 624 }
 625 #elif defined(__ALTIVEC__)
 626 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 627 {
 628     *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
 629     *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
 630 }
 631 #else
 632 static inline void histogram_sub( const ushort x[16], ushort y[16] )
 633 {
 634     int i;
 635     for( i = 0; i < 16; ++i )
 636         y[i] = (ushort)(y[i] - x[i]);
 637 }
 638 #endif
 639
 640 static inline void histogram_muladd( int a, const ushort x[16],
 641         ushort y[16] )
 642 {
 643     int i;
 644     for ( i = 0; i < 16; ++i )
 645         y[i] = (ushort)(y[i] + a * x[i]);
 646 }
 647
 648 static CvStatus CV_STDCALL
 649 icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
 650                          CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
 651 {
 652     int r = (kernel_size-1)/2;
 653     const int m = size.height, n = size.width;
 654     int i, j, k, c;
 655     const unsigned char *p, *q;
 656     Histogram H[4];
 657     ushort *h_coarse, *h_fine, luc[4][16];
 658
 659     if( size.height < r || size.width < r )
 660         return CV_BADSIZE_ERR;
 661
 662     assert( src );
 663     assert( dst );
 664     assert( r >= 0 );
 665     assert( size.width >= 2*r+1 );
 666     assert( size.height >= 2*r+1 );
 667     assert( src_step != 0 );
 668     assert( dst_step != 0 );
 669
 670     h_coarse = (ushort*) cvAlloc(  1 * 16 * n * cn * sizeof(ushort) );
 671     h_fine   = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
 672     memset( h_coarse, 0,  1 * 16 * n * cn * sizeof(ushort) );
 673     memset( h_fine,   0, 16 * 16 * n * cn * sizeof(ushort) );
 674
 675     /* First row initialization */
 676     for ( j = 0; j < n; ++j ) {
 677         for ( c = 0; c < cn; ++c ) {
 678             COP( c, j, src[cn*j+c], += r+1 );
 679         }
 680     }
 681     for ( i = 0; i < r; ++i ) {
 682         for ( j = 0; j < n; ++j ) {
 683             for ( c = 0; c < cn; ++c ) {
 684                 COP( c, j, src[src_step*i+cn*j+c], ++ );
 685             }
 686         }
 687     }
 688
 689     for ( i = 0; i < m; ++i ) {
 690
 691         /* Update column histograms for entire row. */
 692         p = src + src_step * MAX( 0, i-r-1 );
 693         q = p + cn * n;
 694         for ( j = 0; p != q; ++j ) {
 695             for ( c = 0; c < cn; ++c, ++p ) {
 696                 COP( c, j, *p, -- );
 697             }
 698         }
 699
 700         p = src + src_step * MIN( m-1, i+r );
 701         q = p + cn * n;
 702         for ( j = 0; p != q; ++j ) {
 703             for ( c = 0; c < cn; ++c, ++p ) {
 704                 COP( c, j, *p, ++ );
 705             }
 706         }
 707
 708         /* First column initialization */
 709         memset( H, 0, cn*sizeof(H[0]) );
 710         memset( luc, 0, cn*sizeof(luc[0]) );
 711         if ( pad_left ) {
 712             for ( c = 0; c < cn; ++c ) {
 713                 histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
 714             }
 715         }
 716         for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
 717             for ( c = 0; c < cn; ++c ) {
 718                 histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
 719             }
 720         }
 721         for ( c = 0; c < cn; ++c ) {
 722             for ( k = 0; k < 16; ++k ) {
 723                 histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
 724             }
 725         }
 726
 727         for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
 728             for ( c = 0; c < cn; ++c ) {
 729                 int t = 2*r*r + 2*r, b, sum = 0;
 730                 ushort* segment;
 731
 732                 histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
 733
 734                 /* Find median at coarse level */
 735                 for ( k = 0; k < 16 ; ++k ) {
 736                     sum += H[c].coarse[k];
 737                     if ( sum > t ) {
 738                         sum -= H[c].coarse[k];
 739                         break;
 740                     }
 741                 }
 742                 assert( k < 16 );
 743
 744                 /* Update corresponding histogram segment */
 745                 if ( luc[c][k] <= j-r ) {
 746                     memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
 747                     for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
 748                         histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
 749                     }
 750                     if ( luc[c][k] < j+r+1 ) {
 751                         histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
 752                         luc[c][k] = (ushort)(j+r+1);
 753                     }
 754                 }
 755                 else {
 756                     for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
 757                         histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
 758                         histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
 759                     }
 760                 }
 761
 762                 histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
 763
 764                 /* Find median in segment */
 765                 segment = H[c].fine[k];
 766                 for ( b = 0; b < 16 ; ++b ) {
 767                     sum += segment[b];
 768                     if ( sum > t ) {
 769                         dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
 770                         break;
 771                     }
 772                 }
 773                 assert( b < 16 );
 774             }
 775         }
 776     }
 777
 778 #if defined(__MMX__)
 779     _mm_empty();
 780 #endif
 781
 782     cvFree(&h_coarse);
 783     cvFree(&h_fine);
 784
 785 #undef HOP
 786 #undef COP
 787     return CV_OK;
 788 }
 789
 790
 791 #if _MSC_VER >= 1200
 792 #pragma warning( default: 4244 )
 793 #endif
 794
 795
 796 static CvStatus CV_STDCALL
 797 icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
 798                          CvSize size, int m, int cn )
 799 {
 800     #define N  16
 801     int     zone0[4][N];
 802     int     zone1[4][N*N];
 803     int     x, y;
 804     int     n2 = m*m/2;
 805     int     nx = (m + 1)/2 - 1;
 806     uchar*  src_max = src + size.height*src_step;
 807     uchar*  src_right = src + size.width*cn;
 808
 809     #define UPDATE_ACC01( pix, cn, op ) \
 810     {                                   \
 811         int p = (pix);                  \
 812         zone1[cn][p] op;                \
 813         zone0[cn][p >> 4] op;           \
 814     }
 815
 816     if( size.height < nx || size.width < nx )
 817         return CV_BADSIZE_ERR;
 818
 819     if( m == 3 )
 820     {
 821         size.width *= cn;
 822
 823         for( y = 0; y < size.height; y++, dst += dst_step )
 824         {
 825             const uchar* src0 = src + src_step*(y-1);
 826             const uchar* src1 = src0 + src_step;
 827             const uchar* src2 = src1 + src_step;
 828             if( y == 0 )
 829                 src0 = src1;
 830             else if( y == size.height - 1 )
 831                 src2 = src1;
 832
 833             for( x = 0; x < 2*cn; x++ )
 834             {
 835                 int x0 = x < cn ? x : size.width - 3*cn + x;
 836                 int x2 = x < cn ? x + cn : size.width - 2*cn + x;
 837                 int x1 = x < cn ? x0 : x2, t;
 838
 839                 int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
 840                 int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
 841                 int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
 842
 843                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 844                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
 845                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
 846                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 847                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
 848                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
 849                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
 850                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
 851                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
 852                 CV_MINMAX_8U(p4, p2);
 853                 dst[x1] = (uchar)p4;
 854             }
 855
 856             for( x = cn; x < size.width - cn; x++ )
 857             {
 858                 int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
 859                 int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
 860                 int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
 861                 int t;
 862
 863                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 864                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
 865                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
 866                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
 867                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
 868                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
 869                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
 870                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
 871                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
 872                 CV_MINMAX_8U(p4, p2);
 873
 874                 dst[x] = (uchar)p4;
 875             }
 876         }
 877
 878         return CV_OK;
 879     }
 880
 881     for( x = 0; x < size.width; x++, dst += cn )
 882     {
 883         uchar* dst_cur = dst;
 884         uchar* src_top = src;
 885         uchar* src_bottom = src;
 886         int    k, c;
 887         int    x0 = -1;
 888         int    src_step1 = src_step, dst_step1 = dst_step;
 889
 890         if( x % 2 != 0 )
 891         {
 892             src_bottom = src_top += src_step*(size.height-1);
 893             dst_cur += dst_step*(size.height-1);
 894             src_step1 = -src_step1;
 895             dst_step1 = -dst_step1;
 896         }
 897
 898         if( x <= m/2 )
 899             nx++;
 900
 901         if( nx < m )
 902             x0 = x < m/2 ? 0 : (nx-1)*cn;
 903
 904         // init accumulator
 905         memset( zone0, 0, sizeof(zone0[0])*cn );
 906         memset( zone1, 0, sizeof(zone1[0])*cn );
 907
 908         for( y = 0; y <= m/2; y++ )
 909         {
 910             for( c = 0; c < cn; c++ )
 911             {
 912                 if( y > 0 )
 913                 {
 914                     if( x0 >= 0 )
 915                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
 916                     for( k = 0; k < nx*cn; k += cn )
 917                         UPDATE_ACC01( src_bottom[k+c], c, ++ );
 918                 }
 919                 else
 920                 {
 921                     if( x0 >= 0 )
 922                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
 923                     for( k = 0; k < nx*cn; k += cn )
 924                         UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
 925                 }
 926             }
 927
 928             if( src_step1 > 0 && y < size.height-1 ||
 929                 src_step1 < 0 && size.height-y-1 > 0 )
 930                 src_bottom += src_step1;
 931         }
 932
 933         for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
 934         {
 935             // find median
 936             for( c = 0; c < cn; c++ )
 937             {
 938                 int s = 0;
 939                 for( k = 0; ; k++ )
 940                 {
 941                     int t = s + zone0[c][k];
 942                     if( t > n2 ) break;
 943                     s = t;
 944                 }
 945
 946                 for( k *= N; ;k++ )
 947                 {
 948                     s += zone1[c][k];
 949                     if( s > n2 ) break;
 950                 }
 951
 952                 dst_cur[c] = (uchar)k;
 953             }
 954
 955             if( y+1 == size.height )
 956                 break;
 957
 958             if( cn == 1 )
 959             {
 960                 for( k = 0; k < nx; k++ )
 961                 {
 962                     int p = src_top[k];
 963                     int q = src_bottom[k];
 964                     zone1[0][p]--;
 965                     zone0[0][p>>4]--;
 966                     zone1[0][q]++;
 967                     zone0[0][q>>4]++;
 968                 }
 969             }
 970             else if( cn == 3 )
 971             {
 972                 for( k = 0; k < nx*3; k += 3 )
 973                 {
 974                     UPDATE_ACC01( src_top[k], 0, -- );
 975                     UPDATE_ACC01( src_top[k+1], 1, -- );
 976                     UPDATE_ACC01( src_top[k+2], 2, -- );
 977
 978                     UPDATE_ACC01( src_bottom[k], 0, ++ );
 979                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
 980                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
 981                 }
 982             }
 983             else
 984             {
 985                 assert( cn == 4 );
 986                 for( k = 0; k < nx*4; k += 4 )
 987                 {
 988                     UPDATE_ACC01( src_top[k], 0, -- );
 989                     UPDATE_ACC01( src_top[k+1], 1, -- );
 990                     UPDATE_ACC01( src_top[k+2], 2, -- );
 991                     UPDATE_ACC01( src_top[k+3], 3, -- );
 992
 993                     UPDATE_ACC01( src_bottom[k], 0, ++ );
 994                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
 995                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
 996                     UPDATE_ACC01( src_bottom[k+3], 3, ++ );
 997                 }
 998             }
 999
1000             if( x0 >= 0 )
1001             {
1002                 for( c = 0; c < cn; c++ )
1003                 {
1004                     UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005                     UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1006                 }
1007             }
1008
1009             if( src_step1 > 0 && src_bottom + src_step1 < src_max ||
1010                 src_step1 < 0 && src_bottom + src_step1 >= src )
1011                 src_bottom += src_step1;
1012
1013             if( y >= m/2 )
1014                 src_top += src_step1;
1015         }
1016
1017         if( x >= m/2 )
1018             src += cn;
1019         if( src + nx*cn > src_right ) nx--;
1020     }
1021 #undef N
1022 #undef UPDATE_ACC
1023     return CV_OK;
1024 }
1025
1026
1027 /****************************************************************************************\
1028                                    Bilateral Filtering
1029 \****************************************************************************************/
1030
1031 static void
1032 icvBilateralFiltering_8u( const CvMat* src, CvMat* dst, int d,
1033                           double sigma_color, double sigma_space )
1034 {
1035     CvMat* temp = 0;
1036     float* color_weight = 0;
1037     float* space_weight = 0;
1038     int* space_ofs = 0;
1039
1040     CV_FUNCNAME( "icvBilateralFiltering_8u" );
1041
1042     __BEGIN__;
1043
1044     double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045     double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046     int cn = CV_MAT_CN(src->type);
1047     int i, j, k, maxk, radius;
1048     CvSize size = cvGetMatSize(src);
1049
1050     if( CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051         CV_MAT_TYPE(src->type) != CV_8UC3 ||
1052         !CV_ARE_TYPES_EQ(src, dst) )
1053         CV_ERROR( CV_StsUnsupportedFormat,
1054         "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1055
1056     if( sigma_color <= 0 )
1057         sigma_color = 1;
1058     if( sigma_space <= 0 )
1059         sigma_space = 1;
1060
1061     if( d == 0 )
1062         radius = cvRound(sigma_space*1.5);
1063     else
1064         radius = d/2;
1065     radius = MAX(radius, 1);
1066     d = radius*2 + 1;
1067
1068     CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1069         src->cols + radius*2, src->type ));
1070     CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1071     CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1072     CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1073     CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1074
1075     // initialize color-related bilateral filter coefficients
1076     for( i = 0; i < 256*cn; i++ )
1077         color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1078
1079     // initialize space-related bilateral filter coefficients
1080     for( i = -radius, maxk = 0; i <= radius; i++ )
1081         for( j = -radius; j <= radius; j++ )
1082         {
1083             double r = sqrt((double)i*i + (double)j*j);
1084             if( r > radius )
1085                 continue;
1086             space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1087             space_ofs[maxk++] = i*temp->step + j*cn;
1088         }
1089
1090     for( i = 0; i < size.height; i++ )
1091     {
1092         const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1093         uchar* dptr = dst->data.ptr + i*dst->step;
1094
1095         if( cn == 1 )
1096         {
1097             for( j = 0; j < size.width; j++ )
1098             {
1099                 float sum = 0, wsum = 0;
1100                 int val0 = sptr[j];
1101                 for( k = 0; k < maxk; k++ )
1102                 {
1103                     int val = sptr[j + space_ofs[k]];
1104                     float w = space_weight[k]*color_weight[abs(val - val0)];
1105                     sum += val*w;
1106                     wsum += w;
1107                 }
1108                 // overflow is not possible here => there is no need to use CV_CAST_8U
1109                 dptr[j] = (uchar)cvRound(sum/wsum);
1110             }
1111         }
1112         else
1113         {
1114             assert( cn == 3 );
1115             for( j = 0; j < size.width*3; j += 3 )
1116             {
1117                 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1118                 int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1119                 for( k = 0; k < maxk; k++ )
1120                 {
1121                     const uchar* sptr_k = sptr + j + space_ofs[k];
1122                     int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1123                     float w = space_weight[k]*color_weight[abs(b - b0) +
1124                         abs(g - g0) + abs(r - r0)];
1125                     sum_b += b*w; sum_g += g*w; sum_r += r*w;
1126                     wsum += w;
1127                 }
1128                 wsum = 1.f/wsum;
1129                 b0 = cvRound(sum_b*wsum);
1130                 g0 = cvRound(sum_g*wsum);
1131                 r0 = cvRound(sum_r*wsum);
1132                 dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1133             }
1134         }
1135     }
1136
1137     __END__;
1138
1139     cvReleaseMat( &temp );
1140     cvFree( &color_weight );
1141     cvFree( &space_weight );
1142     cvFree( &space_ofs );
1143 }
1144
1145
1146 static void icvBilateralFiltering_32f( const CvMat* src, CvMat* dst, int d,
1147                                        double sigma_color, double sigma_space )
1148 {
1149         CvMat* temp = 0;
1150     float* space_weight = 0;
1151     int* space_ofs = 0;
1152     float *expLUT = 0;
1153
1154     CV_FUNCNAME( "icvBilateralFiltering_32f" );
1155
1156     __BEGIN__;
1157
1158     double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1159     double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1160     int cn = CV_MAT_CN(src->type);
1161     int i, j, k, maxk, radius;
1162     double minValSrc=-1, maxValSrc=1;
1163     const int kExpNumBinsPerChannel = 1 << 12;
1164     int kExpNumBins = 0;
1165     float lastExpVal = 1.f;
1166     int temp_step;
1167     float len, scale_index;
1168     CvMat src_reshaped;
1169
1170     CvSize size = cvGetMatSize(src);
1171
1172     if( CV_MAT_TYPE(src->type) != CV_32FC1 &&
1173         CV_MAT_TYPE(src->type) != CV_32FC3 ||
1174         !CV_ARE_TYPES_EQ(src, dst) )
1175         CV_ERROR( CV_StsUnsupportedFormat,
1176         "Both source and destination must be 32-bit float, single-channel or 3-channel images" );
1177
1178     if( sigma_color <= 0 )
1179         sigma_color = 1;
1180     if( sigma_space <= 0 )
1181         sigma_space = 1;
1182
1183     if( d == 0 )
1184         radius = cvRound(sigma_space*1.5);
1185     else
1186         radius = d/2;
1187     radius = MAX(radius, 1);
1188     d = radius*2 + 1;
1189     // compute the min/max range for the input image (even if multichannel)
1190
1191     CV_CALL( cvReshape( src, &src_reshaped, 1 ) );
1192     CV_CALL( cvMinMaxLoc(&src_reshaped, &minValSrc, &maxValSrc) );
1193
1194     // temporary copy of the image with borders for easy processing
1195     CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1196         src->cols + radius*2, src->type ));
1197     temp_step = temp->step/sizeof(float);
1198     CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1199     // allocate lookup tables
1200     CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1201     CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1202
1203     // assign a length which is slightly more than needed
1204     len = (float)(maxValSrc - minValSrc) * cn;
1205     kExpNumBins = kExpNumBinsPerChannel * cn;
1206     CV_CALL( expLUT = (float*)cvAlloc((kExpNumBins+2) * sizeof(expLUT[0])));
1207     scale_index = kExpNumBins/len;
1208
1209     // initialize the exp LUT
1210     for( i = 0; i < kExpNumBins+2; i++ )
1211     {
1212         if( lastExpVal > 0.f )
1213         {
1214             double val =  i / scale_index;
1215             expLUT[i] = (float)exp(val * val * gauss_color_coeff);
1216             lastExpVal = expLUT[i];
1217         }
1218         else
1219             expLUT[i] = 0.f;
1220     }
1221
1222     // initialize space-related bilateral filter coefficients
1223     for( i = -radius, maxk = 0; i <= radius; i++ )
1224         for( j = -radius; j <= radius; j++ )
1225         {
1226             double r = sqrt((double)i*i + (double)j*j);
1227             if( r > radius )
1228                 continue;
1229             space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1230             space_ofs[maxk++] = i*temp_step + j*cn;
1231         }
1232
1233     for( i = 0; i < size.height; i++ )
1234     {
1235             const float* sptr = temp->data.fl + (i+radius)*temp_step + radius*cn;
1236         float* dptr = (float*)(dst->data.ptr + i*dst->step);
1237
1238         if( cn == 1 )
1239         {
1240             for( j = 0; j < size.width; j++ )
1241             {
1242                 float sum = 0, wsum = 0;
1243                 float val0 = sptr[j];
1244                 for( k = 0; k < maxk; k++ )
1245                 {
1246                     float val = sptr[j + space_ofs[k]];
1247                                         float alpha = (float)(fabs(val - val0)*scale_index);
1248                     int idx = cvFloor(alpha);
1249                     alpha -= idx;
1250                     float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1251                         sum += val*w;
1252                     wsum += w;
1253                 }
1254                 dptr[j] = (float)(sum/wsum);
1255             }
1256         }
1257         else
1258         {
1259             assert( cn == 3 );
1260             for( j = 0; j < size.width*3; j += 3 )
1261             {
1262                 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1263                 float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1264                 for( k = 0; k < maxk; k++ )
1265                 {
1266                     const float* sptr_k = sptr + j + space_ofs[k];
1267                     float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1268                                         float alpha = (float)((fabs(b - b0) + fabs(g - g0) + fabs(r - r0))*scale_index);
1269                     int idx = cvFloor(alpha);
1270                     alpha -= idx;
1271                     float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1272                     sum_b += b*w; sum_g += g*w; sum_r += r*w;
1273                     wsum += w;
1274                 }
1275                 wsum = 1.f/wsum;
1276                 b0 = sum_b*wsum;
1277                 g0 = sum_g*wsum;
1278                 r0 = sum_r*wsum;
1279                 dptr[j] = b0; dptr[j+1] = g0; dptr[j+2] = r0;
1280             }
1281         }
1282     }
1283
1284     __END__;
1285
1286     cvReleaseMat( &temp );
1287     cvFree( &space_weight );
1288     cvFree( &space_ofs );
1289     cvFree( &expLUT );
1290 }
1291
1292 //////////////////////////////// IPP smoothing functions /////////////////////////////////
1293
1294 icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1295 icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1296 icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1297
1298 icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1299 icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1300 icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1301 icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1302 icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1303 icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1304
1305 typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1306 ( const void* src, int srcstep, void* dst, int dststep,
1307   CvSize size, CvSize ksize, CvPoint anchor );
1308
1309 //////////////////////////////////////////////////////////////////////////////////////////
1310
1311 CV_IMPL void
1312 cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1313           int param1, int param2, double param3, double param4 )
1314 {
1315     CvBoxFilter box_filter;
1316     CvSepFilter gaussian_filter;
1317
1318     CvMat* temp = 0;
1319
1320     CV_FUNCNAME( "cvSmooth" );
1321
1322     __BEGIN__;
1323
1324     int coi1 = 0, coi2 = 0;
1325     CvMat srcstub, *src = (CvMat*)srcarr;
1326     CvMat dststub, *dst = (CvMat*)dstarr;
1327     CvSize size;
1328     int src_type, dst_type, depth, cn;
1329     double sigma1 = 0, sigma2 = 0;
1330     bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1331
1332     CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1333     CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1334
1335     if( coi1 != 0 || coi2 != 0 )
1336         CV_ERROR( CV_BadCOI, "" );
1337
1338     src_type = CV_MAT_TYPE( src->type );
1339     dst_type = CV_MAT_TYPE( dst->type );
1340     depth = CV_MAT_DEPTH(src_type);
1341     cn = CV_MAT_CN(src_type);
1342     size = cvGetMatSize(src);
1343
1344     if( !CV_ARE_SIZES_EQ( src, dst ))
1345         CV_ERROR( CV_StsUnmatchedSizes, "" );
1346
1347     if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1348         CV_ERROR( CV_StsUnmatchedFormats,
1349         "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1350
1351     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1352         smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1353     {
1354         // automatic detection of kernel size from sigma
1355         if( smooth_type == CV_GAUSSIAN )
1356         {
1357             sigma1 = param3;
1358             sigma2 = param4 ? param4 : param3;
1359
1360             if( param1 == 0 && sigma1 > 0 )
1361                 param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1362             if( param2 == 0 && sigma2 > 0 )
1363                 param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1364         }
1365
1366         if( param2 == 0 )
1367             param2 = size.height == 1 ? 1 : param1;
1368         if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1369             CV_ERROR( CV_StsOutOfRange,
1370                 "Both mask width and height must be >=1 and odd" );
1371
1372         if( param1 == 1 && param2 == 1 )
1373         {
1374             cvConvert( src, dst );
1375             EXIT;
1376         }
1377     }
1378
1379     if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1380         size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1381     {
1382         CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1383
1384         if( smooth_type == CV_BLUR )
1385         {
1386             ipp_median_box_func =
1387                 src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1388                 src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1389                 src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1390                 src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1391                 src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1392                 src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1393         }
1394         else if( smooth_type == CV_MEDIAN )
1395         {
1396             ipp_median_box_func =
1397                 src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1398                 src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1399                 src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1400         }
1401
1402         if( ipp_median_box_func )
1403         {
1404             CvSize el_size = { param1, param2 };
1405             CvPoint el_anchor = { param1/2, param2/2 };
1406             int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1407                                        // overhead of the current IPP code etc.
1408             const uchar* shifted_ptr;
1409             int y, dy = 0;
1410             int temp_step, dst_step = dst->step;
1411
1412             CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1413
1414             shifted_ptr = temp->data.ptr +
1415                 el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1416             temp_step = temp->step ? temp->step : CV_STUB_STEP;
1417
1418             for( y = 0; y < src->rows; y += dy )
1419             {
1420                 dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1421                 IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1422                     dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1423                     el_size, el_anchor ));
1424             }
1425             EXIT;
1426         }
1427     }
1428
1429     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1430     {
1431         CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1432             smooth_type == CV_BLUR, cvSize(param1, param2) ));
1433         CV_CALL( box_filter.process( src, dst ));
1434     }
1435     else if( smooth_type == CV_MEDIAN )
1436     {
1437         int img_size_mp = size.width*size.height;
1438         img_size_mp = (img_size_mp + (1<<19)) >> 20;
1439
1440         if( depth != CV_8U || cn != 1 && cn != 3 && cn != 4 )
1441             CV_ERROR( CV_StsUnsupportedFormat,
1442             "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1443
1444         if( size.width < param1*2 || size.height < param1*2 ||
1445             param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1446         {
1447             // Special case optimized for 3x3
1448             IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1449                 dst->data.ptr, dst->step, size, param1, cn ));
1450         }
1451         else
1452         {
1453             const int r = (param1 - 1) / 2;
1454             const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn );  // assume a 256 kB cache size
1455             const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1456                     (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1457             const int STRIPE_SIZE = (int) cvCeil(
1458                     (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1459
1460             for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1461             {
1462                 int stripe = STRIPE_SIZE;
1463                 // Make sure that the filter kernel fits into one stripe.
1464                 if( i + STRIPE_SIZE - 2*r >= size.width ||
1465                     size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1466                     stripe = size.width - i;
1467
1468                 IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1469                     dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1470                     param1, cn, i == 0, stripe == size.width - i ));
1471
1472                 if( stripe == size.width - i )
1473                     break;
1474             }
1475         }
1476     }
1477     else if( smooth_type == CV_GAUSSIAN )
1478     {
1479         CvSize ksize = { param1, param2 };
1480         float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1481         float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1482         CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1483         CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1484
1485         CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1486         if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1487             CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1488         else
1489             KY.data.fl = kx;
1490
1491         if( have_ipp && size.width >= param1*3 &&
1492             size.height >= param2 && param1 > 1 && param2 > 1 )
1493         {
1494             int done;
1495             CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1496                         cvPoint(ksize.width/2,ksize.height/2)));
1497             if( done )
1498                 EXIT;
1499         }
1500
1501         CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1502         CV_CALL( gaussian_filter.process( src, dst ));
1503     }
1504     else if( smooth_type == CV_BILATERAL )
1505     {
1506         if( param2 != 0 && (param2 != param1 || param1 % 2 == 0) )
1507             CV_ERROR( CV_StsBadSize, "Bilateral filter only supports square windows of odd size" );
1508
1509         switch( src_type )
1510         {
1511         case CV_32FC1:
1512         case CV_32FC3:
1513             CV_CALL( icvBilateralFiltering_32f( src, dst, param1, param3, param4 ));
1514             break;
1515         case CV_8UC1:
1516         case CV_8UC3:
1517             CV_CALL( icvBilateralFiltering_8u( src, dst, param1, param3, param4 ));
1518             break;
1519         default:
1520             CV_ERROR( CV_StsUnsupportedFormat,
1521                 "Unknown/unsupported format: bilateral filter only supports 8uC1, 8uC3, 32fC1 and 32fC3 formats" );
1522         }
1523     }
1524
1525     __END__;
1526
1527     cvReleaseMat( &temp );
1528 }
1529
1530 /* End of file. */