vcs.maemo.org Git - mardrone/blob - mardrone/ARDrone_SDK_Version_1_8_20110726/ARDroneLib/VLIB/video_dct.c

   1 #include <VLIB/video_dct.h>
   2 #include <VLIB/Platform/video_utils.h>
   3 #include <VP_Os/vp_os_malloc.h>
   4
   5 #define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
   6 #define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
   7 #define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
   8 #define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
   9 #define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
  10 #define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
  11 #define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
  12 #define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
  13 #define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
  14 #define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
  15 #define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
  16 #define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
  17
  18 #define INT32       int
  19 #define DCTELEM     int
  20 #define DCTSIZE     8
  21 #define DCTSIZE2    64
  22 #define CONST_BITS  13
  23 #define PASS1_BITS  1
  24 #define ONE     ((INT32) 1)
  25 #define MULTIPLY(var,const)  ((var) * (const))
  26 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
  27 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
  28
  29 #ifndef HAS_FDCT_COMPUTE
  30 void fdct(const unsigned short* in, short* out)
  31 {
  32   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  33   INT32 tmp10, tmp11, tmp12, tmp13;
  34   INT32 z1, z2, z3, z4, z5;
  35   int ctr;
  36   // SHIFT_TEMPS
  37
  38   int data[DCTSIZE * DCTSIZE];
  39   int i, j;
  40   int* dataptr = data;
  41
  42   for( i = 0; i < DCTSIZE; i++ )
  43   {
  44     for( j = 0; j < DCTSIZE; j++ )
  45     {
  46       int temp;
  47
  48       temp = in[i*DCTSIZE + j];
  49       dataptr[i*DCTSIZE + j] = temp;
  50     }
  51   }
  52
  53   /* Pass 1: process rows. */
  54   /* Note results are scaled up by sqrt(8) compared to a true DCT; */
  55   /* furthermore, we scale the results by 2**PASS1_BITS. */
  56
  57   dataptr = data;
  58   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
  59     tmp0 = dataptr[0] + dataptr[7];
  60     tmp7 = dataptr[0] - dataptr[7];
  61     tmp1 = dataptr[1] + dataptr[6];
  62     tmp6 = dataptr[1] - dataptr[6];
  63     tmp2 = dataptr[2] + dataptr[5];
  64     tmp5 = dataptr[2] - dataptr[5];
  65     tmp3 = dataptr[3] + dataptr[4];
  66     tmp4 = dataptr[3] - dataptr[4];
  67
  68     /* Even part per LL&M figure 1 --- note that published figure is faulty;
  69      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
  70      */
  71
  72     tmp10 = tmp0 + tmp3;
  73     tmp13 = tmp0 - tmp3;
  74     tmp11 = tmp1 + tmp2;
  75     tmp12 = tmp1 - tmp2;
  76
  77     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
  78     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
  79
  80     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
  81     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
  82     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
  83
  84     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
  85      * cK represents cos(K*pi/16).
  86      * i0..i3 in the paper are tmp4..tmp7 here.
  87      */
  88
  89     z1 = tmp4 + tmp7;
  90     z2 = tmp5 + tmp6;
  91     z3 = tmp4 + tmp6;
  92     z4 = tmp5 + tmp7;
  93     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
  94
  95     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
  96     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
  97     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
  98     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
  99     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 100     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 101     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 102     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 103
 104     z3 += z5;
 105     z4 += z5;
 106
 107     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
 108     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
 109     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
 110     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
 111
 112     dataptr += DCTSIZE;         /* advance pointer to next row */
 113   }
 114
 115   /* Pass 2: process columns.
 116    * We remove the PASS1_BITS scaling, but leave the results scaled up
 117    * by an overall factor of 8.
 118    */
 119
 120   dataptr = data;
 121   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 122     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 123     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 124     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 125     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 126     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 127     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 128     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 129     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 130
 131     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 132      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 133      */
 134
 135     tmp10 = tmp0 + tmp3;
 136     tmp13 = tmp0 - tmp3;
 137     tmp11 = tmp1 + tmp2;
 138     tmp12 = tmp1 - tmp2;
 139
 140     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
 141     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
 142
 143     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 144     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
 145     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
 146
 147     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 148      * cK represents cos(K*pi/16).
 149      * i0..i3 in the paper are tmp4..tmp7 here.
 150      */
 151
 152     z1 = tmp4 + tmp7;
 153     z2 = tmp5 + tmp6;
 154     z3 = tmp4 + tmp6;
 155     z4 = tmp5 + tmp7;
 156     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 157
 158     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 159     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 160     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 161     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 162     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 163     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 164     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 165     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 166
 167     z3 += z5;
 168     z4 += z5;
 169
 170     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
 171     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
 172     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
 173     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
 174
 175     dataptr++;  /* advance pointer to next column */
 176   }
 177
 178   for( i = 0; i < DCTSIZE; i++ )
 179     for( j = 0; j < DCTSIZE; j++ )
 180       out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
 181 }
 182 #endif // HAS_FDCT_COMPUTE
 183
 184 #ifndef HAS_IDCT_COMPUTE
 185 void idct(const short* in, unsigned short* out)
 186 {
 187   INT32 tmp0, tmp1, tmp2, tmp3;
 188   INT32 tmp10, tmp11, tmp12, tmp13;
 189   INT32 z1, z2, z3, z4, z5;
 190   int* wsptr;
 191   int* outptr;
 192   const short* inptr;
 193   int ctr;
 194   int workspace[DCTSIZE2];      /* buffers data between passes */
 195   int data[DCTSIZE2];
 196   // SHIFT_TEMPS
 197
 198   /* Pass 1: process columns from input, store into work array. */
 199   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 200   /* furthermore, we scale the results by 2**PASS1_BITS. */
 201
 202   inptr = in;
 203   wsptr = workspace;
 204   for (ctr = DCTSIZE; ctr > 0; ctr--) {
 205     /* Due to quantization, we will usually find that many of the input
 206      * coefficients are zero, especially the AC terms.  We can exploit this
 207      * by short-circuiting the IDCT calculation for any column in which all
 208      * the AC terms are zero.  In that case each output is equal to the
 209      * DC coefficient (with scale factor as needed).
 210      * With typical images and quantization tables, half or more of the
 211      * column DCT calculations can be simplified this way.
 212      */
 213
 214     if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 215         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 216         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 217         inptr[DCTSIZE*7] == 0 ) {
 218       /* AC terms all zero */
 219       int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
 220
 221       wsptr[DCTSIZE*0] = dcval;
 222       wsptr[DCTSIZE*1] = dcval;
 223       wsptr[DCTSIZE*2] = dcval;
 224       wsptr[DCTSIZE*3] = dcval;
 225       wsptr[DCTSIZE*4] = dcval;
 226       wsptr[DCTSIZE*5] = dcval;
 227       wsptr[DCTSIZE*6] = dcval;
 228       wsptr[DCTSIZE*7] = dcval;
 229
 230       inptr++;  /* advance pointers to next column */
 231       wsptr++;
 232       continue;
 233     }
 234
 235     /* Even part: reverse the even part of the forward DCT. */
 236     /* The rotator is sqrt(2)*c(-6). */
 237
 238     z2 = inptr[DCTSIZE*2];
 239     z3 = inptr[DCTSIZE*6];
 240
 241     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 242     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 243     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 244
 245     z2 = inptr[DCTSIZE*0];
 246     z3 = inptr[DCTSIZE*4];
 247
 248     tmp0 = (z2 + z3) << CONST_BITS;
 249     tmp1 = (z2 - z3) << CONST_BITS;
 250
 251     tmp10 = tmp0 + tmp3;
 252     tmp13 = tmp0 - tmp3;
 253     tmp11 = tmp1 + tmp2;
 254     tmp12 = tmp1 - tmp2;
 255
 256     /* Odd part per figure 8; the matrix is unitary and hence its
 257      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 258      */
 259
 260     tmp0 = inptr[DCTSIZE*7];
 261     tmp1 = inptr[DCTSIZE*5];
 262     tmp2 = inptr[DCTSIZE*3];
 263     tmp3 = inptr[DCTSIZE*1];
 264
 265     z1 = tmp0 + tmp3;
 266     z2 = tmp1 + tmp2;
 267     z3 = tmp0 + tmp2;
 268     z4 = tmp1 + tmp3;
 269     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 270
 271     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 272     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 273     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 274     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 275     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 276     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 277     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 278     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 279
 280     z3 += z5;
 281     z4 += z5;
 282
 283     tmp0 += z1 + z3;
 284     tmp1 += z2 + z4;
 285     tmp2 += z2 + z3;
 286     tmp3 += z1 + z4;
 287
 288     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 289
 290     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 291     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 292     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 293     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 294     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 295     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 296     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 297     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 298
 299     inptr++;  /* advance pointers to next column */
 300     wsptr++;
 301   }
 302
 303   /* Pass 2: process rows from work array, store into output array. */
 304   /* Note that we must descale the results by a factor of 8 == 2**3, */
 305   /* and also undo the PASS1_BITS scaling. */
 306
 307   wsptr = workspace;
 308   outptr = data;
 309   for (ctr = 0; ctr < DCTSIZE; ctr++) {
 310     /* Even part: reverse the even part of the forward DCT. */
 311     /* The rotator is sqrt(2)*c(-6). */
 312
 313     z2 = (INT32) wsptr[2];
 314     z3 = (INT32) wsptr[6];
 315
 316     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 317     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 318     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 319
 320     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
 321     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
 322
 323     tmp10 = tmp0 + tmp3;
 324     tmp13 = tmp0 - tmp3;
 325     tmp11 = tmp1 + tmp2;
 326     tmp12 = tmp1 - tmp2;
 327
 328     /* Odd part per figure 8; the matrix is unitary and hence its
 329      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 330      */
 331
 332     tmp0 = (INT32) wsptr[7];
 333     tmp1 = (INT32) wsptr[5];
 334     tmp2 = (INT32) wsptr[3];
 335     tmp3 = (INT32) wsptr[1];
 336
 337     z1 = tmp0 + tmp3;
 338     z2 = tmp1 + tmp2;
 339     z3 = tmp0 + tmp2;
 340     z4 = tmp1 + tmp3;
 341     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 342
 343     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 344     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 345     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 346     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 347     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 348     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 349     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 350     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 351
 352     z3 += z5;
 353     z4 += z5;
 354
 355     tmp0 += z1 + z3;
 356     tmp1 += z2 + z4;
 357     tmp2 += z2 + z3;
 358     tmp3 += z1 + z4;
 359
 360     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 361
 362     outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 363     outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 364     outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 365     outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 366     outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 367     outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 368     outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 369     outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 370
 371     wsptr += DCTSIZE; /* advance pointer to next row */
 372     outptr += DCTSIZE;
 373   }
 374
 375   for(ctr = 0; ctr < DCTSIZE2; ctr++)
 376     out[ctr] = data[ctr];
 377 }
 378 #endif // HAS_IDCT_COMPUTE
 379
 380 #ifndef HAS_FDCT_COMPUTE
 381 int16_t* video_fdct_compute(int16_t* in, int16_t* out, int32_t num_macro_blocks)
 382 {
 383   while( num_macro_blocks > 0 )
 384   {
 385     fdct((uint16_t*)in, out);
 386
 387     in  += MCU_BLOCK_SIZE;
 388     out += MCU_BLOCK_SIZE;
 389
 390     fdct((uint16_t*)in, out);
 391
 392     in  += MCU_BLOCK_SIZE;
 393     out += MCU_BLOCK_SIZE;
 394
 395     fdct((uint16_t*)in, out);
 396
 397     in  += MCU_BLOCK_SIZE;
 398     out += MCU_BLOCK_SIZE;
 399
 400     fdct((uint16_t*)in, out);
 401
 402     in  += MCU_BLOCK_SIZE;
 403     out += MCU_BLOCK_SIZE;
 404
 405     fdct((uint16_t*)in, out);
 406
 407     in  += MCU_BLOCK_SIZE;
 408     out += MCU_BLOCK_SIZE;
 409
 410     fdct((uint16_t*)in, out);
 411
 412     in  += MCU_BLOCK_SIZE;
 413     out += MCU_BLOCK_SIZE;
 414
 415     num_macro_blocks--;
 416   }
 417
 418   return out;
 419 }
 420 #endif // HAS_FDCT_COMPUTE
 421
 422 #ifndef HAS_IDCT_COMPUTE
 423 int16_t* video_idct_compute(int16_t* in, int16_t* out, int32_t num_macro_blocks)
 424 {
 425   while( num_macro_blocks > 0 )
 426   {
 427     idct(in, (uint16_t*)out);
 428
 429     in  += MCU_BLOCK_SIZE;
 430     out += MCU_BLOCK_SIZE;
 431
 432     idct(in, (uint16_t*)out);
 433
 434     in  += MCU_BLOCK_SIZE;
 435     out += MCU_BLOCK_SIZE;
 436
 437     idct(in, (uint16_t*)out);
 438
 439     in  += MCU_BLOCK_SIZE;
 440     out += MCU_BLOCK_SIZE;
 441
 442     idct(in, (uint16_t*)out);
 443
 444     in  += MCU_BLOCK_SIZE;
 445     out += MCU_BLOCK_SIZE;
 446
 447     idct(in, (uint16_t*)out);
 448
 449     in  += MCU_BLOCK_SIZE;
 450     out += MCU_BLOCK_SIZE;
 451
 452     idct(in, (uint16_t*)out);
 453
 454     in  += MCU_BLOCK_SIZE;
 455     out += MCU_BLOCK_SIZE;
 456
 457     num_macro_blocks--;
 458   }
 459
 460   return out;
 461 }
 462 #endif // HAS_IDCT_COMPUTE