vcs.maemo.org Git - mardrone/blob - mardrone/ARDrone_SDK_Version_1_8_20110726/ARDroneLib/VP_SDK/Examples/linux/api_ifile_MJPEG_sdl.c

   1 #include <stdlib.h>
   2 #include <ctype.h>
   3
   4 #include <VP_Api/vp_api.h>
   5 #include <VP_Api/vp_api_thread_helper.h>
   6 #include <VP_Api/vp_api_error.h>
   7 #include <VP_Api/vp_api_picture.h>
   8 #include <VP_Stages/vp_stages_configs.h>
   9 #include <VP_Stages/vp_stages_io_console.h>
  10 #include <VP_Stages/vp_stages_o_sdl.h>
  11 #include <VP_Stages/vp_stages_io_com.h>
  12 #include <VP_Stages/vp_stages_io_file.h>
  13 #include <VP_Os/vp_os_print.h>
  14 #include <VP_Os/vp_os_malloc.h>
  15 #include <VP_Os/vp_os_delay.h>
  16
  17 #include <MJPEG/mjpeg.h>
  18 #include <MJPEG/dct.h>
  19
  20 #define NB_STAGES 3
  21
  22 #define CAMIF_BLOCKLINES_LOG2       4
  23 #define CAMIF_BLOCKLINES            (1 << CAMIF_BLOCKLINES_LOG2)
  24
  25 #define ACQ_WIDTH   (176)
  26 #define ACQ_HEIGHT  (144)
  27
  28 #define WINDOW_WIDTH  320
  29 #define WINDOW_HEIGHT 240
  30
  31
  32 PIPELINE_HANDLE pipeline_handle;
  33
  34 PROTO_THREAD_ROUTINE(escaper, nomParams);
  35 PROTO_THREAD_ROUTINE(app, nomParams);
  36
  37 BEGIN_THREAD_TABLE
  38   THREAD_TABLE_ENTRY(escaper, 20)
  39   THREAD_TABLE_ENTRY(app, 20)
  40 END_THREAD_TABLE
  41
  42
  43 ///*******************************************************************************************************************///
  44
  45 typedef struct _mjpeg_stage_decoding_config_t
  46 {
  47   stream_t          stream;
  48   mjpeg_t           mjpeg;
  49   vp_api_picture_t* picture;
  50
  51   uint32_t          out_buffer_size;
  52
  53 } mjpeg_stage_decoding_config_t;
  54
  55 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
  56 {
  57   uint32_t width = cfg->picture->width;
  58   uint32_t height = cfg->picture->height;
  59   enum PixelFormat format = cfg->picture->format;
  60
  61   stream_new( &cfg->stream, OUTPUT_STREAM );
  62
  63   return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, width, height, format );
  64 }
  65
  66 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
  67 {
  68   C_RESULT res;
  69   bool_t got_image;
  70
  71   vp_os_mutex_lock( &out->lock );
  72
  73   if(out->status == VP_API_STATUS_INIT)
  74   {
  75     out->numBuffers   = 1;
  76     out->buffers      = (int8_t**)cfg->picture;
  77     out->indexBuffer  = 0;
  78     out->lineSize     = 0;
  79
  80     out->status = VP_API_STATUS_PROCESSING;
  81   }
  82
  83   if( in->status == VP_API_STATUS_ENDED )
  84   {
  85     out->status = in->status;
  86   }
  87
  88   // Several cases must be handled in this stage
  89   // 1st: Input buffer is too small to decode a complete picture
  90   // 2nd: Input buffer is big enough to decode 1 frame
  91   // 3rd: Input buffer is so big we can decode more than 1 frame
  92
  93   if( in->size > 0 )
  94   {
  95     if( out->status == VP_API_STATUS_PROCESSING )
  96     {
  97       // Reinit stream with new data
  98       stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
  99     }
 100
 101     if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
 102     {
 103       // If out->size == 1 it means picture is ready
 104       out->size = 0;
 105       out->status = VP_API_STATUS_PROCESSING;
 106
 107       res = mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
 108
 109       // handle case 2 & 3
 110       if( FAILED(stream_is_empty( &cfg->stream )) )
 111       {
 112         // Some data are still in stream
 113         // Next time we run this stage we don't want this data to be lost
 114         // So flag it!
 115         out->status = VP_API_STATUS_STILL_RUNNING;
 116       }
 117
 118       if( got_image )
 119       {
 120         // we got one picture (handle case 1)
 121         out->size = 1;
 122
 123         PRINT( "%d picture decoded\n", cfg->mjpeg.num_frames );
 124       }
 125     }
 126   }
 127
 128   vp_os_mutex_unlock( &out->lock );
 129
 130   return C_OK;
 131 }
 132
 133 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
 134 {
 135   stream_delete( &cfg->stream );
 136
 137   return mjpeg_release( &cfg->mjpeg );
 138 }
 139
 140 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
 141   (vp_api_stage_handle_msg_t) NULL,
 142   (vp_api_stage_open_t) mjpeg_stage_decoding_open,
 143   (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
 144   (vp_api_stage_close_t) mjpeg_stage_decoding_close
 145 };
 146
 147
 148 int
 149 main(int argc, char **argv)
 150 {
 151   if(argc != 2)
 152   {
 153     PRINT("You must specify a filename.\n");
 154     return EXIT_FAILURE;
 155   }
 156
 157   START_THREAD(escaper, NO_PARAM);
 158   START_THREAD(app, argv);
 159
 160   JOIN_THREAD(escaper);
 161   JOIN_THREAD(app);
 162
 163   return EXIT_SUCCESS;
 164 }
 165
 166
 167 PROTO_THREAD_ROUTINE(app,argv)
 168 {
 169   vp_api_picture_t picture;
 170
 171   vp_api_io_pipeline_t    pipeline;
 172   vp_api_io_data_t        out;
 173   vp_api_io_stage_t       stages[NB_STAGES];
 174
 175   vp_stages_input_file_config_t     ifc;
 176   vp_stages_output_sdl_config_t     osc;
 177   mjpeg_stage_decoding_config_t     dec;
 178
 179   vp_os_memset(&ifc,0,sizeof(vp_stages_input_file_config_t));
 180
 181   ifc.name            = ((char**)argv)[1];
 182   ifc.buffer_size     = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
 183
 184   osc.width           = WINDOW_WIDTH;
 185   osc.height          = WINDOW_HEIGHT;
 186   osc.bpp             = 16;
 187   osc.window_width    = WINDOW_WIDTH;
 188   osc.window_height   = WINDOW_HEIGHT;
 189   osc.pic_width       = ACQ_WIDTH;
 190   osc.pic_height      = ACQ_HEIGHT;
 191   osc.y_size          = ACQ_WIDTH*ACQ_HEIGHT;
 192   osc.c_size          = (ACQ_WIDTH*ACQ_HEIGHT) >> 2;
 193
 194   /// Picture configuration
 195   picture.format        = PIX_FMT_YUV420P;
 196
 197   picture.width         = ACQ_WIDTH;
 198   picture.height        = ACQ_HEIGHT;
 199   picture.framerate     = 15;
 200
 201   picture.y_buf   = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
 202   picture.cr_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 203   picture.cb_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 204
 205   picture.y_line_size   = ACQ_WIDTH;
 206   picture.cb_line_size  = ACQ_WIDTH / 2;
 207   picture.cr_line_size  = ACQ_WIDTH / 2;
 208
 209   picture.y_pad         = 0;
 210   picture.c_pad         = 0;
 211
 212   dec.picture           = &picture;
 213   dec.out_buffer_size   = 4096;
 214
 215   stages[0].type      = VP_API_INPUT_FILE;
 216   stages[0].cfg       = (void *)&ifc;
 217   stages[0].funcs     = vp_stages_input_file_funcs;
 218
 219   stages[1].type      = VP_API_FILTER_DECODER;
 220   stages[1].cfg       = (void *)&dec;
 221   stages[1].funcs     = mjpeg_decoding_funcs;
 222
 223   stages[2].type      = VP_API_OUTPUT_SDL;
 224   stages[2].cfg       = (void *)&osc;
 225   stages[2].funcs     = vp_stages_output_sdl_funcs;
 226
 227   pipeline.nb_stages  = NB_STAGES;
 228   pipeline.stages     = &stages[0];
 229
 230   vp_api_open(&pipeline, &pipeline_handle);
 231   out.status = VP_API_STATUS_PROCESSING;
 232   while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING))
 233   {
 234     vp_os_delay( 30 );
 235   }
 236
 237   vp_api_close(&pipeline, &pipeline_handle);
 238
 239   return EXIT_SUCCESS;
 240 }
 241
 242
 243 ///*******************************************************************************************************************///
 244
 245
 246 // static THREAD_HANDLE dct_thread_handle;
 247
 248 static dct_io_buffer_t* current_io_buffer;
 249 static dct_io_buffer_t* result_io_buffer;
 250
 251 static void fdct(const unsigned short* in, short* out);
 252 static void idct(const short* in, unsigned short* out);
 253
 254
 255 //-----------------------------------------------------------------------------
 256 // DCT API
 257 //-----------------------------------------------------------------------------
 258
 259
 260 bool_t dct_init(void)
 261 {
 262   current_io_buffer = NULL;
 263   result_io_buffer  = NULL;
 264
 265   return TRUE;
 266 }
 267
 268 bool_t dct_compute( dct_io_buffer_t* io_buffer )
 269 {
 270   bool_t res = FALSE;
 271
 272   assert(io_buffer != NULL);
 273
 274   if( current_io_buffer == NULL && result_io_buffer == NULL )
 275   {
 276     current_io_buffer = io_buffer;
 277     res = TRUE;
 278
 279   }
 280
 281   return res;
 282 }
 283
 284 dct_io_buffer_t* dct_result( void )
 285 {
 286   uint32_t i;
 287   dct_io_buffer_t* io_buffer;
 288
 289   io_buffer = NULL;
 290
 291   if( current_io_buffer != NULL)
 292   {
 293     if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
 294     {
 295       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 296       {
 297         fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
 298       }
 299     }
 300     else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
 301     {
 302       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 303       {
 304         idct(current_io_buffer->input[i], current_io_buffer->output[i]);
 305       }
 306     }
 307
 308     io_buffer = current_io_buffer;
 309     current_io_buffer = NULL;
 310   }
 311
 312   return io_buffer;
 313 }
 314
 315
 316 //-----------------------------------------------------------------------------
 317 // DCT Computation
 318 //-----------------------------------------------------------------------------
 319
 320
 321 #define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
 322 #define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
 323 #define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
 324 #define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
 325 #define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
 326 #define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
 327 #define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
 328 #define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
 329 #define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
 330 #define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
 331 #define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
 332 #define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 333
 334 #define INT32       int
 335 #define DCTELEM     int
 336 #define DCTSIZE     8
 337 #define DCTSIZE2    64
 338 #define CONST_BITS  13
 339 #define PASS1_BITS  1
 340 #define ONE     ((INT32) 1)
 341 #define MULTIPLY(var,const)  ((var) * (const))
 342 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 343 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 344
 345 static void fdct(const unsigned short* in, short* out)
 346 {
 347   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 348   INT32 tmp10, tmp11, tmp12, tmp13;
 349   INT32 z1, z2, z3, z4, z5;
 350   int ctr;
 351   // SHIFT_TEMPS
 352
 353   int data[DCTSIZE * DCTSIZE];
 354   int i, j;
 355   int* dataptr = data;
 356
 357   for( i = 0; i < DCTSIZE; i++ )
 358   {
 359     for( j = 0; j < DCTSIZE; j++ )
 360     {
 361       int temp;
 362
 363       temp = in[i*DCTSIZE + j];
 364       dataptr[i*DCTSIZE + j] = temp;
 365     }
 366   }
 367
 368   /* Pass 1: process rows. */
 369   /* Note results are scaled up by sqrt(8) compared to a true DCT; */
 370   /* furthermore, we scale the results by 2**PASS1_BITS. */
 371
 372   dataptr = data;
 373   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 374     tmp0 = dataptr[0] + dataptr[7];
 375     tmp7 = dataptr[0] - dataptr[7];
 376     tmp1 = dataptr[1] + dataptr[6];
 377     tmp6 = dataptr[1] - dataptr[6];
 378     tmp2 = dataptr[2] + dataptr[5];
 379     tmp5 = dataptr[2] - dataptr[5];
 380     tmp3 = dataptr[3] + dataptr[4];
 381     tmp4 = dataptr[3] - dataptr[4];
 382
 383     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 384      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 385      */
 386
 387     tmp10 = tmp0 + tmp3;
 388     tmp13 = tmp0 - tmp3;
 389     tmp11 = tmp1 + tmp2;
 390     tmp12 = tmp1 - tmp2;
 391
 392     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
 393     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 394
 395     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 396     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 397     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 398
 399     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 400      * cK represents cos(K*pi/16).
 401      * i0..i3 in the paper are tmp4..tmp7 here.
 402      */
 403
 404     z1 = tmp4 + tmp7;
 405     z2 = tmp5 + tmp6;
 406     z3 = tmp4 + tmp6;
 407     z4 = tmp5 + tmp7;
 408     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 409
 410     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 411     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 412     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 413     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 414     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 415     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 416     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 417     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 418
 419     z3 += z5;
 420     z4 += z5;
 421
 422     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
 423     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
 424     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
 425     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
 426
 427     dataptr += DCTSIZE;         /* advance pointer to next row */
 428   }
 429
 430   /* Pass 2: process columns.
 431    * We remove the PASS1_BITS scaling, but leave the results scaled up
 432    * by an overall factor of 8.
 433    */
 434
 435   dataptr = data;
 436   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 437     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 438     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 439     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 440     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 441     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 442     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 443     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 444     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 445
 446     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 447      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 448      */
 449
 450     tmp10 = tmp0 + tmp3;
 451     tmp13 = tmp0 - tmp3;
 452     tmp11 = tmp1 + tmp2;
 453     tmp12 = tmp1 - tmp2;
 454
 455     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
 456     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
 457
 458     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 459     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
 460     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
 461
 462     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 463      * cK represents cos(K*pi/16).
 464      * i0..i3 in the paper are tmp4..tmp7 here.
 465      */
 466
 467     z1 = tmp4 + tmp7;
 468     z2 = tmp5 + tmp6;
 469     z3 = tmp4 + tmp6;
 470     z4 = tmp5 + tmp7;
 471     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 472
 473     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 474     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 475     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 476     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 477     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 478     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 479     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 480     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 481
 482     z3 += z5;
 483     z4 += z5;
 484
 485     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
 486     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
 487     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
 488     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
 489
 490     dataptr++;  /* advance pointer to next column */
 491   }
 492
 493   for( i = 0; i < DCTSIZE; i++ )
 494     for( j = 0; j < DCTSIZE; j++ )
 495       out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
 496 }
 497
 498 static void idct(const short* in, unsigned short* out)
 499 {
 500   INT32 tmp0, tmp1, tmp2, tmp3;
 501   INT32 tmp10, tmp11, tmp12, tmp13;
 502   INT32 z1, z2, z3, z4, z5;
 503   int* wsptr;
 504   int* outptr;
 505   const short* inptr;
 506   int ctr;
 507   int workspace[DCTSIZE2];      /* buffers data between passes */
 508   int data[DCTSIZE2];
 509   // SHIFT_TEMPS
 510
 511   /* Pass 1: process columns from input, store into work array. */
 512   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 513   /* furthermore, we scale the results by 2**PASS1_BITS. */
 514
 515   inptr = in;
 516   wsptr = workspace;
 517   for (ctr = DCTSIZE; ctr > 0; ctr--) {
 518     /* Due to quantization, we will usually find that many of the input
 519      * coefficients are zero, especially the AC terms.  We can exploit this
 520      * by short-circuiting the IDCT calculation for any column in which all
 521      * the AC terms are zero.  In that case each output is equal to the
 522      * DC coefficient (with scale factor as needed).
 523      * With typical images and quantization tables, half or more of the
 524      * column DCT calculations can be simplified this way.
 525      */
 526
 527     if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 528         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 529         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 530         inptr[DCTSIZE*7] == 0 ) {
 531       /* AC terms all zero */
 532       int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
 533
 534       wsptr[DCTSIZE*0] = dcval;
 535       wsptr[DCTSIZE*1] = dcval;
 536       wsptr[DCTSIZE*2] = dcval;
 537       wsptr[DCTSIZE*3] = dcval;
 538       wsptr[DCTSIZE*4] = dcval;
 539       wsptr[DCTSIZE*5] = dcval;
 540       wsptr[DCTSIZE*6] = dcval;
 541       wsptr[DCTSIZE*7] = dcval;
 542
 543       inptr++;  /* advance pointers to next column */
 544       wsptr++;
 545       continue;
 546     }
 547
 548     /* Even part: reverse the even part of the forward DCT. */
 549     /* The rotator is sqrt(2)*c(-6). */
 550
 551     z2 = inptr[DCTSIZE*2];
 552     z3 = inptr[DCTSIZE*6];
 553
 554     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 555     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 556     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 557
 558     z2 = inptr[DCTSIZE*0];
 559     z3 = inptr[DCTSIZE*4];
 560
 561     tmp0 = (z2 + z3) << CONST_BITS;
 562     tmp1 = (z2 - z3) << CONST_BITS;
 563
 564     tmp10 = tmp0 + tmp3;
 565     tmp13 = tmp0 - tmp3;
 566     tmp11 = tmp1 + tmp2;
 567     tmp12 = tmp1 - tmp2;
 568
 569     /* Odd part per figure 8; the matrix is unitary and hence its
 570      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 571      */
 572
 573     tmp0 = inptr[DCTSIZE*7];
 574     tmp1 = inptr[DCTSIZE*5];
 575     tmp2 = inptr[DCTSIZE*3];
 576     tmp3 = inptr[DCTSIZE*1];
 577
 578     z1 = tmp0 + tmp3;
 579     z2 = tmp1 + tmp2;
 580     z3 = tmp0 + tmp2;
 581     z4 = tmp1 + tmp3;
 582     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 583
 584     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 585     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 586     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 587     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 588     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 589     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 590     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 591     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 592
 593     z3 += z5;
 594     z4 += z5;
 595
 596     tmp0 += z1 + z3;
 597     tmp1 += z2 + z4;
 598     tmp2 += z2 + z3;
 599     tmp3 += z1 + z4;
 600
 601     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 602
 603     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 604     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 605     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 606     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 607     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 608     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 609     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 610     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 611
 612     inptr++;  /* advance pointers to next column */
 613     wsptr++;
 614   }
 615
 616   /* Pass 2: process rows from work array, store into output array. */
 617   /* Note that we must descale the results by a factor of 8 == 2**3, */
 618   /* and also undo the PASS1_BITS scaling. */
 619
 620   wsptr = workspace;
 621   outptr = data;
 622   for (ctr = 0; ctr < DCTSIZE; ctr++) {
 623     /* Even part: reverse the even part of the forward DCT. */
 624     /* The rotator is sqrt(2)*c(-6). */
 625
 626     z2 = (INT32) wsptr[2];
 627     z3 = (INT32) wsptr[6];
 628
 629     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 630     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 631     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 632
 633     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
 634     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
 635
 636     tmp10 = tmp0 + tmp3;
 637     tmp13 = tmp0 - tmp3;
 638     tmp11 = tmp1 + tmp2;
 639     tmp12 = tmp1 - tmp2;
 640
 641     /* Odd part per figure 8; the matrix is unitary and hence its
 642      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 643      */
 644
 645     tmp0 = (INT32) wsptr[7];
 646     tmp1 = (INT32) wsptr[5];
 647     tmp2 = (INT32) wsptr[3];
 648     tmp3 = (INT32) wsptr[1];
 649
 650     z1 = tmp0 + tmp3;
 651     z2 = tmp1 + tmp2;
 652     z3 = tmp0 + tmp2;
 653     z4 = tmp1 + tmp3;
 654     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 655
 656     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 657     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 658     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 659     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 660     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 661     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 662     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 663     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 664
 665     z3 += z5;
 666     z4 += z5;
 667
 668     tmp0 += z1 + z3;
 669     tmp1 += z2 + z4;
 670     tmp2 += z2 + z3;
 671     tmp3 += z1 + z4;
 672
 673     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 674
 675     outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 676     outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 677     outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 678     outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 679     outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 680     outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 681     outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 682     outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 683
 684     wsptr += DCTSIZE; /* advance pointer to next row */
 685     outptr += DCTSIZE;
 686   }
 687
 688   for(ctr = 0; ctr < DCTSIZE2; ctr++)
 689     out[ctr] = data[ctr];
 690 }