/* vcs.maemo.org Git - mardrone/blob - mardrone/ARDrone_SDK_Version_1_8_20110726/ARDroneLib/VP_SDK/Examples/linux/api_BTclientTCP_MJPEG_sdl.c */

   1 #include <stdlib.h>
   2 #include <ctype.h>
   3
   4 #include <VP_Api/vp_api.h>
   5 #include <VP_Api/vp_api_thread_helper.h>
   6 #include <VP_Api/vp_api_error.h>
   7 #include <VP_Stages/vp_stages_configs.h>
   8 #include <VP_Stages/vp_stages_io_console.h>
   9 #include <VP_Stages/vp_stages_o_sdl.h>
  10 #include <VP_Stages/vp_stages_io_com.h>
  11 #include <VP_Stages/vp_stages_io_file.h>
  12 #include <VP_Os/vp_os_print.h>
  13 #include <VP_Os/vp_os_malloc.h>
  14 #include <VP_Os/vp_os_delay.h>
  15
  16 #include <MJPEG/mjpeg.h>
  17
  18 #define NB_STAGES 3
  19
  20 #define ACQ_WIDTH   320
  21 #define ACQ_HEIGHT  240
  22
  23
  24 static PIPELINE_HANDLE pipeline_handle;
  25
  26
  27 PROTO_THREAD_ROUTINE(escaper,nomParams);
  28 PROTO_THREAD_ROUTINE(app,nomParams);
  29
  30 BEGIN_THREAD_TABLE
  31   THREAD_TABLE_ENTRY(escaper,20)
  32   THREAD_TABLE_ENTRY(app,20)
  33 END_THREAD_TABLE
  34
  35
  36 typedef struct _mjpeg_stage_decoding_config_t
  37 {
  38   stream_t          stream;
  39   mjpeg_t           mjpeg;
  40   vp_api_picture_t* picture;
  41
  42   uint32_t          out_buffer_size;
  43
  44 } mjpeg_stage_decoding_config_t;
  45
  46 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
  47 {
  48   stream_new( &cfg->stream, OUTPUT_STREAM );
  49
  50   return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, cfg->picture->width, cfg->picture->height, cfg->picture->format );
  51 }
  52
  53 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
  54 {
  55   C_RESULT res;
  56   bool_t   got_image;
  57
  58   vp_os_mutex_lock( &out->lock );
  59
  60   if(out->status == VP_API_STATUS_INIT)
  61   {
  62 #ifdef RECORD_MJPEG_VIDEO
  63     fp = fopen("video", "wb");
  64 #endif // ! RECORD_MJPEG_VIDEO
  65
  66     out->numBuffers   = 1;
  67     out->buffers      = (int8_t**)cfg->picture;
  68     out->indexBuffer  = 0;
  69     out->lineSize     = 0;
  70
  71     out->status = VP_API_STATUS_PROCESSING;
  72   }
  73
  74   if( in->status == VP_API_STATUS_ENDED )
  75   {
  76 #ifdef RECORD_MJPEG_VIDEO
  77     fclose( fp );
  78 #endif // ! RECORD_MJPEG_VIDEO
  79     out->status = in->status;
  80   }
  81
  82   // Several cases must be handled in this stage
  83   // 1st: Input buffer is too small to decode a complete picture
  84   // 2nd: Input buffer is big enough to decode 1 frame
  85   // 3rd: Input buffer is so big we can decode more than 1 frame
  86
  87   if( in->size > 0 )
  88   {
  89     if( out->status == VP_API_STATUS_PROCESSING )
  90     {
  91       // Reinit stream with new data
  92       stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
  93 #ifdef RECORD_MJPEG_VIDEO
  94       if( fp != NULL )
  95         fwrite(in->buffers[in->indexBuffer], in->size, 1, fp);
  96 #endif // ! RECORD_MJPEG_VIDEO
  97     }
  98
  99     if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
 100     {
 101       // If out->size == 1 it means picture is ready
 102       out->size = 0;
 103       out->status = VP_API_STATUS_PROCESSING;
 104
 105       res = mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
 106
 107       // handle case 2 & 3
 108       if( FAILED(stream_is_empty( &cfg->stream )) )
 109       {
 110         // Some data are still in stream
 111         // Next time we run this stage we don't want this data to be lost
 112         // So flag it!
 113         out->status = VP_API_STATUS_STILL_RUNNING;
 114       }
 115
 116       if( got_image )
 117       {
 118         // we got one picture (handle case 1)
 119         out->size = 1;
 120
 121         PRINT( "%d picture decoded\n", cfg->mjpeg.num_frames );
 122       }
 123     }
 124   }
 125
 126   vp_os_mutex_unlock( &out->lock );
 127
 128   return C_OK;
 129 }
 130
 131 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
 132 {
 133   stream_delete( &cfg->stream );
 134
 135   return mjpeg_release( &cfg->mjpeg );
 136 }
 137
 138
 139 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
 140   (vp_api_stage_handle_msg_t) NULL,
 141   (vp_api_stage_open_t) mjpeg_stage_decoding_open,
 142   (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
 143   (vp_api_stage_close_t) mjpeg_stage_decoding_close
 144 };
 145
 146 int
 147 main(int argc, char **argv)
 148 {
 149   START_THREAD(escaper, NO_PARAM);
 150   START_THREAD(app, argv);
 151
 152   JOIN_THREAD(escaper);
 153   JOIN_THREAD(app);
 154
 155   return EXIT_SUCCESS;
 156 }
 157
 158 PROTO_THREAD_ROUTINE(app,argv)
 159 {
 160   vp_api_picture_t picture;
 161
 162   vp_api_io_pipeline_t    pipeline;
 163   vp_api_io_data_t        out;
 164   vp_api_io_stage_t       stages[NB_STAGES];
 165
 166   vp_stages_input_com_config_t    icc;
 167   mjpeg_stage_decoding_config_t   dec;
 168   vp_stages_output_sdl_config_t   osc;
 169
 170   vp_com_t                        com;
 171   vp_com_bluetooth_connection_t   connection;
 172   vp_com_bluetooth_config_t       config;
 173
 174   /// Picture configuration
 175   picture.format        = PIX_FMT_YUV420P;
 176
 177   picture.width         = ACQ_WIDTH;
 178   picture.height        = ACQ_HEIGHT;
 179   picture.framerate     = 15;
 180
 181   picture.y_buf   = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
 182   picture.cr_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 183   picture.cb_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 184
 185   picture.y_line_size   = ACQ_WIDTH;
 186   picture.cb_line_size  = ACQ_WIDTH / 2;
 187   picture.cr_line_size  = ACQ_WIDTH / 2;
 188
 189   picture.y_pad         = 0;
 190   picture.c_pad         = 0;
 191
 192   dec.picture         = &picture;
 193   dec.out_buffer_size = 4096;
 194
 195   vp_os_memset( &icc,         0, sizeof(vp_stages_input_com_config_t)  );
 196   vp_os_memset( &osc,         0, sizeof(vp_stages_output_sdl_config_t) );
 197   vp_os_memset( &connection,  0, sizeof(vp_com_bluetooth_connection_t) );
 198   vp_os_memset( &config,      0, sizeof(vp_com_bluetooth_config_t)     );
 199   vp_os_memset( &com,         0, sizeof(vp_com_t)                      );
 200
 201   vp_com_str_to_address("08:75:48:03:60:34",&connection.address);
 202   //  vp_com_str_to_address("00:12:1C:FF:A4:EE",&connection.address);
 203
 204   strcpy(config.itfName,    "bnep0");
 205   strcpy(config.localHost,  "192.168.2.58");
 206   strcpy(config.netmask,    "255.255.255.0");
 207   strcpy(config.broadcast,  "192.168.2.255");
 208   strcpy(config.gateway,    "192.168.2.0");
 209   strcpy(config.server,     "192.168.2.0");
 210   strcpy(config.passkey,    "1234" );
 211   config.secure = 1;
 212
 213   com.type                          = VP_COM_BLUETOOTH;
 214
 215   icc.com                           = &com;
 216   icc.config                        = (vp_com_config_t*)&config;
 217   icc.connection                    = (vp_com_connection_t*)&connection;
 218   icc.socket.type                   = VP_COM_CLIENT;
 219   icc.socket.protocol               = VP_COM_TCP;
 220   icc.socket.port                   = 5555;
 221   icc.buffer_size                   = 1600;
 222   //  icc.sockopt                       = VP_COM_NON_BLOCKING;
 223
 224   strcpy(icc.socket.serverHost,"192.168.2.23");
 225
 226   osc.width           = 320;
 227   osc.height          = 240;
 228   osc.bpp             = 16;
 229   osc.window_width    = 320;
 230   osc.window_height   = 240;
 231   osc.pic_width       = ACQ_WIDTH;
 232   osc.pic_height      = ACQ_HEIGHT;
 233   osc.y_size          = ACQ_WIDTH*ACQ_HEIGHT;
 234   osc.c_size          = (ACQ_WIDTH*ACQ_HEIGHT) >> 2;
 235
 236   stages[0].type                    = VP_API_INPUT_SOCKET;
 237   stages[0].cfg                     = (void *)&icc;
 238   stages[0].funcs                   = vp_stages_input_com_funcs;
 239
 240   stages[1].type                    = VP_API_FILTER_DECODER;
 241   stages[1].cfg                     = (void*)&dec;
 242   stages[1].funcs                   = mjpeg_decoding_funcs;
 243
 244   stages[2].type                    = VP_API_OUTPUT_SDL;
 245   stages[2].cfg                     = (void *)&osc;
 246   stages[2].funcs                   = vp_stages_output_sdl_funcs;
 247
 248   pipeline.nb_stages                = NB_STAGES;
 249   pipeline.stages                   = &stages[0];
 250
 251   vp_api_open(&pipeline, &pipeline_handle);
 252   out.status = VP_API_STATUS_PROCESSING;
 253   while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING));
 254
 255   vp_api_close(&pipeline, &pipeline_handle);
 256
 257   return EXIT_SUCCESS;
 258 }
 259
 260 ///*******************************************************************************************************************///
 261
 262
 263 // static THREAD_HANDLE dct_thread_handle;
 264
 265 static dct_io_buffer_t* current_io_buffer;
 266 static dct_io_buffer_t* result_io_buffer;
 267
 268 static void fdct(const unsigned short* in, short* out);
 269 static void idct(const short* in, unsigned short* out);
 270
 271
 272 //-----------------------------------------------------------------------------
 273 // DCT API
 274 //-----------------------------------------------------------------------------
 275
 276
 277 bool_t dct_init(void)
 278 {
 279   current_io_buffer = NULL;
 280   result_io_buffer  = NULL;
 281
 282   return TRUE;
 283 }
 284
 285 bool_t dct_compute( dct_io_buffer_t* io_buffer )
 286 {
 287   bool_t res = FALSE;
 288
 289   assert(io_buffer != NULL);
 290
 291   if( current_io_buffer == NULL && result_io_buffer == NULL )
 292   {
 293     current_io_buffer = io_buffer;
 294     res = TRUE;
 295
 296   }
 297
 298   return res;
 299 }
 300
 301 dct_io_buffer_t* dct_result( void )
 302 {
 303   uint32_t i;
 304   dct_io_buffer_t* io_buffer;
 305
 306   io_buffer = NULL;
 307
 308   if( current_io_buffer != NULL)
 309   {
 310     if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
 311     {
 312       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 313       {
 314         fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
 315       }
 316     }
 317     else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
 318     {
 319       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 320       {
 321         idct(current_io_buffer->input[i], current_io_buffer->output[i]);
 322       }
 323     }
 324
 325     io_buffer = current_io_buffer;
 326     current_io_buffer = NULL;
 327   }
 328
 329   return io_buffer;
 330 }
 331
 332
 333 //-----------------------------------------------------------------------------
 334 // DCT Computation
 335 //-----------------------------------------------------------------------------
 336
 337
 338 #define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
 339 #define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
 340 #define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
 341 #define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
 342 #define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
 343 #define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
 344 #define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
 345 #define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
 346 #define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
 347 #define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
 348 #define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
 349 #define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 350
 351 #define INT32       int
 352 #define DCTELEM     int
 353 #define DCTSIZE     8
 354 #define DCTSIZE2    64
 355 #define CONST_BITS  13
 356 #define PASS1_BITS  1
 357 #define ONE     ((INT32) 1)
 358 #define MULTIPLY(var,const)  ((var) * (const))
 359 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 360 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 361
 362 static void fdct(const unsigned short* in, short* out)
 363 {
 364   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 365   INT32 tmp10, tmp11, tmp12, tmp13;
 366   INT32 z1, z2, z3, z4, z5;
 367   int ctr;
 368   // SHIFT_TEMPS
 369
 370   int data[DCTSIZE * DCTSIZE];
 371   int i, j;
 372   int* dataptr = data;
 373
 374   for( i = 0; i < DCTSIZE; i++ )
 375   {
 376     for( j = 0; j < DCTSIZE; j++ )
 377     {
 378       int temp;
 379
 380       temp = in[i*DCTSIZE + j];
 381       dataptr[i*DCTSIZE + j] = temp;
 382     }
 383   }
 384
 385   /* Pass 1: process rows. */
 386   /* Note results are scaled up by sqrt(8) compared to a true DCT; */
 387   /* furthermore, we scale the results by 2**PASS1_BITS. */
 388
 389   dataptr = data;
 390   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 391     tmp0 = dataptr[0] + dataptr[7];
 392     tmp7 = dataptr[0] - dataptr[7];
 393     tmp1 = dataptr[1] + dataptr[6];
 394     tmp6 = dataptr[1] - dataptr[6];
 395     tmp2 = dataptr[2] + dataptr[5];
 396     tmp5 = dataptr[2] - dataptr[5];
 397     tmp3 = dataptr[3] + dataptr[4];
 398     tmp4 = dataptr[3] - dataptr[4];
 399
 400     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 401      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 402      */
 403
 404     tmp10 = tmp0 + tmp3;
 405     tmp13 = tmp0 - tmp3;
 406     tmp11 = tmp1 + tmp2;
 407     tmp12 = tmp1 - tmp2;
 408
 409     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
 410     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 411
 412     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 413     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 414     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 415
 416     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 417      * cK represents cos(K*pi/16).
 418      * i0..i3 in the paper are tmp4..tmp7 here.
 419      */
 420
 421     z1 = tmp4 + tmp7;
 422     z2 = tmp5 + tmp6;
 423     z3 = tmp4 + tmp6;
 424     z4 = tmp5 + tmp7;
 425     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 426
 427     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 428     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 429     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 430     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 431     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 432     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 433     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 434     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 435
 436     z3 += z5;
 437     z4 += z5;
 438
 439     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
 440     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
 441     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
 442     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
 443
 444     dataptr += DCTSIZE;         /* advance pointer to next row */
 445   }
 446
 447   /* Pass 2: process columns.
 448    * We remove the PASS1_BITS scaling, but leave the results scaled up
 449    * by an overall factor of 8.
 450    */
 451
 452   dataptr = data;
 453   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 454     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 455     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 456     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 457     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 458     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 459     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 460     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 461     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 462
 463     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 464      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 465      */
 466
 467     tmp10 = tmp0 + tmp3;
 468     tmp13 = tmp0 - tmp3;
 469     tmp11 = tmp1 + tmp2;
 470     tmp12 = tmp1 - tmp2;
 471
 472     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
 473     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
 474
 475     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 476     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
 477     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
 478
 479     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 480      * cK represents cos(K*pi/16).
 481      * i0..i3 in the paper are tmp4..tmp7 here.
 482      */
 483
 484     z1 = tmp4 + tmp7;
 485     z2 = tmp5 + tmp6;
 486     z3 = tmp4 + tmp6;
 487     z4 = tmp5 + tmp7;
 488     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 489
 490     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 491     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 492     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 493     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 494     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 495     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 496     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 497     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 498
 499     z3 += z5;
 500     z4 += z5;
 501
 502     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
 503     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
 504     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
 505     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
 506
 507     dataptr++;  /* advance pointer to next column */
 508   }
 509
 510   for( i = 0; i < DCTSIZE; i++ )
 511     for( j = 0; j < DCTSIZE; j++ )
 512       out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
 513 }
 514
 515 static void idct(const short* in, unsigned short* out)
 516 {
 517   INT32 tmp0, tmp1, tmp2, tmp3;
 518   INT32 tmp10, tmp11, tmp12, tmp13;
 519   INT32 z1, z2, z3, z4, z5;
 520   int* wsptr;
 521   int* outptr;
 522   const short* inptr;
 523   int ctr;
 524   int workspace[DCTSIZE2];      /* buffers data between passes */
 525   int data[DCTSIZE2];
 526   // SHIFT_TEMPS
 527
 528   /* Pass 1: process columns from input, store into work array. */
 529   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 530   /* furthermore, we scale the results by 2**PASS1_BITS. */
 531
 532   inptr = in;
 533   wsptr = workspace;
 534   for (ctr = DCTSIZE; ctr > 0; ctr--) {
 535     /* Due to quantization, we will usually find that many of the input
 536      * coefficients are zero, especially the AC terms.  We can exploit this
 537      * by short-circuiting the IDCT calculation for any column in which all
 538      * the AC terms are zero.  In that case each output is equal to the
 539      * DC coefficient (with scale factor as needed).
 540      * With typical images and quantization tables, half or more of the
 541      * column DCT calculations can be simplified this way.
 542      */
 543
 544     if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 545         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 546         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 547         inptr[DCTSIZE*7] == 0 ) {
 548       /* AC terms all zero */
 549       int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
 550
 551       wsptr[DCTSIZE*0] = dcval;
 552       wsptr[DCTSIZE*1] = dcval;
 553       wsptr[DCTSIZE*2] = dcval;
 554       wsptr[DCTSIZE*3] = dcval;
 555       wsptr[DCTSIZE*4] = dcval;
 556       wsptr[DCTSIZE*5] = dcval;
 557       wsptr[DCTSIZE*6] = dcval;
 558       wsptr[DCTSIZE*7] = dcval;
 559
 560       inptr++;  /* advance pointers to next column */
 561       wsptr++;
 562       continue;
 563     }
 564
 565     /* Even part: reverse the even part of the forward DCT. */
 566     /* The rotator is sqrt(2)*c(-6). */
 567
 568     z2 = inptr[DCTSIZE*2];
 569     z3 = inptr[DCTSIZE*6];
 570
 571     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 572     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 573     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 574
 575     z2 = inptr[DCTSIZE*0];
 576     z3 = inptr[DCTSIZE*4];
 577
 578     tmp0 = (z2 + z3) << CONST_BITS;
 579     tmp1 = (z2 - z3) << CONST_BITS;
 580
 581     tmp10 = tmp0 + tmp3;
 582     tmp13 = tmp0 - tmp3;
 583     tmp11 = tmp1 + tmp2;
 584     tmp12 = tmp1 - tmp2;
 585
 586     /* Odd part per figure 8; the matrix is unitary and hence its
 587      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 588      */
 589
 590     tmp0 = inptr[DCTSIZE*7];
 591     tmp1 = inptr[DCTSIZE*5];
 592     tmp2 = inptr[DCTSIZE*3];
 593     tmp3 = inptr[DCTSIZE*1];
 594
 595     z1 = tmp0 + tmp3;
 596     z2 = tmp1 + tmp2;
 597     z3 = tmp0 + tmp2;
 598     z4 = tmp1 + tmp3;
 599     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 600
 601     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 602     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 603     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 604     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 605     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 606     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 607     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 608     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 609
 610     z3 += z5;
 611     z4 += z5;
 612
 613     tmp0 += z1 + z3;
 614     tmp1 += z2 + z4;
 615     tmp2 += z2 + z3;
 616     tmp3 += z1 + z4;
 617
 618     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 619
 620     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 621     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 622     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 623     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 624     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 625     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 626     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 627     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 628
 629     inptr++;  /* advance pointers to next column */
 630     wsptr++;
 631   }
 632
 633   /* Pass 2: process rows from work array, store into output array. */
 634   /* Note that we must descale the results by a factor of 8 == 2**3, */
 635   /* and also undo the PASS1_BITS scaling. */
 636
 637   wsptr = workspace;
 638   outptr = data;
 639   for (ctr = 0; ctr < DCTSIZE; ctr++) {
 640     /* Even part: reverse the even part of the forward DCT. */
 641     /* The rotator is sqrt(2)*c(-6). */
 642
 643     z2 = (INT32) wsptr[2];
 644     z3 = (INT32) wsptr[6];
 645
 646     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 647     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 648     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 649
 650     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
 651     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
 652
 653     tmp10 = tmp0 + tmp3;
 654     tmp13 = tmp0 - tmp3;
 655     tmp11 = tmp1 + tmp2;
 656     tmp12 = tmp1 - tmp2;
 657
 658     /* Odd part per figure 8; the matrix is unitary and hence its
 659      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 660      */
 661
 662     tmp0 = (INT32) wsptr[7];
 663     tmp1 = (INT32) wsptr[5];
 664     tmp2 = (INT32) wsptr[3];
 665     tmp3 = (INT32) wsptr[1];
 666
 667     z1 = tmp0 + tmp3;
 668     z2 = tmp1 + tmp2;
 669     z3 = tmp0 + tmp2;
 670     z4 = tmp1 + tmp3;
 671     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 672
 673     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 674     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 675     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 676     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 677     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 678     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 679     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 680     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 681
 682     z3 += z5;
 683     z4 += z5;
 684
 685     tmp0 += z1 + z3;
 686     tmp1 += z2 + z4;
 687     tmp2 += z2 + z3;
 688     tmp3 += z1 + z4;
 689
 690     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 691
 692     outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 693     outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 694     outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 695     outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 696     outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 697     outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 698     outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 699     outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 700
 701     wsptr += DCTSIZE; /* advance pointer to next row */
 702     outptr += DCTSIZE;
 703   }
 704
 705   for(ctr = 0; ctr < DCTSIZE2; ctr++)
 706     out[ctr] = data[ctr];
 707 }