vcs.maemo.org Git - mardrone/blob - mardrone/ARDrone_SDK_Version_1_8_20110726/ARDroneLib/VP_SDK/Examples/linux/api_wifiClientTCP_MJPEG_sdl.c

   1 #include <stdlib.h>
   2 #include <ctype.h>
   3 #include <sys/time.h>
   4
   5 #include <VP_Api/vp_api.h>
   6 #include <VP_Api/vp_api_thread_helper.h>
   7 #include <VP_Api/vp_api_error.h>
   8 #include <VP_Stages/vp_stages_configs.h>
   9 #include <VP_Stages/vp_stages_io_console.h>
  10 #include <VP_Stages/vp_stages_o_sdl.h>
  11 #include <VP_Stages/vp_stages_io_com.h>
  12 #include <VP_Stages/vp_stages_io_file.h>
  13 #include <VP_Os/vp_os_print.h>
  14 #include <VP_Os/vp_os_malloc.h>
  15 #include <VP_Os/vp_os_delay.h>
  16
  17 #include <MJPEG/mjpeg.h>
  18
  19 #define NB_STAGES 3
  20
  21 #define RECORD_MJPEG_VIDEO
  22
  23 #define CAMIF_BLOCKLINES_LOG2       4
  24 #define CAMIF_BLOCKLINES            (1 << CAMIF_BLOCKLINES_LOG2)
  25
  26 #define ACQ_WIDTH   (176)
  27 #define ACQ_HEIGHT  (144)
  28
  29 #define WINDOW_WIDTH  (320)
  30 #define WINDOW_HEIGHT (240)
  31
  32 #ifdef RECORD_MJPEG_VIDEO
  33 static  FILE* fp;
  34 #endif // ! RECORD_MJPEG_VIDEO
  35
  36 static PIPELINE_HANDLE pipeline_handle;
  37
  38
  39 PROTO_THREAD_ROUTINE(escaper,nomParams);
  40 PROTO_THREAD_ROUTINE(app,nomParams);
  41
  42 BEGIN_THREAD_TABLE
  43   THREAD_TABLE_ENTRY(escaper,20)
  44   THREAD_TABLE_ENTRY(app,20)
  45 END_THREAD_TABLE
  46
  47
  48 typedef struct _mjpeg_stage_decoding_config_t
  49 {
  50   stream_t          stream;
  51   mjpeg_t           mjpeg;
  52   vp_api_picture_t* picture;
  53
  54   uint32_t          out_buffer_size;
  55
  56 } mjpeg_stage_decoding_config_t;
  57
  58 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
  59 {
  60   uint32_t width = cfg->picture->width;
  61   uint32_t height = cfg->picture->height;
  62   enum PixelFormat format = cfg->picture->format;
  63
  64   stream_new( &cfg->stream, OUTPUT_STREAM );
  65
  66   return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, width, height, format );
  67 }
  68
  69 static uint32_t get_current_time()
  70 {
  71   static bool_t init = TRUE;
  72   static struct timeval  tv_init;
  73
  74   uint32_t t;
  75   float32_t s, us;
  76
  77   struct timeval  tv;
  78
  79   if( init )
  80   {
  81     gettimeofday(&tv_init, NULL);
  82     init = FALSE;
  83   }
  84
  85   gettimeofday(&tv, NULL);
  86   tv.tv_sec   -= tv_init.tv_sec;
  87   tv.tv_usec  -= tv_init.tv_usec;
  88
  89   s   = tv.tv_sec;
  90   us  = tv.tv_usec;
  91
  92   t = (s * 1000.0f) + ( us / 1000.0f);
  93
  94   return t;
  95 }
  96
  97 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
  98 {
  99   C_RESULT res;
 100   bool_t   got_image;
 101   static  int32_t start_time, dt;
 102
 103   vp_os_mutex_lock( &out->lock );
 104
 105   if(out->status == VP_API_STATUS_INIT)
 106   {
 107 #ifdef RECORD_MJPEG_VIDEO
 108     fp = fopen("video", "wb");
 109 #endif // ! RECORD_MJPEG_VIDEO
 110
 111     out->numBuffers   = 1;
 112     out->buffers      = (int8_t**)cfg->picture;
 113     out->indexBuffer  = 0;
 114     out->lineSize     = 0;
 115
 116     out->status = VP_API_STATUS_PROCESSING;
 117
 118     dt = 0;
 119   }
 120
 121   if( in->status == VP_API_STATUS_ENDED )
 122   {
 123 #ifdef RECORD_MJPEG_VIDEO
 124     fclose( fp );
 125 #endif // ! RECORD_MJPEG_VIDEO
 126     out->status = in->status;
 127   }
 128
 129   // Several cases must be handled in this stage
 130   // 1st: Input buffer is too small to decode a complete picture
 131   // 2nd: Input buffer is big enough to decode 1 frame
 132   // 3rd: Input buffer is so big we can decode more than 1 frame
 133
 134   if( in->size > 0 )
 135   {
 136     if( out->status == VP_API_STATUS_PROCESSING )
 137     {
 138       // Reinit stream with new data
 139       stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
 140 #ifdef RECORD_MJPEG_VIDEO
 141       if( fp != NULL )
 142         fwrite(in->buffers[in->indexBuffer], in->size, 1, fp);
 143 #endif // ! RECORD_MJPEG_VIDEO
 144     }
 145
 146     if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
 147     {
 148       // If out->size == 1 it means picture is ready
 149       out->size = 0;
 150       out->status = VP_API_STATUS_PROCESSING;
 151
 152       start_time = get_current_time();
 153       res = mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
 154       dt += (get_current_time() - start_time);
 155
 156       // handle case 2 & 3
 157       if( FAILED(stream_is_empty( &cfg->stream )) )
 158       {
 159         // Some data are still in stream
 160         // Next time we run this stage we don't want this data to be lost
 161         // So flag it!
 162         out->status = VP_API_STATUS_STILL_RUNNING;
 163       }
 164
 165       if( got_image )
 166       {
 167         // we got one picture (handle case 1)
 168         out->size = 1;
 169         PRINT( "%d picture decoded in %d ms\n", cfg->mjpeg.num_frames, dt );
 170         dt = 0;
 171       }
 172     }
 173   }
 174
 175   vp_os_mutex_unlock( &out->lock );
 176
 177   return C_OK;
 178 }
 179
 180 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
 181 {
 182   stream_delete( &cfg->stream );
 183
 184   return mjpeg_release( &cfg->mjpeg );
 185 }
 186
 187
 188 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
 189   (vp_api_stage_handle_msg_t) NULL,
 190   (vp_api_stage_open_t) mjpeg_stage_decoding_open,
 191   (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
 192   (vp_api_stage_close_t) mjpeg_stage_decoding_close
 193 };
 194
 195 int
 196 main(int argc, char **argv)
 197 {
 198   START_THREAD(escaper, NO_PARAM);
 199   START_THREAD(app, argv);
 200
 201   JOIN_THREAD(escaper);
 202   JOIN_THREAD(app);
 203
 204   return EXIT_SUCCESS;
 205 }
 206
 207 PROTO_THREAD_ROUTINE(app,argv)
 208 {
 209
 210   uint32_t nb_stages = 0;
 211   vp_api_picture_t picture;
 212
 213   vp_api_io_pipeline_t    pipeline;
 214   vp_api_io_data_t        out;
 215   vp_api_io_stage_t       stages[NB_STAGES];
 216
 217   vp_stages_input_com_config_t    icc;
 218   mjpeg_stage_decoding_config_t   dec;
 219   vp_stages_output_sdl_config_t   osc;
 220
 221   vp_com_t                        com;
 222   vp_com_wifi_connection_t        connection;
 223   vp_com_wifi_config_t            config;
 224
 225   /// Picture configuration
 226   picture.format        = PIX_FMT_YUV420P;
 227
 228   picture.width         = ACQ_WIDTH;
 229   picture.height        = ACQ_HEIGHT;
 230   picture.framerate     = 15;
 231
 232   picture.y_buf   = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
 233   picture.cr_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 234   picture.cb_buf  = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 235
 236   picture.y_line_size   = ACQ_WIDTH;
 237   picture.cb_line_size  = ACQ_WIDTH / 2;
 238   picture.cr_line_size  = ACQ_WIDTH / 2;
 239
 240   picture.y_pad         = 0;
 241   picture.c_pad         = 0;
 242
 243   dec.picture           = &picture;
 244   dec.out_buffer_size   = 4096;
 245
 246   vp_os_memset( &icc,         0, sizeof(vp_stages_input_com_config_t));
 247   vp_os_memset( &connection,  0, sizeof(vp_com_wifi_connection_t) );
 248   strcpy(connection.networkName,    "linksys");
 249   vp_stages_fill_default_config(WIFI_COM_CONFIG, &config, sizeof(config));
 250   vp_os_memset(&com, 0, sizeof(vp_com_t));
 251
 252   com.type                          = VP_COM_WIFI;
 253   icc.com                           = &com;
 254   icc.config                        = (vp_com_config_t*)&config;
 255   icc.connection                    = (vp_com_connection_t*)&connection;
 256   icc.socket.type                   = VP_COM_CLIENT;
 257   icc.socket.protocol               = VP_COM_TCP;
 258   icc.socket.port                   = 5555;
 259   icc.buffer_size                   = 1024;
 260   icc.sockopt                       = VP_COM_NON_BLOCKING;
 261
 262   strcpy(icc.socket.serverHost,     "30.30.30.1");
 263
 264   osc.width                         = WINDOW_WIDTH;
 265   osc.height                        = WINDOW_HEIGHT;
 266   osc.bpp                           = 16;
 267   osc.window_width                  = WINDOW_WIDTH;
 268   osc.window_height                 = WINDOW_HEIGHT;
 269   osc.pic_width                     = ACQ_WIDTH;
 270   osc.pic_height                    = ACQ_HEIGHT;
 271   osc.y_size                        = ACQ_WIDTH*ACQ_HEIGHT;
 272   osc.c_size                        = (ACQ_WIDTH*ACQ_HEIGHT) >> 2;
 273
 274   stages[nb_stages].type            = VP_API_INPUT_SOCKET;
 275   stages[nb_stages].cfg             = (void *)&icc;
 276   stages[nb_stages].funcs           = vp_stages_input_com_funcs;
 277
 278   nb_stages++;
 279
 280   stages[nb_stages].type            = VP_API_FILTER_DECODER;
 281   stages[nb_stages].cfg             = (void*)&dec;
 282   stages[nb_stages].funcs           = mjpeg_decoding_funcs;
 283
 284   nb_stages++;
 285
 286   stages[nb_stages].type            = VP_API_OUTPUT_SDL;
 287   stages[nb_stages].cfg             = (void *)&osc;
 288   stages[nb_stages].funcs           = vp_stages_output_sdl_funcs;
 289
 290   nb_stages++;
 291
 292   pipeline.nb_stages                = nb_stages;
 293   pipeline.stages                   = &stages[0];
 294
 295   vp_api_open(&pipeline, &pipeline_handle);
 296   out.status = VP_API_STATUS_PROCESSING;
 297   while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING));
 298
 299   vp_api_close(&pipeline, &pipeline_handle);
 300
 301   return EXIT_SUCCESS;
 302 }
 303
 304 ///*******************************************************************************************************************///
 305
 306
 307 // static THREAD_HANDLE dct_thread_handle;
 308
 309 static dct_io_buffer_t* current_io_buffer;
 310 static dct_io_buffer_t* result_io_buffer;
 311
 312 static void fdct(const unsigned short* in, short* out);
 313 static void idct(const short* in, unsigned short* out);
 314
 315
 316 //-----------------------------------------------------------------------------
 317 // DCT API
 318 //-----------------------------------------------------------------------------
 319
 320
 321 bool_t dct_init(void)
 322 {
 323   current_io_buffer = NULL;
 324   result_io_buffer  = NULL;
 325
 326   return TRUE;
 327 }
 328
 329 bool_t dct_compute( dct_io_buffer_t* io_buffer )
 330 {
 331   bool_t res = FALSE;
 332
 333   assert(io_buffer != NULL);
 334
 335   if( current_io_buffer == NULL && result_io_buffer == NULL )
 336   {
 337     current_io_buffer = io_buffer;
 338     res = TRUE;
 339
 340   }
 341
 342   return res;
 343 }
 344
 345 dct_io_buffer_t* dct_result( void )
 346 {
 347   uint32_t i;
 348   dct_io_buffer_t* io_buffer;
 349
 350   io_buffer = NULL;
 351
 352   if( current_io_buffer != NULL)
 353   {
 354     if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
 355     {
 356       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 357       {
 358         fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
 359       }
 360     }
 361     else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
 362     {
 363       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 364       {
 365         idct(current_io_buffer->input[i], current_io_buffer->output[i]);
 366       }
 367     }
 368
 369     io_buffer = current_io_buffer;
 370     current_io_buffer = NULL;
 371   }
 372
 373   return io_buffer;
 374 }
 375
 376
 377 //-----------------------------------------------------------------------------
 378 // DCT Computation
 379 //-----------------------------------------------------------------------------
 380
 381
 382 #define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
 383 #define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
 384 #define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
 385 #define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
 386 #define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
 387 #define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
 388 #define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
 389 #define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
 390 #define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
 391 #define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
 392 #define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
 393 #define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 394
 395 #define INT32       int
 396 #define DCTELEM     int
 397 #define DCTSIZE     8
 398 #define DCTSIZE2    64
 399 #define CONST_BITS  13
 400 #define PASS1_BITS  1
 401 #define ONE     ((INT32) 1)
 402 #define MULTIPLY(var,const)  ((var) * (const))
 403 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 404 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 405
 406 static void fdct(const unsigned short* in, short* out)
 407 {
 408   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 409   INT32 tmp10, tmp11, tmp12, tmp13;
 410   INT32 z1, z2, z3, z4, z5;
 411   int ctr;
 412   // SHIFT_TEMPS
 413
 414   int data[DCTSIZE * DCTSIZE];
 415   int i, j;
 416   int* dataptr = data;
 417
 418   for( i = 0; i < DCTSIZE; i++ )
 419   {
 420     for( j = 0; j < DCTSIZE; j++ )
 421     {
 422       int temp;
 423
 424       temp = in[i*DCTSIZE + j];
 425       dataptr[i*DCTSIZE + j] = temp;
 426     }
 427   }
 428
 429   /* Pass 1: process rows. */
 430   /* Note results are scaled up by sqrt(8) compared to a true DCT; */
 431   /* furthermore, we scale the results by 2**PASS1_BITS. */
 432
 433   dataptr = data;
 434   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 435     tmp0 = dataptr[0] + dataptr[7];
 436     tmp7 = dataptr[0] - dataptr[7];
 437     tmp1 = dataptr[1] + dataptr[6];
 438     tmp6 = dataptr[1] - dataptr[6];
 439     tmp2 = dataptr[2] + dataptr[5];
 440     tmp5 = dataptr[2] - dataptr[5];
 441     tmp3 = dataptr[3] + dataptr[4];
 442     tmp4 = dataptr[3] - dataptr[4];
 443
 444     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 445      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 446      */
 447
 448     tmp10 = tmp0 + tmp3;
 449     tmp13 = tmp0 - tmp3;
 450     tmp11 = tmp1 + tmp2;
 451     tmp12 = tmp1 - tmp2;
 452
 453     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
 454     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 455
 456     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 457     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 458     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 459
 460     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 461      * cK represents cos(K*pi/16).
 462      * i0..i3 in the paper are tmp4..tmp7 here.
 463      */
 464
 465     z1 = tmp4 + tmp7;
 466     z2 = tmp5 + tmp6;
 467     z3 = tmp4 + tmp6;
 468     z4 = tmp5 + tmp7;
 469     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 470
 471     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 472     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 473     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 474     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 475     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 476     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 477     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 478     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 479
 480     z3 += z5;
 481     z4 += z5;
 482
 483     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
 484     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
 485     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
 486     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
 487
 488     dataptr += DCTSIZE;         /* advance pointer to next row */
 489   }
 490
 491   /* Pass 2: process columns.
 492    * We remove the PASS1_BITS scaling, but leave the results scaled up
 493    * by an overall factor of 8.
 494    */
 495
 496   dataptr = data;
 497   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 498     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 499     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 500     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 501     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 502     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 503     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 504     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 505     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 506
 507     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 508      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 509      */
 510
 511     tmp10 = tmp0 + tmp3;
 512     tmp13 = tmp0 - tmp3;
 513     tmp11 = tmp1 + tmp2;
 514     tmp12 = tmp1 - tmp2;
 515
 516     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
 517     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
 518
 519     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 520     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
 521     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
 522
 523     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 524      * cK represents cos(K*pi/16).
 525      * i0..i3 in the paper are tmp4..tmp7 here.
 526      */
 527
 528     z1 = tmp4 + tmp7;
 529     z2 = tmp5 + tmp6;
 530     z3 = tmp4 + tmp6;
 531     z4 = tmp5 + tmp7;
 532     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 533
 534     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 535     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 536     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 537     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 538     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 539     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 540     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 541     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 542
 543     z3 += z5;
 544     z4 += z5;
 545
 546     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
 547     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
 548     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
 549     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
 550
 551     dataptr++;  /* advance pointer to next column */
 552   }
 553
 554   for( i = 0; i < DCTSIZE; i++ )
 555     for( j = 0; j < DCTSIZE; j++ )
 556       out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
 557 }
 558
 559 static void idct(const short* in, unsigned short* out)
 560 {
 561   INT32 tmp0, tmp1, tmp2, tmp3;
 562   INT32 tmp10, tmp11, tmp12, tmp13;
 563   INT32 z1, z2, z3, z4, z5;
 564   int* wsptr;
 565   int* outptr;
 566   const short* inptr;
 567   int ctr;
 568   int workspace[DCTSIZE2];      /* buffers data between passes */
 569   int data[DCTSIZE2];
 570   // SHIFT_TEMPS
 571
 572   /* Pass 1: process columns from input, store into work array. */
 573   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 574   /* furthermore, we scale the results by 2**PASS1_BITS. */
 575
 576   inptr = in;
 577   wsptr = workspace;
 578   for (ctr = DCTSIZE; ctr > 0; ctr--) {
 579     /* Due to quantization, we will usually find that many of the input
 580      * coefficients are zero, especially the AC terms.  We can exploit this
 581      * by short-circuiting the IDCT calculation for any column in which all
 582      * the AC terms are zero.  In that case each output is equal to the
 583      * DC coefficient (with scale factor as needed).
 584      * With typical images and quantization tables, half or more of the
 585      * column DCT calculations can be simplified this way.
 586      */
 587
 588     if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 589         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 590         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 591         inptr[DCTSIZE*7] == 0 ) {
 592       /* AC terms all zero */
 593       int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
 594
 595       wsptr[DCTSIZE*0] = dcval;
 596       wsptr[DCTSIZE*1] = dcval;
 597       wsptr[DCTSIZE*2] = dcval;
 598       wsptr[DCTSIZE*3] = dcval;
 599       wsptr[DCTSIZE*4] = dcval;
 600       wsptr[DCTSIZE*5] = dcval;
 601       wsptr[DCTSIZE*6] = dcval;
 602       wsptr[DCTSIZE*7] = dcval;
 603
 604       inptr++;  /* advance pointers to next column */
 605       wsptr++;
 606       continue;
 607     }
 608
 609     /* Even part: reverse the even part of the forward DCT. */
 610     /* The rotator is sqrt(2)*c(-6). */
 611
 612     z2 = inptr[DCTSIZE*2];
 613     z3 = inptr[DCTSIZE*6];
 614
 615     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 616     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 617     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 618
 619     z2 = inptr[DCTSIZE*0];
 620     z3 = inptr[DCTSIZE*4];
 621
 622     tmp0 = (z2 + z3) << CONST_BITS;
 623     tmp1 = (z2 - z3) << CONST_BITS;
 624
 625     tmp10 = tmp0 + tmp3;
 626     tmp13 = tmp0 - tmp3;
 627     tmp11 = tmp1 + tmp2;
 628     tmp12 = tmp1 - tmp2;
 629
 630     /* Odd part per figure 8; the matrix is unitary and hence its
 631      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 632      */
 633
 634     tmp0 = inptr[DCTSIZE*7];
 635     tmp1 = inptr[DCTSIZE*5];
 636     tmp2 = inptr[DCTSIZE*3];
 637     tmp3 = inptr[DCTSIZE*1];
 638
 639     z1 = tmp0 + tmp3;
 640     z2 = tmp1 + tmp2;
 641     z3 = tmp0 + tmp2;
 642     z4 = tmp1 + tmp3;
 643     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 644
 645     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 646     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 647     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 648     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 649     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 650     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 651     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 652     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 653
 654     z3 += z5;
 655     z4 += z5;
 656
 657     tmp0 += z1 + z3;
 658     tmp1 += z2 + z4;
 659     tmp2 += z2 + z3;
 660     tmp3 += z1 + z4;
 661
 662     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 663
 664     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 665     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 666     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 667     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 668     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 669     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 670     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 671     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 672
 673     inptr++;  /* advance pointers to next column */
 674     wsptr++;
 675   }
 676
 677   /* Pass 2: process rows from work array, store into output array. */
 678   /* Note that we must descale the results by a factor of 8 == 2**3, */
 679   /* and also undo the PASS1_BITS scaling. */
 680
 681   wsptr = workspace;
 682   outptr = data;
 683   for (ctr = 0; ctr < DCTSIZE; ctr++) {
 684     /* Even part: reverse the even part of the forward DCT. */
 685     /* The rotator is sqrt(2)*c(-6). */
 686
 687     z2 = (INT32) wsptr[2];
 688     z3 = (INT32) wsptr[6];
 689
 690     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 691     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 692     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 693
 694     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
 695     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
 696
 697     tmp10 = tmp0 + tmp3;
 698     tmp13 = tmp0 - tmp3;
 699     tmp11 = tmp1 + tmp2;
 700     tmp12 = tmp1 - tmp2;
 701
 702     /* Odd part per figure 8; the matrix is unitary and hence its
 703      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 704      */
 705
 706     tmp0 = (INT32) wsptr[7];
 707     tmp1 = (INT32) wsptr[5];
 708     tmp2 = (INT32) wsptr[3];
 709     tmp3 = (INT32) wsptr[1];
 710
 711     z1 = tmp0 + tmp3;
 712     z2 = tmp1 + tmp2;
 713     z3 = tmp0 + tmp2;
 714     z4 = tmp1 + tmp3;
 715     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 716
 717     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 718     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 719     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 720     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 721     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 722     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 723     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 724     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 725
 726     z3 += z5;
 727     z4 += z5;
 728
 729     tmp0 += z1 + z3;
 730     tmp1 += z2 + z4;
 731     tmp2 += z2 + z3;
 732     tmp3 += z1 + z4;
 733
 734     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 735
 736     outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 737     outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 738     outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 739     outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 740     outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 741     outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 742     outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 743     outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 744
 745     wsptr += DCTSIZE; /* advance pointer to next row */
 746     outptr += DCTSIZE;
 747   }
 748
 749   for(ctr = 0; ctr < DCTSIZE2; ctr++)
 750     out[ctr] = data[ctr];
 751 }