5 #include <VP_Api/vp_api.h>
6 #include <VP_Api/vp_api_thread_helper.h>
7 #include <VP_Api/vp_api_error.h>
8 #include <VP_Stages/vp_stages_configs.h>
9 #include <VP_Stages/vp_stages_io_console.h>
10 #include <VP_Stages/vp_stages_o_sdl.h>
11 #include <VP_Stages/vp_stages_io_com.h>
12 #include <VP_Stages/vp_stages_io_file.h>
13 #include <VP_Os/vp_os_print.h>
14 #include <VP_Os/vp_os_malloc.h>
15 #include <VP_Os/vp_os_delay.h>
17 #include <MJPEG/mjpeg.h>
21 #define RECORD_MJPEG_VIDEO
23 #define CAMIF_BLOCKLINES_LOG2 4
24 #define CAMIF_BLOCKLINES (1 << CAMIF_BLOCKLINES_LOG2)
26 #define ACQ_WIDTH (176)
27 #define ACQ_HEIGHT (144)
29 #define WINDOW_WIDTH (320)
30 #define WINDOW_HEIGHT (240)
32 #ifdef RECORD_MJPEG_VIDEO
34 #endif // ! RECORD_MJPEG_VIDEO
36 static PIPELINE_HANDLE pipeline_handle;
39 PROTO_THREAD_ROUTINE(escaper,nomParams);
40 PROTO_THREAD_ROUTINE(app,nomParams);
43 THREAD_TABLE_ENTRY(escaper,20)
44 THREAD_TABLE_ENTRY(app,20)
48 typedef struct _mjpeg_stage_decoding_config_t
52 vp_api_picture_t* picture;
54 uint32_t out_buffer_size;
56 } mjpeg_stage_decoding_config_t;
58 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
60 uint32_t width = cfg->picture->width;
61 uint32_t height = cfg->picture->height;
62 enum PixelFormat format = cfg->picture->format;
64 stream_new( &cfg->stream, OUTPUT_STREAM );
66 return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, width, height, format );
69 static uint32_t get_current_time()
71 static bool_t init = TRUE;
72 static struct timeval tv_init;
81 gettimeofday(&tv_init, NULL);
85 gettimeofday(&tv, NULL);
86 tv.tv_sec -= tv_init.tv_sec;
87 tv.tv_usec -= tv_init.tv_usec;
92 t = (s * 1000.0f) + ( us / 1000.0f);
97 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
101 static int32_t start_time, dt;
103 vp_os_mutex_lock( &out->lock );
105 if(out->status == VP_API_STATUS_INIT)
107 #ifdef RECORD_MJPEG_VIDEO
108 fp = fopen("video", "wb");
109 #endif // ! RECORD_MJPEG_VIDEO
112 out->buffers = (int8_t**)cfg->picture;
113 out->indexBuffer = 0;
116 out->status = VP_API_STATUS_PROCESSING;
121 if( in->status == VP_API_STATUS_ENDED )
123 #ifdef RECORD_MJPEG_VIDEO
125 #endif // ! RECORD_MJPEG_VIDEO
126 out->status = in->status;
129 // Several cases must be handled in this stage
130 // 1st: Input buffer is too small to decode a complete picture
131 // 2nd: Input buffer is big enough to decode 1 frame
132 // 3rd: Input buffer is so big we can decode more than 1 frame
136 if( out->status == VP_API_STATUS_PROCESSING )
138 // Reinit stream with new data
139 stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
140 #ifdef RECORD_MJPEG_VIDEO
142 fwrite(in->buffers[in->indexBuffer], in->size, 1, fp);
143 #endif // ! RECORD_MJPEG_VIDEO
146 if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
148 // If out->size == 1 it means picture is ready
150 out->status = VP_API_STATUS_PROCESSING;
152 start_time = get_current_time();
153 res = mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
154 dt += (get_current_time() - start_time);
157 if( FAILED(stream_is_empty( &cfg->stream )) )
159 // Some data are still in stream
160 // Next time we run this stage we don't want this data to be lost
162 out->status = VP_API_STATUS_STILL_RUNNING;
167 // we got one picture (handle case 1)
169 PRINT( "%d picture decoded in %d ms\n", cfg->mjpeg.num_frames, dt );
175 vp_os_mutex_unlock( &out->lock );
180 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
182 stream_delete( &cfg->stream );
184 return mjpeg_release( &cfg->mjpeg );
188 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
189 (vp_api_stage_handle_msg_t) NULL,
190 (vp_api_stage_open_t) mjpeg_stage_decoding_open,
191 (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
192 (vp_api_stage_close_t) mjpeg_stage_decoding_close
196 main(int argc, char **argv)
198 START_THREAD(escaper, NO_PARAM);
199 START_THREAD(app, argv);
201 JOIN_THREAD(escaper);
207 PROTO_THREAD_ROUTINE(app,argv)
210 uint32_t nb_stages = 0;
211 vp_api_picture_t picture;
213 vp_api_io_pipeline_t pipeline;
214 vp_api_io_data_t out;
215 vp_api_io_stage_t stages[NB_STAGES];
217 vp_stages_input_com_config_t icc;
218 mjpeg_stage_decoding_config_t dec;
219 vp_stages_output_sdl_config_t osc;
222 vp_com_wifi_connection_t connection;
223 vp_com_wifi_config_t config;
225 /// Picture configuration
226 picture.format = PIX_FMT_YUV420P;
228 picture.width = ACQ_WIDTH;
229 picture.height = ACQ_HEIGHT;
230 picture.framerate = 15;
232 picture.y_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
233 picture.cr_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
234 picture.cb_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
236 picture.y_line_size = ACQ_WIDTH;
237 picture.cb_line_size = ACQ_WIDTH / 2;
238 picture.cr_line_size = ACQ_WIDTH / 2;
243 dec.picture = &picture;
244 dec.out_buffer_size = 4096;
246 vp_os_memset( &icc, 0, sizeof(vp_stages_input_com_config_t));
247 vp_os_memset( &connection, 0, sizeof(vp_com_wifi_connection_t) );
248 strcpy(connection.networkName, "linksys");
249 vp_stages_fill_default_config(WIFI_COM_CONFIG, &config, sizeof(config));
250 vp_os_memset(&com, 0, sizeof(vp_com_t));
252 com.type = VP_COM_WIFI;
254 icc.config = (vp_com_config_t*)&config;
255 icc.connection = (vp_com_connection_t*)&connection;
256 icc.socket.type = VP_COM_CLIENT;
257 icc.socket.protocol = VP_COM_TCP;
258 icc.socket.port = 5555;
259 icc.buffer_size = 1024;
260 icc.sockopt = VP_COM_NON_BLOCKING;
262 strcpy(icc.socket.serverHost, "30.30.30.1");
264 osc.width = WINDOW_WIDTH;
265 osc.height = WINDOW_HEIGHT;
267 osc.window_width = WINDOW_WIDTH;
268 osc.window_height = WINDOW_HEIGHT;
269 osc.pic_width = ACQ_WIDTH;
270 osc.pic_height = ACQ_HEIGHT;
271 osc.y_size = ACQ_WIDTH*ACQ_HEIGHT;
272 osc.c_size = (ACQ_WIDTH*ACQ_HEIGHT) >> 2;
274 stages[nb_stages].type = VP_API_INPUT_SOCKET;
275 stages[nb_stages].cfg = (void *)&icc;
276 stages[nb_stages].funcs = vp_stages_input_com_funcs;
280 stages[nb_stages].type = VP_API_FILTER_DECODER;
281 stages[nb_stages].cfg = (void*)&dec;
282 stages[nb_stages].funcs = mjpeg_decoding_funcs;
286 stages[nb_stages].type = VP_API_OUTPUT_SDL;
287 stages[nb_stages].cfg = (void *)&osc;
288 stages[nb_stages].funcs = vp_stages_output_sdl_funcs;
292 pipeline.nb_stages = nb_stages;
293 pipeline.stages = &stages[0];
295 vp_api_open(&pipeline, &pipeline_handle);
296 out.status = VP_API_STATUS_PROCESSING;
297 while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING));
299 vp_api_close(&pipeline, &pipeline_handle);
304 ///*******************************************************************************************************************///
307 // static THREAD_HANDLE dct_thread_handle;
309 static dct_io_buffer_t* current_io_buffer;
310 static dct_io_buffer_t* result_io_buffer;
312 static void fdct(const unsigned short* in, short* out);
313 static void idct(const short* in, unsigned short* out);
316 //-----------------------------------------------------------------------------
318 //-----------------------------------------------------------------------------
321 bool_t dct_init(void)
323 current_io_buffer = NULL;
324 result_io_buffer = NULL;
329 bool_t dct_compute( dct_io_buffer_t* io_buffer )
333 assert(io_buffer != NULL);
335 if( current_io_buffer == NULL && result_io_buffer == NULL )
337 current_io_buffer = io_buffer;
345 dct_io_buffer_t* dct_result( void )
348 dct_io_buffer_t* io_buffer;
352 if( current_io_buffer != NULL)
354 if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
356 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
358 fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
361 else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
363 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
365 idct(current_io_buffer->input[i], current_io_buffer->output[i]);
369 io_buffer = current_io_buffer;
370 current_io_buffer = NULL;
377 //-----------------------------------------------------------------------------
379 //-----------------------------------------------------------------------------
382 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
383 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
384 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
385 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
386 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
387 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
388 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
389 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
390 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
391 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
392 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
393 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
399 #define CONST_BITS 13
401 #define ONE ((INT32) 1)
402 #define MULTIPLY(var,const) ((var) * (const))
403 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
404 #define RIGHT_SHIFT(x,shft) ((x) >> (shft))
406 static void fdct(const unsigned short* in, short* out)
408 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
409 INT32 tmp10, tmp11, tmp12, tmp13;
410 INT32 z1, z2, z3, z4, z5;
414 int data[DCTSIZE * DCTSIZE];
418 for( i = 0; i < DCTSIZE; i++ )
420 for( j = 0; j < DCTSIZE; j++ )
424 temp = in[i*DCTSIZE + j];
425 dataptr[i*DCTSIZE + j] = temp;
429 /* Pass 1: process rows. */
430 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
431 /* furthermore, we scale the results by 2**PASS1_BITS. */
434 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
435 tmp0 = dataptr[0] + dataptr[7];
436 tmp7 = dataptr[0] - dataptr[7];
437 tmp1 = dataptr[1] + dataptr[6];
438 tmp6 = dataptr[1] - dataptr[6];
439 tmp2 = dataptr[2] + dataptr[5];
440 tmp5 = dataptr[2] - dataptr[5];
441 tmp3 = dataptr[3] + dataptr[4];
442 tmp4 = dataptr[3] - dataptr[4];
444 /* Even part per LL&M figure 1 --- note that published figure is faulty;
445 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
453 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
454 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
456 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
457 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
458 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
460 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
461 * cK represents cos(K*pi/16).
462 * i0..i3 in the paper are tmp4..tmp7 here.
469 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
471 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
472 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
473 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
474 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
475 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
476 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
477 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
478 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
483 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
484 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
485 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
486 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
488 dataptr += DCTSIZE; /* advance pointer to next row */
491 /* Pass 2: process columns.
492 * We remove the PASS1_BITS scaling, but leave the results scaled up
493 * by an overall factor of 8.
497 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
498 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
499 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
500 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
501 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
502 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
503 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
504 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
505 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
507 /* Even part per LL&M figure 1 --- note that published figure is faulty;
508 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
516 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
517 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
519 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
520 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
521 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
523 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
524 * cK represents cos(K*pi/16).
525 * i0..i3 in the paper are tmp4..tmp7 here.
532 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
534 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
535 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
536 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
537 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
538 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
539 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
540 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
541 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
546 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
547 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
548 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
549 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
551 dataptr++; /* advance pointer to next column */
554 for( i = 0; i < DCTSIZE; i++ )
555 for( j = 0; j < DCTSIZE; j++ )
556 out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
559 static void idct(const short* in, unsigned short* out)
561 INT32 tmp0, tmp1, tmp2, tmp3;
562 INT32 tmp10, tmp11, tmp12, tmp13;
563 INT32 z1, z2, z3, z4, z5;
568 int workspace[DCTSIZE2]; /* buffers data between passes */
572 /* Pass 1: process columns from input, store into work array. */
573 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
574 /* furthermore, we scale the results by 2**PASS1_BITS. */
578 for (ctr = DCTSIZE; ctr > 0; ctr--) {
579 /* Due to quantization, we will usually find that many of the input
580 * coefficients are zero, especially the AC terms. We can exploit this
581 * by short-circuiting the IDCT calculation for any column in which all
582 * the AC terms are zero. In that case each output is equal to the
583 * DC coefficient (with scale factor as needed).
584 * With typical images and quantization tables, half or more of the
585 * column DCT calculations can be simplified this way.
588 if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
589 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
590 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
591 inptr[DCTSIZE*7] == 0 ) {
592 /* AC terms all zero */
593 int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
595 wsptr[DCTSIZE*0] = dcval;
596 wsptr[DCTSIZE*1] = dcval;
597 wsptr[DCTSIZE*2] = dcval;
598 wsptr[DCTSIZE*3] = dcval;
599 wsptr[DCTSIZE*4] = dcval;
600 wsptr[DCTSIZE*5] = dcval;
601 wsptr[DCTSIZE*6] = dcval;
602 wsptr[DCTSIZE*7] = dcval;
604 inptr++; /* advance pointers to next column */
609 /* Even part: reverse the even part of the forward DCT. */
610 /* The rotator is sqrt(2)*c(-6). */
612 z2 = inptr[DCTSIZE*2];
613 z3 = inptr[DCTSIZE*6];
615 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
616 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
617 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
619 z2 = inptr[DCTSIZE*0];
620 z3 = inptr[DCTSIZE*4];
622 tmp0 = (z2 + z3) << CONST_BITS;
623 tmp1 = (z2 - z3) << CONST_BITS;
630 /* Odd part per figure 8; the matrix is unitary and hence its
631 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
634 tmp0 = inptr[DCTSIZE*7];
635 tmp1 = inptr[DCTSIZE*5];
636 tmp2 = inptr[DCTSIZE*3];
637 tmp3 = inptr[DCTSIZE*1];
643 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
645 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
646 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
647 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
648 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
649 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
650 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
651 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
652 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
662 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
664 wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
665 wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
666 wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
667 wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
668 wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
669 wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
670 wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
671 wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
673 inptr++; /* advance pointers to next column */
677 /* Pass 2: process rows from work array, store into output array. */
678 /* Note that we must descale the results by a factor of 8 == 2**3, */
679 /* and also undo the PASS1_BITS scaling. */
683 for (ctr = 0; ctr < DCTSIZE; ctr++) {
684 /* Even part: reverse the even part of the forward DCT. */
685 /* The rotator is sqrt(2)*c(-6). */
687 z2 = (INT32) wsptr[2];
688 z3 = (INT32) wsptr[6];
690 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
691 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
692 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
694 tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
695 tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
702 /* Odd part per figure 8; the matrix is unitary and hence its
703 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
706 tmp0 = (INT32) wsptr[7];
707 tmp1 = (INT32) wsptr[5];
708 tmp2 = (INT32) wsptr[3];
709 tmp3 = (INT32) wsptr[1];
715 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
717 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
718 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
719 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
720 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
721 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
722 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
723 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
724 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
734 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
736 outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
737 outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
738 outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
739 outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
740 outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
741 outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
742 outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
743 outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
745 wsptr += DCTSIZE; /* advance pointer to next row */
749 for(ctr = 0; ctr < DCTSIZE2; ctr++)
750 out[ctr] = data[ctr];