1 #include <RAMCODE/ramcode.h>
2 #include "video_gen_sad.h"
5 sad_fn gen_sad( uint32_t* buffer, uint32_t buffer_length,
6 uint32_t bytes_per_entry_ref, uint32_t bytes_per_entry_src,
7 uint16_t offset_ref, uint16_t offset_src,
8 uint32_t square_length )
11 /* Register usage in generated code
12 lr/r14 : sad accumulation
13 r0 : ref - r2 : data read with r0
14 r1 : src - r3 : data read with r1
19 int8_t *buffer_save, *buffer_current;
20 sad_fn sad = (sad_fn) NULL;
22 uint32_t prologue[] = {
23 0xe52de004, // str lr, [sp, #-4]!
24 0xe3a0c000, // mov ip, 0x0
25 0xe3a0e000 // mov lr, 0x0
28 uint32_t epilogue[] = {
29 0xe1a0000e, // mov r0, lr
30 0xe49df004 // ldr pc, [sp], #4
33 uint32_t sad_acc[] = {
34 0xe0732002, // rsbs r2, r3, r2
35 0xb2622000, // rsblt r2, r2, #0
36 0xe08ee002 // add lr, lr, r2
39 uint32_t dec_count[] = {
40 0xe25cc001 // subs ip, ip, #1
43 offset_ref += ( 1 - square_length );
44 offset_src += ( 1 - square_length );
46 if( offset_ref < 1024 && offset_src < 1024 )
48 sad = (sad_fn) buffer;
51 buffer = emit_buffer( buffer, prologue, sizeof(prologue) );
52 b[1] |= square_length;
54 switch( bytes_per_entry_ref )
57 buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
61 buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
65 buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
69 switch( bytes_per_entry_src )
72 buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
76 buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
80 buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
84 buffer_save = (int8_t*)&buffer[0];
88 while( square_length > 0 )
90 emit_buffer( buffer, sad_acc, sizeof(sad_acc) );
92 switch( bytes_per_entry_ref )
95 buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
99 buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
103 buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_0, ARM_REG_2, 1 );
107 switch( bytes_per_entry_src )
110 buffer = emit_load8_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
114 buffer = emit_load16_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
118 buffer = emit_load32_pi( COND_FIELD_ALWAYS, buffer, ARM_REG_1, ARM_REG_3, 1 );
125 buffer = emit_buffer( buffer, sad_acc, sizeof(sad_acc) );
126 buffer = emit_buffer( buffer, dec_count, sizeof(dec_count) );
128 switch( bytes_per_entry_ref )
131 buffer = emit_load8_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
135 buffer = emit_load16_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
139 buffer = emit_load32_pi( COND_FIELD_NE, buffer, ARM_REG_0, ARM_REG_2, offset_ref );
143 switch( bytes_per_entry_src )
146 buffer = emit_load8_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
150 buffer = emit_load16_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
154 buffer = emit_load32_pi( COND_FIELD_NE, buffer, ARM_REG_1, ARM_REG_3, offset_src );
158 buffer_current = (int8_t*)&buffer[0];
159 buffer = emit_branch( COND_FIELD_NE, buffer, buffer_save - buffer_current - 8 );
161 buffer = emit_buffer( buffer, epilogue, sizeof(epilogue) );
163 arm_mmu_flush_dcache();