00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "dsputil.h"
00029
00030
/**
 * Apply the overlap smoothing filter across a horizontal edge.
 *
 * For each of 8 adjacent columns the four vertically neighbouring pixels
 * src[-2*stride], src[-stride], src[0] and src[stride] are filtered in place.
 *
 * @param src    pointer to the first pixel of the row just below the edge;
 *               the two rows above and one row below must be accessible
 * @param stride distance in bytes between vertically adjacent pixels
 */
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int i;
    int rnd = 1; /* rounding term, toggled for every column */

    for (i = 0; i < 8; i++) {
        const int a = src[-2*stride];
        const int b = src[-stride];
        const int c = src[0];
        const int d = src[stride];
        const int d1 = (a - d + 3 + rnd) >> 3;
        const int d2 = (a - d + b - c + 4 - rnd) >> 3;
        const int inner_above = b - d2;
        const int inner_below = c + d2;

        /* The outer taps always stay between a and d, so they cannot leave
         * the 0..255 range. The inner taps can (e.g. a=255, b=c=d=0 gives
         * b - d2 = -32), so they are saturated instead of being allowed to
         * wrap around when stored into uint8_t. */
        src[-2*stride] = a - d1;
        src[-stride]   = inner_above < 0 ? 0 : inner_above > 255 ? 255 : inner_above;
        src[0]         = inner_below < 0 ? 0 : inner_below > 255 ? 255 : inner_below;
        src[stride]    = d + d1;

        src++;
        rnd = !rnd;
    }
}
00055
/**
 * Apply the overlap smoothing filter across a vertical edge.
 *
 * For each of 8 consecutive rows the four horizontally neighbouring pixels
 * src[-2], src[-1], src[0] and src[1] are filtered in place.
 *
 * @param src    pointer to the first pixel right of the edge in the first
 *               row; two pixels to the left and one to the right must be
 *               accessible
 * @param stride distance in bytes between vertically adjacent pixels
 */
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int i;
    int rnd = 1; /* rounding term, toggled for every row */

    for (i = 0; i < 8; i++) {
        const int a = src[-2];
        const int b = src[-1];
        const int c = src[0];
        const int d = src[1];
        const int d1 = (a - d + 3 + rnd) >> 3;
        const int d2 = (a - d + b - c + 4 - rnd) >> 3;
        const int inner_left  = b - d2;
        const int inner_right = c + d2;

        /* The outer taps always stay between a and d, so they cannot leave
         * the 0..255 range. The inner taps can (e.g. a=255, b=c=d=0 gives
         * b - d2 = -32), so they are saturated instead of being allowed to
         * wrap around when stored into uint8_t. */
        src[-2] = a - d1;
        src[-1] = inner_left  < 0 ? 0 : inner_left  > 255 ? 255 : inner_left;
        src[0]  = inner_right < 0 ? 0 : inner_right > 255 ? 255 : inner_right;
        src[1]  = d + d1;

        src += stride;
        rnd = !rnd;
    }
}
00080
00081
00084 static void vc1_inv_trans_8x8_c(DCTELEM block[64])
00085 {
00086 int i;
00087 register int t1,t2,t3,t4,t5,t6,t7,t8;
00088 DCTELEM *src, *dst;
00089
00090 src = block;
00091 dst = block;
00092 for(i = 0; i < 8; i++){
00093 t1 = 12 * (src[0] + src[4]);
00094 t2 = 12 * (src[0] - src[4]);
00095 t3 = 16 * src[2] + 6 * src[6];
00096 t4 = 6 * src[2] - 16 * src[6];
00097
00098 t5 = t1 + t3;
00099 t6 = t2 + t4;
00100 t7 = t2 - t4;
00101 t8 = t1 - t3;
00102
00103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
00104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
00105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
00106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
00107
00108 dst[0] = (t5 + t1 + 4) >> 3;
00109 dst[1] = (t6 + t2 + 4) >> 3;
00110 dst[2] = (t7 + t3 + 4) >> 3;
00111 dst[3] = (t8 + t4 + 4) >> 3;
00112 dst[4] = (t8 - t4 + 4) >> 3;
00113 dst[5] = (t7 - t3 + 4) >> 3;
00114 dst[6] = (t6 - t2 + 4) >> 3;
00115 dst[7] = (t5 - t1 + 4) >> 3;
00116
00117 src += 8;
00118 dst += 8;
00119 }
00120
00121 src = block;
00122 dst = block;
00123 for(i = 0; i < 8; i++){
00124 t1 = 12 * (src[ 0] + src[32]);
00125 t2 = 12 * (src[ 0] - src[32]);
00126 t3 = 16 * src[16] + 6 * src[48];
00127 t4 = 6 * src[16] - 16 * src[48];
00128
00129 t5 = t1 + t3;
00130 t6 = t2 + t4;
00131 t7 = t2 - t4;
00132 t8 = t1 - t3;
00133
00134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
00135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
00136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
00137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
00138
00139 dst[ 0] = (t5 + t1 + 64) >> 7;
00140 dst[ 8] = (t6 + t2 + 64) >> 7;
00141 dst[16] = (t7 + t3 + 64) >> 7;
00142 dst[24] = (t8 + t4 + 64) >> 7;
00143 dst[32] = (t8 - t4 + 64 + 1) >> 7;
00144 dst[40] = (t7 - t3 + 64 + 1) >> 7;
00145 dst[48] = (t6 - t2 + 64 + 1) >> 7;
00146 dst[56] = (t5 - t1 + 64 + 1) >> 7;
00147
00148 src++;
00149 dst++;
00150 }
00151 }
00152
00155 static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
00156 {
00157 int i;
00158 register int t1,t2,t3,t4,t5,t6,t7,t8;
00159 DCTELEM *src, *dst;
00160 int off;
00161
00162 off = n * 32;
00163 src = block + off;
00164 dst = block + off;
00165 for(i = 0; i < 4; i++){
00166 t1 = 12 * (src[0] + src[4]);
00167 t2 = 12 * (src[0] - src[4]);
00168 t3 = 16 * src[2] + 6 * src[6];
00169 t4 = 6 * src[2] - 16 * src[6];
00170
00171 t5 = t1 + t3;
00172 t6 = t2 + t4;
00173 t7 = t2 - t4;
00174 t8 = t1 - t3;
00175
00176 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
00177 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
00178 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
00179 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
00180
00181 dst[0] = (t5 + t1 + 4) >> 3;
00182 dst[1] = (t6 + t2 + 4) >> 3;
00183 dst[2] = (t7 + t3 + 4) >> 3;
00184 dst[3] = (t8 + t4 + 4) >> 3;
00185 dst[4] = (t8 - t4 + 4) >> 3;
00186 dst[5] = (t7 - t3 + 4) >> 3;
00187 dst[6] = (t6 - t2 + 4) >> 3;
00188 dst[7] = (t5 - t1 + 4) >> 3;
00189
00190 src += 8;
00191 dst += 8;
00192 }
00193
00194 src = block + off;
00195 dst = block + off;
00196 for(i = 0; i < 8; i++){
00197 t1 = 17 * (src[ 0] + src[16]);
00198 t2 = 17 * (src[ 0] - src[16]);
00199 t3 = 22 * src[ 8];
00200 t4 = 22 * src[24];
00201 t5 = 10 * src[ 8];
00202 t6 = 10 * src[24];
00203
00204 dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
00205 dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
00206 dst[16] = (t2 + t4 - t5 + 64) >> 7;
00207 dst[24] = (t1 - t3 - t6 + 64) >> 7;
00208
00209 src ++;
00210 dst ++;
00211 }
00212 }
00213
00216 static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
00217 {
00218 int i;
00219 register int t1,t2,t3,t4,t5,t6,t7,t8;
00220 DCTELEM *src, *dst;
00221 int off;
00222
00223 off = n * 4;
00224 src = block + off;
00225 dst = block + off;
00226 for(i = 0; i < 8; i++){
00227 t1 = 17 * (src[0] + src[2]);
00228 t2 = 17 * (src[0] - src[2]);
00229 t3 = 22 * src[1];
00230 t4 = 22 * src[3];
00231 t5 = 10 * src[1];
00232 t6 = 10 * src[3];
00233
00234 dst[0] = (t1 + t3 + t6 + 4) >> 3;
00235 dst[1] = (t2 - t4 + t5 + 4) >> 3;
00236 dst[2] = (t2 + t4 - t5 + 4) >> 3;
00237 dst[3] = (t1 - t3 - t6 + 4) >> 3;
00238
00239 src += 8;
00240 dst += 8;
00241 }
00242
00243 src = block + off;
00244 dst = block + off;
00245 for(i = 0; i < 4; i++){
00246 t1 = 12 * (src[ 0] + src[32]);
00247 t2 = 12 * (src[ 0] - src[32]);
00248 t3 = 16 * src[16] + 6 * src[48];
00249 t4 = 6 * src[16] - 16 * src[48];
00250
00251 t5 = t1 + t3;
00252 t6 = t2 + t4;
00253 t7 = t2 - t4;
00254 t8 = t1 - t3;
00255
00256 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
00257 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
00258 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
00259 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
00260
00261 dst[ 0] = (t5 + t1 + 64) >> 7;
00262 dst[ 8] = (t6 + t2 + 64) >> 7;
00263 dst[16] = (t7 + t3 + 64) >> 7;
00264 dst[24] = (t8 + t4 + 64) >> 7;
00265 dst[32] = (t8 - t4 + 64 + 1) >> 7;
00266 dst[40] = (t7 - t3 + 64 + 1) >> 7;
00267 dst[48] = (t6 - t2 + 64 + 1) >> 7;
00268 dst[56] = (t5 - t1 + 64 + 1) >> 7;
00269
00270 src++;
00271 dst++;
00272 }
00273 }
00274
00277 static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
00278 {
00279 int i;
00280 register int t1,t2,t3,t4,t5,t6;
00281 DCTELEM *src, *dst;
00282 int off;
00283
00284 off = (n&1) * 4 + (n&2) * 16;
00285 src = block + off;
00286 dst = block + off;
00287 for(i = 0; i < 4; i++){
00288 t1 = 17 * (src[0] + src[2]);
00289 t2 = 17 * (src[0] - src[2]);
00290 t3 = 22 * src[1];
00291 t4 = 22 * src[3];
00292 t5 = 10 * src[1];
00293 t6 = 10 * src[3];
00294
00295 dst[0] = (t1 + t3 + t6 + 4) >> 3;
00296 dst[1] = (t2 - t4 + t5 + 4) >> 3;
00297 dst[2] = (t2 + t4 - t5 + 4) >> 3;
00298 dst[3] = (t1 - t3 - t6 + 4) >> 3;
00299
00300 src += 8;
00301 dst += 8;
00302 }
00303
00304 src = block + off;
00305 dst = block + off;
00306 for(i = 0; i < 4; i++){
00307 t1 = 17 * (src[ 0] + src[16]);
00308 t2 = 17 * (src[ 0] - src[16]);
00309 t3 = 22 * src[ 8];
00310 t4 = 22 * src[24];
00311 t5 = 10 * src[ 8];
00312 t6 = 10 * src[24];
00313
00314 dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
00315 dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
00316 dst[16] = (t2 + t4 - t5 + 64) >> 7;
00317 dst[24] = (t1 - t3 - t6 + 64) >> 7;
00318
00319 src ++;
00320 dst ++;
00321 }
00322 }
00323
00324
00326 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
00327 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
00328 { \
00329 switch(mode){ \
00330 case 0: \
00331 return 0; \
00332 case 1: \
00333 return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
00334 case 2: \
00335 return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
00336 case 3: \
00337 return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
00338 } \
00339 return 0; \
00340 }
00341
00342 VC1_MSPEL_FILTER_16B(ver, uint8_t);
00343 VC1_MSPEL_FILTER_16B(hor, int16_t);
00344
00345
00348 static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
00349 {
00350 switch(mode){
00351 case 0:
00352 return src[0];
00353 case 1:
00354 return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
00355 case 2:
00356 return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
00357 case 3:
00358 return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
00359 }
00360 return 0;
00361 }
00362
/**
 * Filter one 8x8 block from src into dst.
 *
 * @param dst    destination, 8 rows of 8 pixels, rows stride bytes apart
 * @param src    source, same row distance as dst; depending on the modes,
 *               pixels one position before and two positions after the
 *               8x8 area are read in the filtered direction(s)
 * @param stride distance in bytes between rows of both dst and src
 * @param hmode  horizontal filter selector (0 = none, 1..3)
 * @param vmode  vertical filter selector (0 = none, 1..3)
 * @param rnd    rounding control value folded into the rounding constants
 */
static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
{
    int x, y;

    if (!vmode) {
        /* no vertical filter: filter along the row (hmode 0 copies) */
        for (y = 0; y < 8; y++) {
            const uint8_t *in  = src + y * stride;
            uint8_t       *out = dst + y * stride;

            for (x = 0; x < 8; x++)
                out[x] = av_clip_uint8(vc1_mspel_filter(in + x, 1, hmode, rnd));
        }
        return;
    }

    if (!hmode) {
        /* vertical filter only */
        const int round = 1 - rnd;

        for (y = 0; y < 8; y++) {
            const uint8_t *in  = src + y * stride;
            uint8_t       *out = dst + y * stride;

            for (x = 0; x < 8; x++)
                out[x] = av_clip_uint8(vc1_mspel_filter(in + x, stride, vmode, round));
        }
        return;
    }

    /* both directions: vertical pass into a 16-bit buffer, then horizontal */
    {
        static const int mode_shift[] = { 0, 5, 1, 5 };
        const int shift     = (mode_shift[hmode] + mode_shift[vmode]) >> 1;
        const int round_ver = (1 << (shift - 1)) + rnd - 1;
        const int round_hor = 64 - rnd;
        int16_t   tmp[11 * 8];

        /* 11 columns per row: the horizontal taps need one extra column on
         * the left and two on the right of the 8 output columns */
        for (y = 0; y < 8; y++) {
            const uint8_t *in  = src - 1 + y * stride;
            int16_t       *row = tmp + 11 * y;

            for (x = 0; x < 11; x++)
                row[x] = (vc1_mspel_ver_filter_16bits(in + x, stride, vmode) + round_ver) >> shift;
        }

        for (y = 0; y < 8; y++) {
            const int16_t *row = tmp + 11 * y + 1;
            uint8_t       *out = dst + y * stride;

            for (x = 0; x < 8; x++)
                out[x] = av_clip_uint8((vc1_mspel_hor_filter_16bits(row + x, 1, hmode) + round_hor) >> 7);
        }
    }
}
00419
00420
00421
00422
/* Variant for hmode 0 / vmode 0; only its prototype appears in this part of the file. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

/**
 * Define put_vc1_mspel_mc<HMODE><VMODE>_c(), a thin wrapper that calls
 * vc1_mspel_mc() with the horizontal and vertical filter modes fixed.
 */
#define PUT_VC1_MSPEL(HMODE, VMODE)                                                                              \
static void put_vc1_mspel_mc ## HMODE ## VMODE ## _c(uint8_t *dst, const uint8_t *src, int stride, int rnd)     \
{                                                                                                                \
    vc1_mspel_mc(dst, src, stride, HMODE, VMODE, rnd);                                                           \
}

/* vmode 0 (hmode 0 is covered by ff_put_vc1_mspel_mc00_c) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* vmode 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* vmode 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* vmode 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
00448
00449 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
00450 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
00451 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
00452 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
00453 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
00454 dsp->vc1_h_overlap = vc1_h_overlap_c;
00455 dsp->vc1_v_overlap = vc1_v_overlap_c;
00456
00457 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
00458 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c;
00459 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c;
00460 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c;
00461 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c;
00462 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c;
00463 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c;
00464 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c;
00465 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c;
00466 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c;
00467 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
00468 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
00469 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
00470 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
00471 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
00472 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
00473 }