00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "mpegvideo.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "h263.h"
00036 #include "snow.h"
00037
00038
/* Forward declaration only; the definition is not in this part of the file.
 * The comment on the CONFIG_SNOW_ENCODER section below says the DWT lives in
 * snow.c. Called by w_c() with an int buffer, its width/height, a row stride
 * in elements, a wavelet type selector and the number of decomposition levels. */
00039 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
00040
00041
/* Forward declaration only; no definition or call is visible in this part of
 * the file (presumably implemented alongside the Vorbis decoder -- confirm). */
00042 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
00043
00044
/* Forward declaration only; no definition or call is visible in this part of
 * the file (presumably implemented alongside the FLAC encoder -- confirm). */
00045 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
00046
/* Lookup tables shared by the pixel routines below. Both are zero-initialized
 * here; their real contents are filled in elsewhere (not visible in this part
 * of the file).
 *
 * ff_cropTbl:   the clamped put/add routines index it through
 *               "ff_cropTbl + MAX_NEG_CROP", so indices from -MAX_NEG_CROP to
 *               255 + MAX_NEG_CROP stay inside the array. Presumably it maps a
 *               value to its clamp into 0..255 -- confirm against the init code.
 * ff_squareTbl: the SSE/norm routines index it through "ff_squareTbl + 256"
 *               with values in -255..255. Presumably entry x holds x*x --
 *               confirm against the init code. */
00047 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
00048 uint32_t ff_squareTbl[512] = {0, };
00049
/* 64-entry scan-order table with values in 0..63. Read as row*8+column
 * positions of an 8x8 block, the sequence 0, 1, 8, 16, 9, 2, ... walks the
 * block along its anti-diagonals (the classic zigzag order). */
00050 const uint8_t ff_zigzag_direct[64] = {
00051 0, 1, 8, 16, 9, 2, 3, 10,
00052 17, 24, 32, 25, 18, 11, 4, 5,
00053 12, 19, 26, 33, 40, 48, 41, 34,
00054 27, 20, 13, 6, 7, 14, 21, 28,
00055 35, 42, 49, 56, 57, 50, 43, 36,
00056 29, 22, 15, 23, 30, 37, 44, 51,
00057 58, 59, 52, 45, 38, 31, 39, 46,
00058 53, 60, 61, 54, 47, 55, 62, 63
00059 };
00060
00061
00062
/* Alternative 64-entry scan-order table with values in 0..63.
 * NOTE(review): the "248" in the name suggests it is meant for blocks coded as
 * two interleaved 4x8 halves (2-4-8 DCT) -- confirm at the users; nothing in
 * this part of the file references it. */
00063 const uint8_t ff_zigzag248_direct[64] = {
00064 0, 8, 1, 9, 16, 24, 2, 10,
00065 17, 25, 32, 40, 48, 56, 33, 41,
00066 18, 26, 3, 11, 4, 12, 19, 27,
00067 34, 42, 49, 57, 50, 58, 35, 43,
00068 20, 28, 5, 13, 6, 14, 21, 29,
00069 36, 44, 51, 59, 52, 60, 37, 45,
00070 22, 30, 7, 15, 23, 31, 38, 46,
00071 53, 61, 54, 62, 39, 47, 55, 63,
00072 };
00073
00074
/* 64 x uint16_t table declared through DECLARE_ALIGNED_8 and zero-initialized
 * here. Not const, so it is presumably filled at init time with the inverse of
 * a zigzag permutation -- confirm against the init code (not visible here). */
00075 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
00076
/* 64-entry scan-order table with values in 0..63. Compared with the zigzag
 * table it starts with a run along the first row (0, 1, 2, 3), i.e. it
 * visits horizontally adjacent positions earlier. */
00077 const uint8_t ff_alternate_horizontal_scan[64] = {
00078 0, 1, 2, 3, 8, 9, 16, 17,
00079 10, 11, 4, 5, 6, 7, 15, 14,
00080 13, 12, 19, 18, 24, 25, 32, 33,
00081 26, 27, 20, 21, 22, 23, 28, 29,
00082 30, 31, 34, 35, 40, 41, 48, 49,
00083 42, 43, 36, 37, 38, 39, 44, 45,
00084 46, 47, 50, 51, 56, 57, 58, 59,
00085 52, 53, 54, 55, 60, 61, 62, 63,
00086 };
00087
/* 64-entry scan-order table with values in 0..63. Compared with the zigzag
 * table it starts with a run down the first column (0, 8, 16, 24), i.e. it
 * visits vertically adjacent positions earlier. */
00088 const uint8_t ff_alternate_vertical_scan[64] = {
00089 0, 8, 16, 24, 1, 9, 2, 10,
00090 17, 25, 32, 40, 48, 56, 57, 49,
00091 41, 33, 26, 18, 3, 11, 4, 12,
00092 19, 27, 34, 42, 50, 58, 35, 43,
00093 51, 59, 20, 28, 5, 13, 6, 14,
00094 21, 29, 36, 44, 52, 60, 37, 45,
00095 53, 61, 22, 30, 7, 15, 23, 31,
00096 38, 46, 54, 62, 39, 47, 55, 63,
00097 };
00098
00099
/* Fixed-point reciprocal table: for i >= 2, ff_inverse[i] is 2^32 / i rounded
 * up (e.g. entry 3 is 1431655766, entry 255 is 16843010). Entry 0 is 0 and
 * entry 1 is 2^32 - 1, since 2^32 itself does not fit in a uint32_t.
 * Presumably used to replace a division by i with a multiply and a 32-bit
 * shift -- confirm at the users; none are visible in this part of the file. */
00100 const uint32_t ff_inverse[256]={
00101 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
00102 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
00103 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
00104 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
00105 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
00106 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
00107 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
00108 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
00109 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
00110 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
00111 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
00112 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
00113 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
00114 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
00115 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
00116 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
00117 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
00118 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
00119 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
00120 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
00121 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
00122 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
00123 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
00124 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
00125 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
00126 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
00127 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
00128 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
00129 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
00130 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
00131 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
00132 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
00133 };
00134
00135
/* File-local 64-entry table with values in 0x00..0x3F; it is not referenced in
 * this part of the file.
 * NOTE(review): the name suggests the coefficient permutation expected by the
 * "simple" MMX IDCT -- confirm where the IDCT permutation is set up. */
00136 static const uint8_t simple_mmx_permutation[64]={
00137 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
00138 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
00139 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
00140 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
00141 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
00142 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
00143 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
00144 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
00145 };
00146
/**
 * Add up every pixel of a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance, in bytes, between the starts of two consecutive rows
 * @return the sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;   /* step to the next row */
    }
    return total;
}
00168
00169 static int pix_norm1_c(uint8_t * pix, int line_size)
00170 {
00171 int s, i, j;
00172 uint32_t *sq = ff_squareTbl + 256;
00173
00174 s = 0;
00175 for (i = 0; i < 16; i++) {
00176 for (j = 0; j < 16; j += 8) {
00177 #if 0
00178 s += sq[pix[0]];
00179 s += sq[pix[1]];
00180 s += sq[pix[2]];
00181 s += sq[pix[3]];
00182 s += sq[pix[4]];
00183 s += sq[pix[5]];
00184 s += sq[pix[6]];
00185 s += sq[pix[7]];
00186 #else
00187 #if LONG_MAX > 2147483647
00188 register uint64_t x=*(uint64_t*)pix;
00189 s += sq[x&0xff];
00190 s += sq[(x>>8)&0xff];
00191 s += sq[(x>>16)&0xff];
00192 s += sq[(x>>24)&0xff];
00193 s += sq[(x>>32)&0xff];
00194 s += sq[(x>>40)&0xff];
00195 s += sq[(x>>48)&0xff];
00196 s += sq[(x>>56)&0xff];
00197 #else
00198 register uint32_t x=*(uint32_t*)pix;
00199 s += sq[x&0xff];
00200 s += sq[(x>>8)&0xff];
00201 s += sq[(x>>16)&0xff];
00202 s += sq[(x>>24)&0xff];
00203 x=*(uint32_t*)(pix+4);
00204 s += sq[x&0xff];
00205 s += sq[(x>>8)&0xff];
00206 s += sq[(x>>16)&0xff];
00207 s += sq[(x>>24)&0xff];
00208 #endif
00209 #endif
00210 pix += 8;
00211 }
00212 pix += line_size - 16;
00213 }
00214 return s;
00215 }
00216
/**
 * Apply bswap_32() to each of the first w 32-bit words of src and store the
 * results at the same positions in dst. Words are processed in ascending
 * order; nothing is written when w <= 0.
 *
 * @param dst destination words
 * @param src source words
 * @param w   number of 32-bit words to convert
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
00234
00235 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00236 {
00237 int s, i;
00238 uint32_t *sq = ff_squareTbl + 256;
00239
00240 s = 0;
00241 for (i = 0; i < h; i++) {
00242 s += sq[pix1[0] - pix2[0]];
00243 s += sq[pix1[1] - pix2[1]];
00244 s += sq[pix1[2] - pix2[2]];
00245 s += sq[pix1[3] - pix2[3]];
00246 pix1 += line_size;
00247 pix2 += line_size;
00248 }
00249 return s;
00250 }
00251
00252 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00253 {
00254 int s, i;
00255 uint32_t *sq = ff_squareTbl + 256;
00256
00257 s = 0;
00258 for (i = 0; i < h; i++) {
00259 s += sq[pix1[0] - pix2[0]];
00260 s += sq[pix1[1] - pix2[1]];
00261 s += sq[pix1[2] - pix2[2]];
00262 s += sq[pix1[3] - pix2[3]];
00263 s += sq[pix1[4] - pix2[4]];
00264 s += sq[pix1[5] - pix2[5]];
00265 s += sq[pix1[6] - pix2[6]];
00266 s += sq[pix1[7] - pix2[7]];
00267 pix1 += line_size;
00268 pix2 += line_size;
00269 }
00270 return s;
00271 }
00272
00273 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00274 {
00275 int s, i;
00276 uint32_t *sq = ff_squareTbl + 256;
00277
00278 s = 0;
00279 for (i = 0; i < h; i++) {
00280 s += sq[pix1[ 0] - pix2[ 0]];
00281 s += sq[pix1[ 1] - pix2[ 1]];
00282 s += sq[pix1[ 2] - pix2[ 2]];
00283 s += sq[pix1[ 3] - pix2[ 3]];
00284 s += sq[pix1[ 4] - pix2[ 4]];
00285 s += sq[pix1[ 5] - pix2[ 5]];
00286 s += sq[pix1[ 6] - pix2[ 6]];
00287 s += sq[pix1[ 7] - pix2[ 7]];
00288 s += sq[pix1[ 8] - pix2[ 8]];
00289 s += sq[pix1[ 9] - pix2[ 9]];
00290 s += sq[pix1[10] - pix2[10]];
00291 s += sq[pix1[11] - pix2[11]];
00292 s += sq[pix1[12] - pix2[12]];
00293 s += sq[pix1[13] - pix2[13]];
00294 s += sq[pix1[14] - pix2[14]];
00295 s += sq[pix1[15] - pix2[15]];
00296
00297 pix1 += line_size;
00298 pix2 += line_size;
00299 }
00300 return s;
00301 }
00302
00303
00304 #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
/**
 * Wavelet-domain comparison of two w x h blocks.
 *
 * The pixel differences are written to a scratch buffer, transformed in place
 * by ff_spatial_dwt(), and the absolute values of the resulting coefficients
 * are summed with a per-subband weight taken from the scale[] table.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row stride, in bytes, of both blocks
 * @param w         block width; the callers in this file pass 8, 16 or 32
 * @param h         block height; the function asserts w == h, and the scratch
 *                  buffer only has room for 32 rows
 * @param type      selector passed to ff_spatial_dwt() and used as the first
 *                  index of scale[]; the wrappers pass 1 for "w53", 0 for "w97"
 * @return the weighted sum, shifted right by 9
 */
00305 static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
00306 int s, i, j;
/* 3 decomposition levels for 8-wide blocks, 4 for everything else. */
00307 const int dec_count= w==8 ? 3 : 4;
/* Scratch buffer of 32 rows x 32 columns, row stride 32. */
00308 int tmp[32*32];
00309 int level, ori;
/* Weights indexed as scale[type][dec_count-3][level][ori]. The 3-level tables
 * list only three rows; the fourth is zero-filled by the initializer rules and
 * is never read because level < dec_count. */
00310 static const int scale[2][2][4][4]={
00311 {
00312 {
00313
00314 {268, 239, 239, 213},
00315 { 0, 224, 224, 152},
00316 { 0, 135, 135, 110},
00317 },{
00318
00319 {344, 310, 310, 280},
00320 { 0, 320, 320, 228},
00321 { 0, 175, 175, 136},
00322 { 0, 129, 129, 102},
00323 }
00324 },{
00325 {
00326
00327 {275, 245, 245, 218},
00328 { 0, 230, 230, 156},
00329 { 0, 138, 138, 113},
00330 },{
00331
00332 {352, 317, 317, 286},
00333 { 0, 328, 328, 233},
00334 { 0, 180, 180, 140},
00335 { 0, 132, 132, 105},
00336 }
00337 }
00338 };
00339
/* Store the pixel differences, multiplied by 16 via <<4, into tmp.
 * NOTE(review): the difference can be negative, and left-shifting a negative
 * int is undefined behaviour in ISO C; multiplying by 16 would avoid that. */
00340 for (i = 0; i < h; i++) {
00341 for (j = 0; j < w; j+=4) {
00342 tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
00343 tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
00344 tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
00345 tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
00346 }
00347 pix1 += line_size;
00348 pix2 += line_size;
00349 }
00350
/* In-place transform of tmp (row stride 32). */
00351 ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
00352
00353 s=0;
00354 assert(w==h);
/* Walk the subbands: ori 0 is only visited at level 0, ori 1..3 at every level. */
00355 for(level=0; level<dec_count; level++){
00356 for(ori= level ? 1 : 0; ori<4; ori++){
/* The subband is size x size; its rows are "stride" ints apart in tmp. */
00357 int size= w>>(dec_count-level);
/* Bit 0 of ori adds a column offset of size, bit 1 an offset of half a stride. */
00358 int sx= (ori&1) ? size : 0;
00359 int stride= 32<<(dec_count-level);
00360 int sy= (ori&2) ? stride>>1 : 0;
00361
00362 for(i=0; i<size; i++){
00363 for(j=0; j<size; j++){
/* This local "v" shadows the unused void *v parameter. */
00364 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
00365 s += FFABS(v);
00366 }
00367 }
00368 }
00369 }
00370 assert(s>=0);
00371 return s>>9;
00372 }
00373
/*
 * Fixed-size entry points for w_c(). Each one forwards its arguments
 * unchanged and pins the block width (8, 16 or 32) and the wavelet type
 * selector (1 for the "w53" variants, 0 for the "w97" variants).
 */

/** 8-wide comparison, type selector 1. */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 8;
    const int dwt_type = 1;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}

/** 8-wide comparison, type selector 0. */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 8;
    const int dwt_type = 0;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}

/** 16-wide comparison, type selector 1. */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 16;
    const int dwt_type = 1;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}

/** 16-wide comparison, type selector 0. */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 16;
    const int dwt_type = 0;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}

/** 32-wide comparison, type selector 1 (externally visible). */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 32;
    const int dwt_type = 1;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}

/** 32-wide comparison, type selector 0 (externally visible). */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_w  = 32;
    const int dwt_type = 0;
    return w_c(v, pix1, pix2, line_size, block_w, h, dwt_type);
}
00397 #endif
00398
00399 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00400 {
00401 int i;
00402
00403
00404 for(i=0;i<8;i++) {
00405 block[0] = pixels[0];
00406 block[1] = pixels[1];
00407 block[2] = pixels[2];
00408 block[3] = pixels[3];
00409 block[4] = pixels[4];
00410 block[5] = pixels[5];
00411 block[6] = pixels[6];
00412 block[7] = pixels[7];
00413 pixels += line_size;
00414 block += 8;
00415 }
00416 }
00417
00418 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00419 const uint8_t *s2, int stride){
00420 int i;
00421
00422
00423 for(i=0;i<8;i++) {
00424 block[0] = s1[0] - s2[0];
00425 block[1] = s1[1] - s2[1];
00426 block[2] = s1[2] - s2[2];
00427 block[3] = s1[3] - s2[3];
00428 block[4] = s1[4] - s2[4];
00429 block[5] = s1[5] - s2[5];
00430 block[6] = s1[6] - s2[6];
00431 block[7] = s1[7] - s2[7];
00432 s1 += stride;
00433 s2 += stride;
00434 block += 8;
00435 }
00436 }
00437
00438
00439 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00440 int line_size)
00441 {
00442 int i;
00443 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00444
00445
00446 for(i=0;i<8;i++) {
00447 pixels[0] = cm[block[0]];
00448 pixels[1] = cm[block[1]];
00449 pixels[2] = cm[block[2]];
00450 pixels[3] = cm[block[3]];
00451 pixels[4] = cm[block[4]];
00452 pixels[5] = cm[block[5]];
00453 pixels[6] = cm[block[6]];
00454 pixels[7] = cm[block[7]];
00455
00456 pixels += line_size;
00457 block += 8;
00458 }
00459 }
00460
00461 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00462 int line_size)
00463 {
00464 int i;
00465 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00466
00467
00468 for(i=0;i<4;i++) {
00469 pixels[0] = cm[block[0]];
00470 pixels[1] = cm[block[1]];
00471 pixels[2] = cm[block[2]];
00472 pixels[3] = cm[block[3]];
00473
00474 pixels += line_size;
00475 block += 8;
00476 }
00477 }
00478
00479 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00480 int line_size)
00481 {
00482 int i;
00483 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00484
00485
00486 for(i=0;i<2;i++) {
00487 pixels[0] = cm[block[0]];
00488 pixels[1] = cm[block[1]];
00489
00490 pixels += line_size;
00491 block += 8;
00492 }
00493 }
00494
00495 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00496 uint8_t *restrict pixels,
00497 int line_size)
00498 {
00499 int i, j;
00500
00501 for (i = 0; i < 8; i++) {
00502 for (j = 0; j < 8; j++) {
00503 if (*block < -128)
00504 *pixels = 0;
00505 else if (*block > 127)
00506 *pixels = 255;
00507 else
00508 *pixels = (uint8_t)(*block + 128);
00509 block++;
00510 pixels++;
00511 }
00512 pixels += (line_size - 8);
00513 }
00514 }
00515
00516 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00517 int line_size)
00518 {
00519 int i;
00520 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00521
00522
00523 for(i=0;i<8;i++) {
00524 pixels[0] = cm[pixels[0] + block[0]];
00525 pixels[1] = cm[pixels[1] + block[1]];
00526 pixels[2] = cm[pixels[2] + block[2]];
00527 pixels[3] = cm[pixels[3] + block[3]];
00528 pixels[4] = cm[pixels[4] + block[4]];
00529 pixels[5] = cm[pixels[5] + block[5]];
00530 pixels[6] = cm[pixels[6] + block[6]];
00531 pixels[7] = cm[pixels[7] + block[7]];
00532 pixels += line_size;
00533 block += 8;
00534 }
00535 }
00536
00537 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00538 int line_size)
00539 {
00540 int i;
00541 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00542
00543
00544 for(i=0;i<4;i++) {
00545 pixels[0] = cm[pixels[0] + block[0]];
00546 pixels[1] = cm[pixels[1] + block[1]];
00547 pixels[2] = cm[pixels[2] + block[2]];
00548 pixels[3] = cm[pixels[3] + block[3]];
00549 pixels += line_size;
00550 block += 8;
00551 }
00552 }
00553
00554 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00555 int line_size)
00556 {
00557 int i;
00558 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00559
00560
00561 for(i=0;i<2;i++) {
00562 pixels[0] = cm[pixels[0] + block[0]];
00563 pixels[1] = cm[pixels[1] + block[1]];
00564 pixels += line_size;
00565 block += 8;
00566 }
00567 }
00568
00569 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00570 {
00571 int i;
00572 for(i=0;i<8;i++) {
00573 pixels[0] += block[0];
00574 pixels[1] += block[1];
00575 pixels[2] += block[2];
00576 pixels[3] += block[3];
00577 pixels[4] += block[4];
00578 pixels[5] += block[5];
00579 pixels[6] += block[6];
00580 pixels[7] += block[7];
00581 pixels += line_size;
00582 block += 8;
00583 }
00584 }
00585
00586 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00587 {
00588 int i;
00589 for(i=0;i<4;i++) {
00590 pixels[0] += block[0];
00591 pixels[1] += block[1];
00592 pixels[2] += block[2];
00593 pixels[3] += block[3];
00594 pixels += line_size;
00595 block += 4;
00596 }
00597 }
00598
00599 static int sum_abs_dctelem_c(DCTELEM *block)
00600 {
00601 int sum=0, i;
00602 for(i=0; i<64; i++)
00603 sum+= FFABS(block[i]);
00604 return sum;
00605 }
00606
00607 #if 0
00608
/*
 * Disabled (this definition sits in the "#if 0" branch) 64-bit variant of the
 * PIXOP2 generator. It handles 8 pixels per row at a time by loading them with
 * AV_RN64() and storing through a uint64_t pointer with OP(dst, value).
 *
 * For a given OPNAME it defines 8-wide copy, x2 / y2 (average with the right
 * or lower neighbour) and xy2 (average of the four neighbours) functions, in
 * rounding and "_no_rnd" flavours, and then builds the 16-wide versions with
 * CALL_2X_PIXELS.
 *
 * The packed averages rely on these per-byte identities:
 *   (a & b) + (((a ^ b) & 0xFE..FE) >> 1)  ==  (a + b) >> 1       (round down)
 *   (a | b) - (((a ^ b) & 0xFE..FE) >> 1)  ==  (a + b + 1) >> 1   (round up)
 * The xy2 functions split each byte into its low 2 bits (l0/l1, including the
 * rounding constant) and its high 6 bits pre-shifted by 2 (h0/h1) so that the
 * four-way sum cannot carry across byte lanes.
 *
 * NOTE(review): the first function is generated as OPNAME ## _pixels, while
 * CALL_2X_PIXELS below refers to OPNAME ## _pixels_c; this would presumably
 * need fixing before the branch could be enabled -- confirm.
 */
00609 #define PIXOP2(OPNAME, OP) \
00610 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00611 {\
00612 int i;\
00613 for(i=0; i<h; i++){\
00614 OP(*((uint64_t*)block), AV_RN64(pixels));\
00615 pixels+=line_size;\
00616 block +=line_size;\
00617 }\
00618 }\
00619 \
00620 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00621 {\
00622 int i;\
00623 for(i=0; i<h; i++){\
00624 const uint64_t a= AV_RN64(pixels );\
00625 const uint64_t b= AV_RN64(pixels+1);\
00626 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00627 pixels+=line_size;\
00628 block +=line_size;\
00629 }\
00630 }\
00631 \
00632 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00633 {\
00634 int i;\
00635 for(i=0; i<h; i++){\
00636 const uint64_t a= AV_RN64(pixels );\
00637 const uint64_t b= AV_RN64(pixels+1);\
00638 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00639 pixels+=line_size;\
00640 block +=line_size;\
00641 }\
00642 }\
00643 \
00644 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00645 {\
00646 int i;\
00647 for(i=0; i<h; i++){\
00648 const uint64_t a= AV_RN64(pixels );\
00649 const uint64_t b= AV_RN64(pixels+line_size);\
00650 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00651 pixels+=line_size;\
00652 block +=line_size;\
00653 }\
00654 }\
00655 \
00656 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00657 {\
00658 int i;\
00659 for(i=0; i<h; i++){\
00660 const uint64_t a= AV_RN64(pixels );\
00661 const uint64_t b= AV_RN64(pixels+line_size);\
00662 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00663 pixels+=line_size;\
00664 block +=line_size;\
00665 }\
00666 }\
00667 \
00668 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00669 {\
00670 int i;\
00671 const uint64_t a= AV_RN64(pixels );\
00672 const uint64_t b= AV_RN64(pixels+1);\
00673 uint64_t l0= (a&0x0303030303030303ULL)\
00674 + (b&0x0303030303030303ULL)\
00675 + 0x0202020202020202ULL;\
00676 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00677 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00678 uint64_t l1,h1;\
00679 \
00680 pixels+=line_size;\
00681 for(i=0; i<h; i+=2){\
00682 uint64_t a= AV_RN64(pixels );\
00683 uint64_t b= AV_RN64(pixels+1);\
00684 l1= (a&0x0303030303030303ULL)\
00685 + (b&0x0303030303030303ULL);\
00686 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00687 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00688 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00689 pixels+=line_size;\
00690 block +=line_size;\
00691 a= AV_RN64(pixels );\
00692 b= AV_RN64(pixels+1);\
00693 l0= (a&0x0303030303030303ULL)\
00694 + (b&0x0303030303030303ULL)\
00695 + 0x0202020202020202ULL;\
00696 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00697 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00698 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00699 pixels+=line_size;\
00700 block +=line_size;\
00701 }\
00702 }\
00703 \
00704 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00705 {\
00706 int i;\
00707 const uint64_t a= AV_RN64(pixels );\
00708 const uint64_t b= AV_RN64(pixels+1);\
00709 uint64_t l0= (a&0x0303030303030303ULL)\
00710 + (b&0x0303030303030303ULL)\
00711 + 0x0101010101010101ULL;\
00712 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00713 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00714 uint64_t l1,h1;\
00715 \
00716 pixels+=line_size;\
00717 for(i=0; i<h; i+=2){\
00718 uint64_t a= AV_RN64(pixels );\
00719 uint64_t b= AV_RN64(pixels+1);\
00720 l1= (a&0x0303030303030303ULL)\
00721 + (b&0x0303030303030303ULL);\
00722 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00723 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00724 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00725 pixels+=line_size;\
00726 block +=line_size;\
00727 a= AV_RN64(pixels );\
00728 b= AV_RN64(pixels+1);\
00729 l0= (a&0x0303030303030303ULL)\
00730 + (b&0x0303030303030303ULL)\
00731 + 0x0101010101010101ULL;\
00732 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00733 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00734 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00735 pixels+=line_size;\
00736 block +=line_size;\
00737 }\
00738 }\
00739 \
00740 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00741 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00742 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00743 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00744 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00745 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00746 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00747
/* 64-bit "average into destination" operator for the disabled branch: per
 * byte lane, a becomes (a + b + 1) >> 1, using the (a|b) - (((a^b) & 0xFE..FE) >> 1)
 * identity. Note that both arguments are evaluated more than once. */
00748 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00749 #else // end of the disabled 64 bit variant; the 16/32 bit variant follows
00750
00751 #define PIXOP2(OPNAME, OP) \
00752 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00753 int i;\
00754 for(i=0; i<h; i++){\
00755 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00756 pixels+=line_size;\
00757 block +=line_size;\
00758 }\
00759 }\
00760 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00761 int i;\
00762 for(i=0; i<h; i++){\
00763 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00764 pixels+=line_size;\
00765 block +=line_size;\
00766 }\
00767 }\
00768 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00769 int i;\
00770 for(i=0; i<h; i++){\
00771 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00772 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00773 pixels+=line_size;\
00774 block +=line_size;\
00775 }\
00776 }\
00777 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00778 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00779 }\
00780 \
00781 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00782 int src_stride1, int src_stride2, int h){\
00783 int i;\
00784 for(i=0; i<h; i++){\
00785 uint32_t a,b;\
00786 a= AV_RN32(&src1[i*src_stride1 ]);\
00787 b= AV_RN32(&src2[i*src_stride2 ]);\
00788 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00789 a= AV_RN32(&src1[i*src_stride1+4]);\
00790 b= AV_RN32(&src2[i*src_stride2+4]);\
00791 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00792 }\
00793 }\
00794 \
00795 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00796 int src_stride1, int src_stride2, int h){\
00797 int i;\
00798 for(i=0; i<h; i++){\
00799 uint32_t a,b;\
00800 a= AV_RN32(&src1[i*src_stride1 ]);\
00801 b= AV_RN32(&src2[i*src_stride2 ]);\
00802 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00803 a= AV_RN32(&src1[i*src_stride1+4]);\
00804 b= AV_RN32(&src2[i*src_stride2+4]);\
00805 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00806 }\
00807 }\
00808 \
00809 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00810 int src_stride1, int src_stride2, int h){\
00811 int i;\
00812 for(i=0; i<h; i++){\
00813 uint32_t a,b;\
00814 a= AV_RN32(&src1[i*src_stride1 ]);\
00815 b= AV_RN32(&src2[i*src_stride2 ]);\
00816 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00817 }\
00818 }\
00819 \
00820 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00821 int src_stride1, int src_stride2, int h){\
00822 int i;\
00823 for(i=0; i<h; i++){\
00824 uint32_t a,b;\
00825 a= AV_RN16(&src1[i*src_stride1 ]);\
00826 b= AV_RN16(&src2[i*src_stride2 ]);\
00827 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00828 }\
00829 }\
00830 \
00831 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00832 int src_stride1, int src_stride2, int h){\
00833 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00834 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00835 }\
00836 \
00837 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00838 int src_stride1, int src_stride2, int h){\
00839 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00840 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00841 }\
00842 \
00843 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00844 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00845 }\
00846 \
00847 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00848 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00849 }\
00850 \
00851 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00852 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00853 }\
00854 \
00855 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00856 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00857 }\
00858 \
00859 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00860 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00861 int i;\
00862 for(i=0; i<h; i++){\
00863 uint32_t a, b, c, d, l0, l1, h0, h1;\
00864 a= AV_RN32(&src1[i*src_stride1]);\
00865 b= AV_RN32(&src2[i*src_stride2]);\
00866 c= AV_RN32(&src3[i*src_stride3]);\
00867 d= AV_RN32(&src4[i*src_stride4]);\
00868 l0= (a&0x03030303UL)\
00869 + (b&0x03030303UL)\
00870 + 0x02020202UL;\
00871 h0= ((a&0xFCFCFCFCUL)>>2)\
00872 + ((b&0xFCFCFCFCUL)>>2);\
00873 l1= (c&0x03030303UL)\
00874 + (d&0x03030303UL);\
00875 h1= ((c&0xFCFCFCFCUL)>>2)\
00876 + ((d&0xFCFCFCFCUL)>>2);\
00877 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00878 a= AV_RN32(&src1[i*src_stride1+4]);\
00879 b= AV_RN32(&src2[i*src_stride2+4]);\
00880 c= AV_RN32(&src3[i*src_stride3+4]);\
00881 d= AV_RN32(&src4[i*src_stride4+4]);\
00882 l0= (a&0x03030303UL)\
00883 + (b&0x03030303UL)\
00884 + 0x02020202UL;\
00885 h0= ((a&0xFCFCFCFCUL)>>2)\
00886 + ((b&0xFCFCFCFCUL)>>2);\
00887 l1= (c&0x03030303UL)\
00888 + (d&0x03030303UL);\
00889 h1= ((c&0xFCFCFCFCUL)>>2)\
00890 + ((d&0xFCFCFCFCUL)>>2);\
00891 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00892 }\
00893 }\
00894 \
00895 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00896 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00897 }\
00898 \
00899 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00900 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00901 }\
00902 \
00903 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00904 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00905 }\
00906 \
00907 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00908 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00909 }\
00910 \
00911 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00912 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00913 int i;\
00914 for(i=0; i<h; i++){\
00915 uint32_t a, b, c, d, l0, l1, h0, h1;\
00916 a= AV_RN32(&src1[i*src_stride1]);\
00917 b= AV_RN32(&src2[i*src_stride2]);\
00918 c= AV_RN32(&src3[i*src_stride3]);\
00919 d= AV_RN32(&src4[i*src_stride4]);\
00920 l0= (a&0x03030303UL)\
00921 + (b&0x03030303UL)\
00922 + 0x01010101UL;\
00923 h0= ((a&0xFCFCFCFCUL)>>2)\
00924 + ((b&0xFCFCFCFCUL)>>2);\
00925 l1= (c&0x03030303UL)\
00926 + (d&0x03030303UL);\
00927 h1= ((c&0xFCFCFCFCUL)>>2)\
00928 + ((d&0xFCFCFCFCUL)>>2);\
00929 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00930 a= AV_RN32(&src1[i*src_stride1+4]);\
00931 b= AV_RN32(&src2[i*src_stride2+4]);\
00932 c= AV_RN32(&src3[i*src_stride3+4]);\
00933 d= AV_RN32(&src4[i*src_stride4+4]);\
00934 l0= (a&0x03030303UL)\
00935 + (b&0x03030303UL)\
00936 + 0x01010101UL;\
00937 h0= ((a&0xFCFCFCFCUL)>>2)\
00938 + ((b&0xFCFCFCFCUL)>>2);\
00939 l1= (c&0x03030303UL)\
00940 + (d&0x03030303UL);\
00941 h1= ((c&0xFCFCFCFCUL)>>2)\
00942 + ((d&0xFCFCFCFCUL)>>2);\
00943 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00944 }\
00945 }\
00946 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00947 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00948 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00949 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00950 }\
00951 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00952 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00953 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00954 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00955 }\
00956 \
00957 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00958 {\
00959 int i, a0, b0, a1, b1;\
00960 a0= pixels[0];\
00961 b0= pixels[1] + 2;\
00962 a0 += b0;\
00963 b0 += pixels[2];\
00964 \
00965 pixels+=line_size;\
00966 for(i=0; i<h; i+=2){\
00967 a1= pixels[0];\
00968 b1= pixels[1];\
00969 a1 += b1;\
00970 b1 += pixels[2];\
00971 \
00972 block[0]= (a1+a0)>>2; \
00973 block[1]= (b1+b0)>>2;\
00974 \
00975 pixels+=line_size;\
00976 block +=line_size;\
00977 \
00978 a0= pixels[0];\
00979 b0= pixels[1] + 2;\
00980 a0 += b0;\
00981 b0 += pixels[2];\
00982 \
00983 block[0]= (a1+a0)>>2;\
00984 block[1]= (b1+b0)>>2;\
00985 pixels+=line_size;\
00986 block +=line_size;\
00987 }\
00988 }\
00989 \
00990 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00991 {\
00992 int i;\
00993 const uint32_t a= AV_RN32(pixels );\
00994 const uint32_t b= AV_RN32(pixels+1);\
00995 uint32_t l0= (a&0x03030303UL)\
00996 + (b&0x03030303UL)\
00997 + 0x02020202UL;\
00998 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
00999 + ((b&0xFCFCFCFCUL)>>2);\
01000 uint32_t l1,h1;\
01001 \
01002 pixels+=line_size;\
01003 for(i=0; i<h; i+=2){\
01004 uint32_t a= AV_RN32(pixels );\
01005 uint32_t b= AV_RN32(pixels+1);\
01006 l1= (a&0x03030303UL)\
01007 + (b&0x03030303UL);\
01008 h1= ((a&0xFCFCFCFCUL)>>2)\
01009 + ((b&0xFCFCFCFCUL)>>2);\
01010 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01011 pixels+=line_size;\
01012 block +=line_size;\
01013 a= AV_RN32(pixels );\
01014 b= AV_RN32(pixels+1);\
01015 l0= (a&0x03030303UL)\
01016 + (b&0x03030303UL)\
01017 + 0x02020202UL;\
01018 h0= ((a&0xFCFCFCFCUL)>>2)\
01019 + ((b&0xFCFCFCFCUL)>>2);\
01020 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01021 pixels+=line_size;\
01022 block +=line_size;\
01023 }\
01024 }\
01025 \
01026 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01027 {\
01028 int j;\
01029 for(j=0; j<2; j++){\
01030 int i;\
01031 const uint32_t a= AV_RN32(pixels );\
01032 const uint32_t b= AV_RN32(pixels+1);\
01033 uint32_t l0= (a&0x03030303UL)\
01034 + (b&0x03030303UL)\
01035 + 0x02020202UL;\
01036 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01037 + ((b&0xFCFCFCFCUL)>>2);\
01038 uint32_t l1,h1;\
01039 \
01040 pixels+=line_size;\
01041 for(i=0; i<h; i+=2){\
01042 uint32_t a= AV_RN32(pixels );\
01043 uint32_t b= AV_RN32(pixels+1);\
01044 l1= (a&0x03030303UL)\
01045 + (b&0x03030303UL);\
01046 h1= ((a&0xFCFCFCFCUL)>>2)\
01047 + ((b&0xFCFCFCFCUL)>>2);\
01048 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01049 pixels+=line_size;\
01050 block +=line_size;\
01051 a= AV_RN32(pixels );\
01052 b= AV_RN32(pixels+1);\
01053 l0= (a&0x03030303UL)\
01054 + (b&0x03030303UL)\
01055 + 0x02020202UL;\
01056 h0= ((a&0xFCFCFCFCUL)>>2)\
01057 + ((b&0xFCFCFCFCUL)>>2);\
01058 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01059 pixels+=line_size;\
01060 block +=line_size;\
01061 }\
01062 pixels+=4-line_size*(h+1);\
01063 block +=4-line_size*h;\
01064 }\
01065 }\
01066 \
01067 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01068 {\
01069 int j;\
01070 for(j=0; j<2; j++){\
01071 int i;\
01072 const uint32_t a= AV_RN32(pixels );\
01073 const uint32_t b= AV_RN32(pixels+1);\
01074 uint32_t l0= (a&0x03030303UL)\
01075 + (b&0x03030303UL)\
01076 + 0x01010101UL;\
01077 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01078 + ((b&0xFCFCFCFCUL)>>2);\
01079 uint32_t l1,h1;\
01080 \
01081 pixels+=line_size;\
01082 for(i=0; i<h; i+=2){\
01083 uint32_t a= AV_RN32(pixels );\
01084 uint32_t b= AV_RN32(pixels+1);\
01085 l1= (a&0x03030303UL)\
01086 + (b&0x03030303UL);\
01087 h1= ((a&0xFCFCFCFCUL)>>2)\
01088 + ((b&0xFCFCFCFCUL)>>2);\
01089 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01090 pixels+=line_size;\
01091 block +=line_size;\
01092 a= AV_RN32(pixels );\
01093 b= AV_RN32(pixels+1);\
01094 l0= (a&0x03030303UL)\
01095 + (b&0x03030303UL)\
01096 + 0x01010101UL;\
01097 h0= ((a&0xFCFCFCFCUL)>>2)\
01098 + ((b&0xFCFCFCFCUL)>>2);\
01099 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01100 pixels+=line_size;\
01101 block +=line_size;\
01102 }\
01103 pixels+=4-line_size*(h+1);\
01104 block +=4-line_size*h;\
01105 }\
01106 }\
01107 \
01108 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01109 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01110 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01111 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01112 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01113 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01114 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01115 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01116
01117 #define op_avg(a, b) a = rnd_avg32(a, b)
01118 #endif
/* Store operation for the put_ variants: OP(dst, value) simply assigns. */
01119 #define op_put(a, b) a = b
01120
/* Expand the PIXOP2 function family twice: once with the averaging store
 * (functions prefixed avg_) and once with the plain store (prefixed put_). */
01121 PIXOP2(avg, op_avg)
01122 PIXOP2(put, op_put)
/* The store macros are dropped again right after the two expansions. */
01123 #undef op_avg
01124 #undef op_put
01125
/**
 * Rounded averages of two and four values.
 *
 * avg2(a,b)     = (a + b + 1) >> 1
 * avg4(a,b,c,d) = (a + b + c + d + 2) >> 2
 *
 * Every argument is parenthesized so that operands containing operators of
 * lower precedence than '+' (e.g. '|', '&', '?:') expand as intended.
 * Each argument is still evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
01128
/**
 * Single-stride front end for put_no_rnd_pixels16_l2().
 *
 * The one stride given by the caller is forwarded for all three stride
 * arguments of the underlying routine.
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int common_stride = stride;

    put_no_rnd_pixels16_l2(dst, a, b,
                           common_stride, common_stride, common_stride,
                           h);
}
01132
/**
 * Single-stride front end for put_no_rnd_pixels8_l2().
 *
 * The one stride given by the caller is forwarded for all three stride
 * arguments of the underlying routine.
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int common_stride = stride;

    put_no_rnd_pixels8_l2(dst, a, b,
                          common_stride, common_stride, common_stride,
                          h);
}
01136
/**
 * Bilinear interpolation of an 8-pixel-wide block at a 1/16-pel offset.
 *
 * Each output pixel is a weighted sum of the 2x2 source neighbourhood
 * (current pixel, right, below, below-right); the four weights add up to
 * 256 and the result is shifted right by 8 after adding @p rounder.
 *
 * @param dst      destination, 8 pixels written per row
 * @param src      source, 9 pixels read per row over h+1 rows
 * @param stride   distance in bytes between rows, shared by dst and src
 * @param h        number of rows to produce
 * @param x16      horizontal fraction in 1/16 units
 * @param y16      vertical fraction in 1/16 units
 * @param rounder  value added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_cur         = (16 - x16) * (16 - y16);
    const int w_right       = x16        * (16 - y16);
    const int w_below       = (16 - x16) * y16;
    const int w_below_right = x16        * y16;
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *upper = src;
        const uint8_t *lower = src + stride;
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_cur         * upper[col]
                      + w_right       * upper[col + 1]
                      + w_below       * lower[col]
                      + w_below_right * lower[col + 1]
                      + rounder) >> 8;
        }

        dst += stride;
        src += stride;
    }
}
01159
/**
 * Warp an 8-pixel-wide block using a per-pixel linear source mapping.
 *
 * The source position of the pixel at (x, y) starts at (ox, oy) and advances
 * by (dxx, dyx) per column and (dxy, dyy) per row.  Positions carry 16
 * fractional bits that are discarded; of the remaining value the low
 * @p shift bits are the interpolation fraction and the rest is the integer
 * sample position.
 *
 * Samples are bilinearly interpolated when both coordinates lie inside
 * [0, width-1) x [0, height-1).  When only one coordinate is inside, the
 * other is clipped to the valid range and interpolation happens along the
 * inside axis only.  When both are outside, the clipped source pixel is
 * copied unchanged.
 *
 * @param dst     destination block
 * @param src     source picture
 * @param stride  distance in bytes between rows, shared by dst and src
 * @param h       number of rows to produce
 * @param r       rounding term added before the final >>(2*shift)
 * @param width   source width used for range checks and clipping
 * @param height  source height used for range checks and clipping
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int scale     = 1 << shift;
    const int frac_mask = scale - 1;
    const int max_x     = width  - 1;
    const int max_y     = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) {
            const int pos_x = vx >> 16;
            const int pos_y = vy >> 16;
            const int fx    = pos_x & frac_mask;
            const int fy    = pos_y & frac_mask;
            const int sx    = pos_x >> shift;
            const int sy    = pos_y >> shift;
            /* unsigned compare rejects negative coordinates as well */
            const int x_inside = (unsigned)sx < max_x;
            const int y_inside = (unsigned)sy < max_y;
            uint8_t *out = &dst[row*stride + col];
            int index;

            if (x_inside && y_inside) {
                int upper, lower;

                index = sx + sy*stride;
                upper = src[index         ]*(scale - fx) + src[index          + 1]*fx;
                lower = src[index + stride]*(scale - fx) + src[index + stride + 1]*fx;
                *out  = (upper*(scale - fy) + lower*fy + r) >> (shift*2);
            } else if (x_inside) {
                /* vertical position out of range: interpolate horizontally only */
                index = sx + av_clip(sy, 0, max_y)*stride;
                *out  = ((src[index]*(scale - fx) + src[index + 1]*fx)*scale + r) >> (shift*2);
            } else if (y_inside) {
                /* horizontal position out of range: interpolate vertically only */
                index = av_clip(sx, 0, max_x) + sy*stride;
                *out  = ((src[index]*(scale - fy) + src[index + stride]*fy)*scale + r) >> (shift*2);
            } else {
                index = av_clip(sx, 0, max_x) + av_clip(sy, 0, max_y)*stride;
                *out  = src[index];
            }

            vx += dxx;
            vy += dyx;
        }

        ox += dxy;
        oy += dyy;
    }
}
01217
/**
 * Full-pel copy for the tpel family: forwards to the put_pixelsN_c routine
 * matching @p width (2, 4, 8 or 16).  Any other width does nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
01226
/**
 * Horizontal interpolation weighting the current pixel 2 and its right
 * neighbour 1.  The division by 3 is done as *683 >> 11.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01237
/**
 * Horizontal interpolation weighting the current pixel 1 and its right
 * neighbour 2.  The division by 3 is done as *683 >> 11.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01248
/**
 * Vertical interpolation weighting the current pixel 2 and the pixel one
 * row below 1.  The division by 3 is done as *683 >> 11.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01259
/**
 * 2-D interpolation over the 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below), 2 (below-right).
 * The division by 12 is done as *2731 >> 15.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01270
/**
 * 2-D interpolation over the 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below), 3 (below-right).
 * The division by 12 is done as *2731 >> 15.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 2*src[col+1]
                          + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01281
/**
 * Vertical interpolation weighting the current pixel 1 and the pixel one
 * row below 2.  The division by 3 is done as *683 >> 11.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01292
/**
 * 2-D interpolation over the 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below), 3 (below-right).
 * The division by 12 is done as *2731 >> 15.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 4*src[col+1]
                          + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01303
/**
 * 2-D interpolation over the 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below), 4 (below-right).
 * The division by 12 is done as *2731 >> 15.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01314
/**
 * Full-pel averaging for the tpel family: forwards to the avg_pixelsN_c
 * routine matching @p width (2, 4, 8 or 16).  Any other width does nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
01323
/**
 * Horizontal interpolation (weights 2 current, 1 right; /3 as *683 >> 11)
 * followed by a rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01334
/**
 * Horizontal interpolation (weights 1 current, 2 right; /3 as *683 >> 11)
 * followed by a rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01345
/**
 * Vertical interpolation (weights 2 current, 1 below; /3 as *683 >> 11)
 * followed by a rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01356
/**
 * 2-D interpolation with weights 4 (current), 3 (right), 3 (below),
 * 2 (below-right), divided by 12 as *2731 >> 15, followed by a rounded
 * average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(4*src[col]        + 3*src[col+1]
                                    + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01367
/**
 * 2-D interpolation with weights 3 (current), 2 (right), 4 (below),
 * 3 (below-right), divided by 12 as *2731 >> 15, followed by a rounded
 * average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col]        + 2*src[col+1]
                                    + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01378
/**
 * Vertical interpolation (weights 1 current, 2 below; /3 as *683 >> 11)
 * followed by a rounded average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01389
/**
 * 2-D interpolation with weights 3 (current), 4 (right), 2 (below),
 * 3 (below-right), divided by 12 as *2731 >> 15, followed by a rounded
 * average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col]        + 4*src[col+1]
                                    + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
01400
/**
 * 2-D interpolation with weights 2 (current), 3 (right), 3 (below),
 * 4 (below-right), divided by 12 as *2731 >> 15, followed by a rounded
 * average with the pixel already in dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(2*src[col]        + 3*src[col+1]
                                    + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
#if 0
/*
 * Disabled generator for fixed-width put_tpel wrappers.
 *
 * TPEL_WIDTH(width) defines put_tpel_pixels<width>_mcXY_c(dst, src, stride,
 * height) for the nine XY positions; each wrapper forwards to the generic
 * put_tpel_pixels_mcXY_c() with the width filled in.
 *
 * The wrapper bodies are plain calls; a leading "void" in front of each call
 * would turn it into a malformed declaration and stop the block compiling
 * once enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01432
/**
 * Generator for one bilinear chroma motion-compensation function of width W.
 *
 * The produced function OPNAME##h264_chroma_mc<W>_c(dst, src, stride, h, x, y)
 * combines each source pixel with its right, lower and lower-right neighbours
 * using weights derived from the 1/8-pel fractions x and y (both asserted to
 * be in 0..7).  The four weights sum to 64; the unscaled sum is handed to
 * OP(dst_pixel, sum), which is responsible for rounding, scaling and storing.
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_cur  =(8-x)*(8-y);\
    const int w_right=(  x)*(8-y);\
    const int w_below=(8-x)*(  y);\
    const int w_diag =(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(row=0; row<h; row++)\
    {\
        for(col=0; col<W; col++)\
        {\
            OP(dst[col], (w_cur*src[col] + w_right*src[col+1] + w_below*src[stride+col] + w_diag*src[stride+col+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}

/** Emit the 2-, 4- and 8-pixel-wide variants for one OPNAME/OP pair. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
\
H264_CHROMA_MC_W(OPNAME, OP, 8)

/* avg: round the weighted sum down to a pixel, then average it with dst. */
#define op_avg(pix, sum) pix = (((pix)+(((sum) + 32)>>6)+1)>>1)
/* put: round the weighted sum down to a pixel and store it. */
#define op_put(pix, sum) pix = (((sum) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01503
/**
 * 8-pixel-wide bilinear chroma interpolation with a reduced rounding term.
 *
 * Works like the regular put variant, except that 32 - 4 (i.e. 28) instead of
 * 32 is added before the >>6.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     source, 9 pixels read per row over h+1 rows
 * @param stride  distance in bytes between rows, shared by dst and src
 * @param h       number of rows to produce
 * @param x       horizontal fraction in 1/8 units, 0..7
 * @param y       vertical fraction in 1/8 units, 0..7
 */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int w_cur   = (8 - x) * (8 - y);
    const int w_right = x       * (8 - y);
    const int w_below = (8 - x) * y;
    const int w_diag  = x       * y;
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_cur  *src[col]        + w_right*src[col+1]
                      + w_below*src[stride+col] + w_diag *src[stride+col+1]
                      + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
01527
01528 #define QPEL_MC(r, OPNAME, RND, OP) \
01529 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01530 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01531 int i;\
01532 for(i=0; i<h; i++)\
01533 {\
01534 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01535 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01536 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01537 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01538 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01539 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01540 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01541 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01542 dst+=dstStride;\
01543 src+=srcStride;\
01544 }\
01545 }\
01546 \
01547 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01548 const int w=8;\
01549 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01550 int i;\
01551 for(i=0; i<w; i++)\
01552 {\
01553 const int src0= src[0*srcStride];\
01554 const int src1= src[1*srcStride];\
01555 const int src2= src[2*srcStride];\
01556 const int src3= src[3*srcStride];\
01557 const int src4= src[4*srcStride];\
01558 const int src5= src[5*srcStride];\
01559 const int src6= src[6*srcStride];\
01560 const int src7= src[7*srcStride];\
01561 const int src8= src[8*srcStride];\
01562 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01563 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01564 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01565 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01566 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01567 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01568 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01569 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01570 dst++;\
01571 src++;\
01572 }\
01573 }\
01574 \
01575 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01576 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01577 int i;\
01578 \
01579 for(i=0; i<h; i++)\
01580 {\
01581 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01582 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01583 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01584 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01585 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01586 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01587 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01588 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01589 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01590 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01591 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01592 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01593 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01594 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01595 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01596 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01597 dst+=dstStride;\
01598 src+=srcStride;\
01599 }\
01600 }\
01601 \
01602 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01603 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01604 int i;\
01605 const int w=16;\
01606 for(i=0; i<w; i++)\
01607 {\
01608 const int src0= src[0*srcStride];\
01609 const int src1= src[1*srcStride];\
01610 const int src2= src[2*srcStride];\
01611 const int src3= src[3*srcStride];\
01612 const int src4= src[4*srcStride];\
01613 const int src5= src[5*srcStride];\
01614 const int src6= src[6*srcStride];\
01615 const int src7= src[7*srcStride];\
01616 const int src8= src[8*srcStride];\
01617 const int src9= src[9*srcStride];\
01618 const int src10= src[10*srcStride];\
01619 const int src11= src[11*srcStride];\
01620 const int src12= src[12*srcStride];\
01621 const int src13= src[13*srcStride];\
01622 const int src14= src[14*srcStride];\
01623 const int src15= src[15*srcStride];\
01624 const int src16= src[16*srcStride];\
01625 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01626 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01627 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01628 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01629 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01630 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01631 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01632 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01633 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01634 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01635 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01636 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01637 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01638 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01639 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01640 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01641 dst++;\
01642 src++;\
01643 }\
01644 }\
01645 \
01646 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01647 OPNAME ## pixels8_c(dst, src, stride, 8);\
01648 }\
01649 \
01650 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01651 uint8_t half[64];\
01652 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01653 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01654 }\
01655 \
01656 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01657 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01658 }\
01659 \
01660 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01661 uint8_t half[64];\
01662 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01663 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01664 }\
01665 \
01666 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01667 uint8_t full[16*9];\
01668 uint8_t half[64];\
01669 copy_block9(full, src, 16, stride, 9);\
01670 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01671 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01672 }\
01673 \
01674 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01675 uint8_t full[16*9];\
01676 copy_block9(full, src, 16, stride, 9);\
01677 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01678 }\
01679 \
01680 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01681 uint8_t full[16*9];\
01682 uint8_t half[64];\
01683 copy_block9(full, src, 16, stride, 9);\
01684 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01685 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01686 }\
01687 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01688 uint8_t full[16*9];\
01689 uint8_t halfH[72];\
01690 uint8_t halfV[64];\
01691 uint8_t halfHV[64];\
01692 copy_block9(full, src, 16, stride, 9);\
01693 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01694 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01695 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01696 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01697 }\
01698 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01699 uint8_t full[16*9];\
01700 uint8_t halfH[72];\
01701 uint8_t halfHV[64];\
01702 copy_block9(full, src, 16, stride, 9);\
01703 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01704 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01705 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01706 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01707 }\
01708 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01709 uint8_t full[16*9];\
01710 uint8_t halfH[72];\
01711 uint8_t halfV[64];\
01712 uint8_t halfHV[64];\
01713 copy_block9(full, src, 16, stride, 9);\
01714 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01715 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01716 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01717 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01718 }\
01719 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01720 uint8_t full[16*9];\
01721 uint8_t halfH[72];\
01722 uint8_t halfHV[64];\
01723 copy_block9(full, src, 16, stride, 9);\
01724 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01725 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01726 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01727 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01728 }\
01729 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01730 uint8_t full[16*9];\
01731 uint8_t halfH[72];\
01732 uint8_t halfV[64];\
01733 uint8_t halfHV[64];\
01734 copy_block9(full, src, 16, stride, 9);\
01735 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01736 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01737 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01738 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01739 }\
01740 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01741 uint8_t full[16*9];\
01742 uint8_t halfH[72];\
01743 uint8_t halfHV[64];\
01744 copy_block9(full, src, 16, stride, 9);\
01745 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01746 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01747 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01748 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01749 }\
01750 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01751 uint8_t full[16*9];\
01752 uint8_t halfH[72];\
01753 uint8_t halfV[64];\
01754 uint8_t halfHV[64];\
01755 copy_block9(full, src, 16, stride, 9);\
01756 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01757 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01758 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01759 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01760 }\
01761 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01762 uint8_t full[16*9];\
01763 uint8_t halfH[72];\
01764 uint8_t halfHV[64];\
01765 copy_block9(full, src, 16, stride, 9);\
01766 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01767 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01768 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01769 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01770 }\
01771 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01772 uint8_t halfH[72];\
01773 uint8_t halfHV[64];\
01774 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01775 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01776 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01777 }\
01778 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01779 uint8_t halfH[72];\
01780 uint8_t halfHV[64];\
01781 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01782 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01783 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01784 }\
01785 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01786 uint8_t full[16*9];\
01787 uint8_t halfH[72];\
01788 uint8_t halfV[64];\
01789 uint8_t halfHV[64];\
01790 copy_block9(full, src, 16, stride, 9);\
01791 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01792 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01793 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01794 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01795 }\
01796 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01797 uint8_t full[16*9];\
01798 uint8_t halfH[72];\
01799 copy_block9(full, src, 16, stride, 9);\
01800 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01801 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01802 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01803 }\
01804 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01805 uint8_t full[16*9];\
01806 uint8_t halfH[72];\
01807 uint8_t halfV[64];\
01808 uint8_t halfHV[64];\
01809 copy_block9(full, src, 16, stride, 9);\
01810 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01811 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01812 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01813 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01814 }\
01815 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01816 uint8_t full[16*9];\
01817 uint8_t halfH[72];\
01818 copy_block9(full, src, 16, stride, 9);\
01819 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01820 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01821 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01822 }\
01823 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01824 uint8_t halfH[72];\
01825 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01826 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01827 }\
01828 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01829 OPNAME ## pixels16_c(dst, src, stride, 16);\
01830 }\
01831 \
01832 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01833 uint8_t half[256];\
01834 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01835 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01836 }\
01837 \
01838 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01839 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01840 }\
01841 \
01842 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01843 uint8_t half[256];\
01844 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01845 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01846 }\
01847 \
01848 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01849 uint8_t full[24*17];\
01850 uint8_t half[256];\
01851 copy_block17(full, src, 24, stride, 17);\
01852 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01853 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01854 }\
01855 \
01856 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01857 uint8_t full[24*17];\
01858 copy_block17(full, src, 24, stride, 17);\
01859 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01860 }\
01861 \
01862 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01863 uint8_t full[24*17];\
01864 uint8_t half[256];\
01865 copy_block17(full, src, 24, stride, 17);\
01866 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01867 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01868 }\
01869 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01870 uint8_t full[24*17];\
01871 uint8_t halfH[272];\
01872 uint8_t halfV[256];\
01873 uint8_t halfHV[256];\
01874 copy_block17(full, src, 24, stride, 17);\
01875 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01876 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01877 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01878 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01879 }\
01880 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01881 uint8_t full[24*17];\
01882 uint8_t halfH[272];\
01883 uint8_t halfHV[256];\
01884 copy_block17(full, src, 24, stride, 17);\
01885 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01886 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01887 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01888 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01889 }\
01890 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01891 uint8_t full[24*17];\
01892 uint8_t halfH[272];\
01893 uint8_t halfV[256];\
01894 uint8_t halfHV[256];\
01895 copy_block17(full, src, 24, stride, 17);\
01896 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01897 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01898 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01899 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01900 }\
01901 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01902 uint8_t full[24*17];\
01903 uint8_t halfH[272];\
01904 uint8_t halfHV[256];\
01905 copy_block17(full, src, 24, stride, 17);\
01906 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01907 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01908 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01909 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01910 }\
01911 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01912 uint8_t full[24*17];\
01913 uint8_t halfH[272];\
01914 uint8_t halfV[256];\
01915 uint8_t halfHV[256];\
01916 copy_block17(full, src, 24, stride, 17);\
01917 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01918 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01919 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01920 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01921 }\
01922 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01923 uint8_t full[24*17];\
01924 uint8_t halfH[272];\
01925 uint8_t halfHV[256];\
01926 copy_block17(full, src, 24, stride, 17);\
01927 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01928 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01929 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01930 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01931 }\
01932 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01933 uint8_t full[24*17];\
01934 uint8_t halfH[272];\
01935 uint8_t halfV[256];\
01936 uint8_t halfHV[256];\
01937 copy_block17(full, src, 24, stride, 17);\
01938 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01939 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01940 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01941 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01942 }\
01943 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01944 uint8_t full[24*17];\
01945 uint8_t halfH[272];\
01946 uint8_t halfHV[256];\
01947 copy_block17(full, src, 24, stride, 17);\
01948 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01949 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01950 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01951 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01952 }\
01953 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01954 uint8_t halfH[272];\
01955 uint8_t halfHV[256];\
01956 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01957 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01958 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01959 }\
01960 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01961 uint8_t halfH[272];\
01962 uint8_t halfHV[256];\
01963 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01964 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01965 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01966 }\
01967 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01968 uint8_t full[24*17];\
01969 uint8_t halfH[272];\
01970 uint8_t halfV[256];\
01971 uint8_t halfHV[256];\
01972 copy_block17(full, src, 24, stride, 17);\
01973 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01974 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01975 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01976 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
01977 }\
01978 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01979 uint8_t full[24*17];\
01980 uint8_t halfH[272];\
01981 copy_block17(full, src, 24, stride, 17);\
01982 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01983 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01984 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01985 }\
01986 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01987 uint8_t full[24*17];\
01988 uint8_t halfH[272];\
01989 uint8_t halfV[256];\
01990 uint8_t halfHV[256];\
01991 copy_block17(full, src, 24, stride, 17);\
01992 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01993 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01994 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01995 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
01996 }\
01997 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01998 uint8_t full[24*17];\
01999 uint8_t halfH[272];\
02000 copy_block17(full, src, 24, stride, 17);\
02001 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02002 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02003 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02004 }\
02005 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02006 uint8_t halfH[272];\
02007 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02008 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02009 }
02010
02011 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02012 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02013 #define op_put(a, b) a = cm[((b) + 16)>>5]
02014 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02015
02016 QPEL_MC(0, put_ , _ , op_put)
02017 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02018 QPEL_MC(0, avg_ , _ , op_avg)
02019
02020 #undef op_avg
02021 #undef op_avg_no_rnd
02022 #undef op_put
02023 #undef op_put_no_rnd
02024
02025 #if 1
/*
 * H264_LOWPASS_SIZE(QUAL, OPNAME, OP, OP2, N)
 *
 * Emits the three N x N filter functions built on the 6-tap kernel
 * (1, -5, 20, 20, -5, 1):
 *
 *   <OPNAME>h264_qpel<N>_h_lowpass   filter along each row
 *   <OPNAME>h264_qpel<N>_v_lowpass   filter along each column
 *   <OPNAME>h264_qpel<N>_hv_lowpass  rows first (kept unscaled in tmp),
 *                                    then columns of tmp
 *
 * QUAL is the storage class / attribute prefix of the generated functions.
 * OP(dst, sum) stores a single-pass sum, OP2(dst, sum) stores a two-pass
 * sum; both expect a local pointer named `cm`.
 *
 * Memory touched, relative to the incoming pointers:
 *   h  : src columns -2 .. N+2 of rows 0 .. N-1
 *   v  : src rows    -2 .. N+2 of columns 0 .. N-1
 *   hv : both of the above; tmp receives N+5 rows of N values, spaced
 *        tmpStride apart
 */
#define H264_LOWPASS_SIZE(QUAL, OPNAME, OP, OP2, N) \
QUAL void OPNAME ## h264_qpel ## N ## _h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int row, col;\
    for(row=0; row<N; row++){\
        for(col=0; col<N; col++){\
            OP(dst[col], (src[col]+src[col+1])*20 - (src[col-1]+src[col+2])*5 + (src[col-2]+src[col+3]));\
        }\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
QUAL void OPNAME ## h264_qpel ## N ## _v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int row, col, k;\
    for(col=0; col<N; col++){\
        /* latch the whole column (rows -2 .. N+2) before storing anything */\
        int tap[N+5];\
        for(k=0; k<N+5; k++){\
            tap[k] = src[(k-2)*srcStride];\
        }\
        for(row=0; row<N; row++){\
            OP(dst[row*dstStride], (tap[row+2]+tap[row+3])*20 - (tap[row+1]+tap[row+4])*5 + (tap[row]+tap[row+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
QUAL void OPNAME ## h264_qpel ## N ## _hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int row, col, k;\
    /* first pass: horizontal sums for N+5 rows, starting two rows above src */\
    src -= 2*srcStride;\
    for(row=0; row<N+5; row++){\
        for(col=0; col<N; col++){\
            tmp[col] = (src[col]+src[col+1])*20 - (src[col-1]+src[col+2])*5 + (src[col-2]+src[col+3]);\
        }\
        tmp += tmpStride;\
        src += srcStride;\
    }\
    /* rewind tmp to the row that corresponds to output row 0 */\
    tmp -= tmpStride*(N+5-2);\
    /* second pass: vertical filter over the intermediate sums */\
    for(col=0; col<N; col++){\
        int tap[N+5];\
        for(k=0; k<N+5; k++){\
            tap[k] = tmp[(k-2)*tmpStride];\
        }\
        for(row=0; row<N; row++){\
            OP2(dst[row*dstStride], (tap[row+2]+tap[row+3])*20 - (tap[row+1]+tap[row+4])*5 + (tap[row]+tap[row+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\

/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Generates the h/v/hv low-pass functions for block sizes 2, 4, 8 and 16.
 * The 2x2 set is tagged av_unused.  The 16x16 functions are composed from
 * four calls to the 8x8 ones (left/right halves of the top 8 rows, then of
 * the bottom 8 rows); the hv variant hands the same tmp / tmp+8 scratch
 * pointers to both the top and the bottom pair.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
H264_LOWPASS_SIZE(static av_unused, OPNAME, OP, OP2, 2)\
H264_LOWPASS_SIZE(static, OPNAME, OP, OP2, 4)\
H264_LOWPASS_SIZE(static, OPNAME, OP, OP2, 8)\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t * const dstLow = dst + 8*dstStride;\
    uint8_t * const srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t * const dstLow = dst + 8*dstStride;\
    uint8_t * const srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst     , src     , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst   +8, src   +8, dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow  , srcLow  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dstLow+8, srcLow+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t * const dstLow = dst + 8*dstStride;\
    uint8_t * const srcLow = src + 8*srcStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst     , tmp  , src     , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst   +8, tmp+8, src   +8, dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow  , tmp  , srcLow  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dstLow+8, tmp+8, srcLow+8, dstStride, tmpStride, srcStride);\
}
02289
/*
 * H264_MC(OPNAME, SIZE)
 *
 * Generates the sixteen <OPNAME>h264_qpel<SIZE>_mcXY_c(dst, src, stride)
 * entry points (X, Y in 0..3).  Each one builds up to two SIZE x SIZE
 * planes with the put_ low-pass filters and/or a local copy of the source
 * and passes them to <OPNAME>pixels<SIZE>_l2, or writes a single filter
 * result straight to dst through the <OPNAME> low-pass function.
 *
 * Functions that filter vertically first copy SIZE+5 source rows, starting
 * two rows above src, into a tightly packed (stride == SIZE) buffer;
 * `centre` then points at the copy of row 0.
 */
#define H264_MC(OPNAME, SIZE) \
/* mc00: no filtering, the source block goes straight to pixels_c */\
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
/* mc10: l2 of src and the horizontal low-pass of src */\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t horiz[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, horiz, stride, stride, SIZE, SIZE);\
}\
\
/* mc20: horizontal low-pass written directly to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
/* mc30: like mc10 but paired with the block one sample to the right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t horiz[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, horiz, stride, stride, SIZE, SIZE);\
}\
\
/* mc01: l2 of the copied source block and its vertical low-pass */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc02: vertical low-pass written directly to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, centre, stride, SIZE);\
}\
\
/* mc03: like mc01 but paired with the block one row further down */\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, centre+SIZE, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc11: horizontal low-pass at src, vertical low-pass at src */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc31: horizontal low-pass at src, vertical low-pass one column right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc13: horizontal low-pass one row down, vertical low-pass at src */\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc33: horizontal low-pass one row down, vertical low-pass one column right */\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, vert, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc22: two-pass (hv) low-pass written directly to dst */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t work[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, work, src, stride, SIZE, stride);\
}\
\
/* mc21: horizontal low-pass at src paired with the hv low-pass */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t work[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, work, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, diag, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc23: horizontal low-pass one row down paired with the hv low-pass */\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t work[SIZE*(SIZE+5)];\
    uint8_t horiz[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, work, src, SIZE, SIZE, stride);\
    put_h264_qpel ## SIZE ## _h_lowpass(horiz, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, horiz, diag, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc12: vertical low-pass at src paired with the hv low-pass */\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    int16_t work[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vert, diag, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc32: vertical low-pass one column right paired with the hv low-pass */\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t rows[SIZE*(SIZE+5)];\
    int16_t work[SIZE*(SIZE+5)];\
    uint8_t vert[SIZE*SIZE];\
    uint8_t diag[SIZE*SIZE];\
    uint8_t * const centre = rows + SIZE*2;\
    copy_block ## SIZE (rows, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vert, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(diag, work, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vert, diag, stride, SIZE, SIZE, SIZE);\
}
02426
/* Store operations handed to the H264_LOWPASS template below. Each takes a
 * destination lvalue `a` and a filter sum `b`, and refers to a table named
 * `cm` that must be in scope wherever the macro is expanded.
 * op_avg: look up cm[(b + 16) >> 5], then average it with the current value
 * of `a`, rounding up. */
02427 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02428
/* op_put: overwrite `a` with cm[(b + 16) >> 5]. */
02429 #define op_put(a, b) a = cm[((b) + 16)>>5]
/* op2_avg / op2_put: same as op_avg / op_put, but with a rounding term of 512
 * and a shift of 10 (presumably for sums that went through two filter
 * passes -- confirm against H264_LOWPASS). */
02430 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
02431 #define op2_put(a, b) a = cm[((b) + 512)>>10]
02432
/* Instantiate the lowpass filters for both store flavours, then the motion
 * compensation functions per block size. Note that put_ is instantiated for
 * sizes 2, 4, 8 and 16 while avg_ only for 4, 8 and 16. */
02433 H264_LOWPASS(put_ , op_put, op2_put)
02434 H264_LOWPASS(avg_ , op_avg, op2_avg)
02435 H264_MC(put_, 2)
02436 H264_MC(put_, 4)
02437 H264_MC(put_, 8)
02438 H264_MC(put_, 16)
02439 H264_MC(avg_, 4)
02440 H264_MC(avg_, 8)
02441 H264_MC(avg_, 16)
02442
/* The store operations are only meant for the instantiations above. */
02443 #undef op_avg
02444 #undef op_put
02445 #undef op2_avg
02446 #undef op2_put
02447 #endif
02448
/*
 * Per-pixel helpers for the weighted prediction functions generated below.
 * They use the locals of the enclosing function (block / dst / src, the
 * weights, offset and log2_denom) and clip the result with av_clip_uint8().
 *   op_scale1(x): rescale pixel x of `block` in place.
 *   op_scale2(x): blend pixel x of `src` into `dst` using two weights.
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/*
 * H264_WEIGHT(W,H) emits two functions working on a W x H pixel area:
 *   weight_h264_pixels<W>x<H>_c():   one weight, applied in place on `block`;
 *                                    offset is pre-scaled by log2_denom and a
 *                                    rounding term is added when log2_denom != 0.
 *   biweight_h264_pixels<W>x<H>_c(): two weights, result written to `dst`;
 *                                    offset becomes ((offset + 1) | 1) << log2_denom.
 * Rows are `stride` bytes apart. Only widths 2, 4, 8 and 16 are instantiated.
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int row, col; \
    offset <<= log2_denom; \
    if(log2_denom) \
        offset += 1<<(log2_denom-1); \
    for(row=0; row<H; row++){ \
        for(col=0; col<W; col++) \
            op_scale1(col); \
        block += stride; \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int row, col; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(row=0; row<H; row++){ \
        for(col=0; col<W; col++) \
            op_scale2(col); \
        dst += stride; \
        src += stride; \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
02518
02519 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02520 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02521 int i;
02522
02523 for(i=0; i<h; i++){
02524 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02525 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02526 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02527 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02528 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02529 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02530 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02531 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02532 dst+=dstStride;
02533 src+=srcStride;
02534 }
02535 }
02536
/* CAVS helpers, only compiled when the CAVS decoder is configured. */
02537 #ifdef CONFIG_CAVS_DECODER
02538
/* Declaration only; no definition is visible in this part of the file. */
02539 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
02540
/* mc00 (no sub-pel offset), 8 wide: forwards to put_pixels8_c() with a height of 8. */
02541 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
02542 put_pixels8_c(dst, src, stride, 8);
02543 }
/* Same position, forwarding to avg_pixels8_c() instead. */
02544 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
02545 avg_pixels8_c(dst, src, stride, 8);
02546 }
/* 16 wide variant: forwards to put_pixels16_c() with a height of 16. */
02547 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
02548 put_pixels16_c(dst, src, stride, 16);
02549 }
/* 16 wide variant forwarding to avg_pixels16_c(). */
02550 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
02551 avg_pixels16_c(dst, src, stride, 16);
02552 }
02553 #endif
02554
/* VC-1 / WMV3 helpers, compiled when either decoder is configured. */
02555 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
02556
/* Declaration only; no definition is visible in this part of the file. */
02557 void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
02558
/* mc00 position: forwards to put_pixels8_c() with a height of 8.
 * The rnd argument is accepted but not used here. */
02559 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
02560 put_pixels8_c(dst, src, stride, 8);
02561 }
02562 #endif
02563
/* Init hooks declared here without a definition in this part of the file
 * (presumably implemented in the IntraX8 and H.264 encoder DSP sources --
 * confirm). Both take the DSPContext to fill and the codec context. */
02564 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
02565
02566
02567 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
02568
02569 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02570 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02571 int i;
02572
02573 for(i=0; i<w; i++){
02574 const int src_1= src[ -srcStride];
02575 const int src0 = src[0 ];
02576 const int src1 = src[ srcStride];
02577 const int src2 = src[2*srcStride];
02578 const int src3 = src[3*srcStride];
02579 const int src4 = src[4*srcStride];
02580 const int src5 = src[5*srcStride];
02581 const int src6 = src[6*srcStride];
02582 const int src7 = src[7*srcStride];
02583 const int src8 = src[8*srcStride];
02584 const int src9 = src[9*srcStride];
02585 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02586 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02587 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02588 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02589 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02590 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02591 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02592 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02593 src++;
02594 dst++;
02595 }
02596 }
02597
/* mspel position (0,0): no filtering, forwards to put_pixels8_c() with a height of 8. */
02598 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
02599 put_pixels8_c(dst, src, stride, 8);
02600 }
02601
/**
 * mspel position (1,0): the horizontally lowpassed block is combined with the
 * unshifted source block through put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[8*8];  /* horizontal lowpass output, packed with stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
02607
/* mspel position (2,0): horizontal lowpass of 8 rows written straight into dst. */
02608 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
02609 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
02610 }
02611
/**
 * mspel position (3,0): the horizontally lowpassed block is combined with the
 * source block shifted one pixel to the right through put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[8*8];  /* horizontal lowpass output, packed with stride 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
02617
/* mspel position (0,2): vertical lowpass of 8 columns written straight into dst. */
02618 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
02619 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
02620 }
02621
/**
 * mspel position (1,2): combines the vertically lowpassed source block with
 * the block that was lowpassed horizontally and then vertically.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t h_rows[11*8];   /* 11 horizontally filtered rows, starting one row above src */
    uint8_t v_only[8*8];
    uint8_t h_then_v[8*8];

    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);

    wmv2_mspel8_h_lowpass(h_rows, src-stride, 8, stride, 11);
    /* skip the first filtered row so the vertical taps line up with src */
    wmv2_mspel8_v_lowpass(h_then_v, h_rows+8, 8, 8, 8);

    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): like position (1,2), but the vertical-only lowpass is
 * taken from the source shifted one pixel to the right.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t h_rows[11*8];   /* 11 horizontally filtered rows, starting one row above src */
    uint8_t v_only[8*8];
    uint8_t h_then_v[8*8];

    wmv2_mspel8_v_lowpass(v_only, src+1, 8, stride, 8);

    wmv2_mspel8_h_lowpass(h_rows, src-stride, 8, stride, 11);
    /* skip the first filtered row so the vertical taps line up with src */
    wmv2_mspel8_v_lowpass(h_then_v, h_rows+8, 8, 8, 8);

    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal lowpass into a temporary of 11 rows,
 * followed by a vertical lowpass of that temporary into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t h_rows[11*8];   /* starts one row above src; row stride is 8 */

    wmv2_mspel8_h_lowpass(h_rows, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, h_rows+8, stride, 8, 8);
}
02645
02646 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
02647 if(ENABLE_ANY_H263) {
02648 int x;
02649 const int strength= ff_h263_loop_filter_strength[qscale];
02650
02651 for(x=0; x<8; x++){
02652 int d1, d2, ad1;
02653 int p0= src[x-2*stride];
02654 int p1= src[x-1*stride];
02655 int p2= src[x+0*stride];
02656 int p3= src[x+1*stride];
02657 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02658
02659 if (d<-2*strength) d1= 0;
02660 else if(d<- strength) d1=-2*strength - d;
02661 else if(d< strength) d1= d;
02662 else if(d< 2*strength) d1= 2*strength - d;
02663 else d1= 0;
02664
02665 p1 += d1;
02666 p2 -= d1;
02667 if(p1&256) p1= ~(p1>>31);
02668 if(p2&256) p2= ~(p2>>31);
02669
02670 src[x-1*stride] = p1;
02671 src[x+0*stride] = p2;
02672
02673 ad1= FFABS(d1)>>1;
02674
02675 d2= av_clip((p0-p3)/4, -ad1, ad1);
02676
02677 src[x-2*stride] = p0 - d2;
02678 src[x+ stride] = p3 + d2;
02679 }
02680 }
02681 }
02682
02683 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
02684 if(ENABLE_ANY_H263) {
02685 int y;
02686 const int strength= ff_h263_loop_filter_strength[qscale];
02687
02688 for(y=0; y<8; y++){
02689 int d1, d2, ad1;
02690 int p0= src[y*stride-2];
02691 int p1= src[y*stride-1];
02692 int p2= src[y*stride+0];
02693 int p3= src[y*stride+1];
02694 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02695
02696 if (d<-2*strength) d1= 0;
02697 else if(d<- strength) d1=-2*strength - d;
02698 else if(d< strength) d1= d;
02699 else if(d< 2*strength) d1= 2*strength - d;
02700 else d1= 0;
02701
02702 p1 += d1;
02703 p2 -= d1;
02704 if(p1&256) p1= ~(p1>>31);
02705 if(p2&256) p2= ~(p2>>31);
02706
02707 src[y*stride-1] = p1;
02708 src[y*stride+0] = p2;
02709
02710 ad1= FFABS(d1)>>1;
02711
02712 d2= av_clip((p0-p3)/4, -ad1, ad1);
02713
02714 src[y*stride-2] = p0 - d2;
02715 src[y*stride+1] = p3 + d2;
02716 }
02717 }
02718 }
02719
/**
 * H.261 loop filter on an 8x8 block, applied in place.
 *
 * A separable (1, 2, 1) filter is run vertically into an integer scratch
 * block and then horizontally back into src. Border rows skip the vertical
 * filter (they are scaled by 4 instead) and border columns skip the
 * horizontal one, so the block border is only filtered along the edge.
 *
 * @param src    top-left pixel of the block
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8][8];
    int row, col;

    /* vertical pass: first and last row are passed through with a gain of 4 */
    for(col=0; col<8; col++){
        vert[0][col] = 4*src[col];
        vert[7][col] = 4*src[col + 7*stride];
    }
    for(row=1; row<7; row++){
        const uint8_t *line = src + row*stride;

        for(col=0; col<8; col++)
            vert[row][col] = line[col - stride] + 2*line[col] + line[col + stride];
    }

    /* horizontal pass: first and last column only undo the gain of 4 */
    for(row=0; row<8; row++){
        uint8_t   *out = src + row*stride;
        const int *in  = vert[row];

        out[0] = (in[0] + 2)>>2;
        out[7] = (in[7] + 2)>>2;
        for(col=1; col<7; col++)
            out[col] = (in[col-1] + 2*in[col] + in[col+1] + 8)>>4;
    }
}
02746
/**
 * H.264 normal (non-intra) luma deblocking of one 16-sample edge.
 *
 * The edge is handled as 4 segments of 4 positions; segment i uses tc0[i]
 * and is skipped entirely when tc0[i] is negative.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge (from one tap to the next)
 * @param ystride step along the edge (from one filtered position to the next)
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for the side differences
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int seg, n;

    for( seg = 0; seg < 4; seg++ ) {
        const int tc_base = tc0[seg];

        if( tc_base >= 0 ) {
            for( n = 0; n < 4; n++ ) {
                uint8_t * const px = pix + n*ystride;
                const int p2 = px[-3*xstride];
                const int p1 = px[-2*xstride];
                const int p0 = px[-1*xstride];
                const int q0 = px[0];
                const int q1 = px[1*xstride];
                const int q2 = px[2*xstride];
                int tc, delta;

                if( FFABS( p0 - q0 ) >= alpha ||
                    FFABS( p1 - p0 ) >= beta  ||
                    FFABS( q1 - q0 ) >= beta )
                    continue;

                /* each side that also gets its second sample filtered
                 * widens the clipping range of the edge samples by one */
                tc = tc_base;
                if( FFABS( p2 - p0 ) < beta ) {
                    px[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_base, tc_base );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    px[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_base, tc_base );
                    tc++;
                }

                delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                px[-xstride] = av_clip_uint8( p0 + delta );
                px[0]        = av_clip_uint8( q0 - delta );
            }
        }
        pix += 4*ystride;
    }
}
/* Luma filter with taps one `stride` apart (xstride = stride) and successive
 * filtered positions in adjacent bytes (ystride = 1). */
02787 static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
02788 {
02789 h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
02790 }
/* Luma filter with taps in adjacent bytes (xstride = 1) and successive
 * filtered positions one `stride` apart (ystride = stride). */
02791 static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
02792 {
02793 h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
02794 }
02795
/**
 * H.264 normal (non-intra) chroma deblocking of one 8-sample edge.
 *
 * The edge is handled as 4 segments of 2 positions; segment i uses tc0[i]
 * and is skipped when tc0[i] is zero or negative. Only p0 and q0 change.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge
 * @param ystride step along the edge
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for |p1 - p0| and |q1 - q0|
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int seg, n;

    for( seg = 0; seg < 4; seg++ ) {
        const int tc = tc0[seg];

        if( tc > 0 ) {
            for( n = 0; n < 2; n++ ) {
                uint8_t * const px = pix + n*ystride;
                const int p1 = px[-2*xstride];
                const int p0 = px[-1*xstride];
                const int q0 = px[0];
                const int q1 = px[1*xstride];
                int delta;

                if( FFABS( p0 - q0 ) >= alpha ||
                    FFABS( p1 - p0 ) >= beta  ||
                    FFABS( q1 - q0 ) >= beta )
                    continue;

                delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                px[-xstride] = av_clip_uint8( p0 + delta );
                px[0]        = av_clip_uint8( q0 - delta );
            }
        }
        pix += 2*ystride;
    }
}
/* Chroma filter with taps one `stride` apart (xstride = stride) and successive
 * filtered positions in adjacent bytes (ystride = 1). */
02824 static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
02825 {
02826 h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
02827 }
/* Chroma filter with taps in adjacent bytes (xstride = 1) and successive
 * filtered positions one `stride` apart (ystride = stride). */
02828 static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
02829 {
02830 h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
02831 }
02832
/**
 * H.264 intra chroma deblocking of one 8-sample edge.
 *
 * Where the alpha/beta tests pass, p0 and q0 are replaced by
 * (2*p1 + p0 + q1 + 2) >> 2 and (2*q1 + q0 + p1 + 2) >> 2 respectively.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step across the edge
 * @param ystride step along the edge
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for |p1 - p0| and |q1 - q0|
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int n;

    for( n = 0; n < 8; n++, pix += ystride ) {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) >= alpha ||
            FFABS( p1 - p0 ) >= beta  ||
            FFABS( q1 - q0 ) >= beta )
            continue;

        pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
        pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;
    }
}
/* Intra chroma filter with taps one `stride` apart (xstride = stride) and
 * successive filtered positions in adjacent bytes (ystride = 1). */
02852 static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
02853 {
02854 h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
02855 }
/* Intra chroma filter with taps in adjacent bytes (xstride = 1) and
 * successive filtered positions one `stride` apart (ystride = stride). */
02856 static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
02857 {
02858 h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
02859 }
02860
/**
 * Sum of absolute differences between two blocks 16 pixels wide.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 * @return the accumulated |pix1 - pix2| over 16*h pixels
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        for(col=0; col<16; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
02888
/**
 * SAD of a 16 pixel wide block against the reference averaged with its right
 * neighbour (avg2 of pix2[x] and pix2[x+1]); reads 17 reference pixels per row.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        for(col=0; col<16; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
02916
/**
 * SAD of a 16 pixel wide block against the reference averaged with the row
 * below it (avg2 of pix2[x] and the pixel one line_size further); reads h+1
 * reference rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        uint8_t * const below = pix2 + line_size;

        for(col=0; col<16; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2  = below;
    }
    return total;
}
02946
/**
 * SAD of a 16 pixel wide block against the reference averaged over a 2x2
 * neighbourhood (avg4 of the pixel, its right neighbour and the two pixels
 * below them); reads 17 pixels from each of h+1 reference rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        uint8_t * const below = pix2 + line_size;

        for(col=0; col<16; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2  = below;
    }
    return total;
}
02976
/**
 * Sum of absolute differences between two blocks 8 pixels wide.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 * @return the accumulated |pix1 - pix2| over 8*h pixels
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        for(col=0; col<8; col++)
            total += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
02996
/**
 * SAD of an 8 pixel wide block against the reference averaged with its right
 * neighbour (avg2 of pix2[x] and pix2[x+1]); reads 9 reference pixels per row.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        for(col=0; col<8; col++)
            total += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
03016
/**
 * SAD of an 8 pixel wide block against the reference averaged with the row
 * below it (avg2 of pix2[x] and the pixel one line_size further); reads h+1
 * reference rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        uint8_t * const below = pix2 + line_size;

        for(col=0; col<8; col++)
            total += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2  = below;
    }
    return total;
}
03038
/**
 * SAD of an 8 pixel wide block against the reference averaged over a 2x2
 * neighbourhood (avg4 of the pixel, its right neighbour and the two pixels
 * below them); reads 9 pixels from each of h+1 reference rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows in both blocks
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int total = 0;
    int row, col;

    for(row=0; row<h; row++) {
        uint8_t * const below = pix2 + line_size;

        for(col=0; col<8; col++)
            total += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2  = below;
    }
    return total;
}
03060
03061 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03062 MpegEncContext *c = v;
03063 int score1=0;
03064 int score2=0;
03065 int x,y;
03066
03067 for(y=0; y<h; y++){
03068 for(x=0; x<16; x++){
03069 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03070 }
03071 if(y+1<h){
03072 for(x=0; x<15; x++){
03073 score2+= FFABS( s1[x ] - s1[x +stride]
03074 - s1[x+1] + s1[x+1+stride])
03075 -FFABS( s2[x ] - s2[x +stride]
03076 - s2[x+1] + s2[x+1+stride]);
03077 }
03078 }
03079 s1+= stride;
03080 s2+= stride;
03081 }
03082
03083 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03084 else return score1 + FFABS(score2)*8;
03085 }
03086
03087 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03088 MpegEncContext *c = v;
03089 int score1=0;
03090 int score2=0;
03091 int x,y;
03092
03093 for(y=0; y<h; y++){
03094 for(x=0; x<8; x++){
03095 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03096 }
03097 if(y+1<h){
03098 for(x=0; x<7; x++){
03099 score2+= FFABS( s1[x ] - s1[x +stride]
03100 - s1[x+1] + s1[x+1+stride])
03101 -FFABS( s2[x ] - s2[x +stride]
03102 - s2[x+1] + s2[x+1+stride]);
03103 }
03104 }
03105 s1+= stride;
03106 s2+= stride;
03107 }
03108
03109 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03110 else return score1 + FFABS(score2)*8;
03111 }
03112
03113 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
03114 int i;
03115 unsigned int sum=0;
03116
03117 for(i=0; i<8*8; i++){
03118 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
03119 int w= weight[i];
03120 b>>= RECON_SHIFT;
03121 assert(-512<b && b<512);
03122
03123 sum += (w*b)*(w*b)>>4;
03124 }
03125 return sum>>2;
03126 }
03127
03128 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
03129 int i;
03130
03131 for(i=0; i<8*8; i++){
03132 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
03133 }
03134 }
03135
03144 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03145 {
03146 int i;
03147 DCTELEM temp[64];
03148
03149 if(last<=0) return;
03150
03151
03152 for(i=0; i<=last; i++){
03153 const int j= scantable[i];
03154 temp[j]= block[j];
03155 block[j]=0;
03156 }
03157
03158 for(i=0; i<=last; i++){
03159 const int j= scantable[i];
03160 const int perm_j= permutation[j];
03161 block[perm_j]= temp[j];
03162 }
03163 }
03164
/* Comparison function that ignores all of its arguments and always returns 0. */
03165 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
03166 return 0;
03167 }
03168
03169 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
03170 int i;
03171
03172 memset(cmp, 0, sizeof(void*)*5);
03173
03174 for(i=0; i<5; i++){
03175 switch(type&0xFF){
03176 case FF_CMP_SAD:
03177 cmp[i]= c->sad[i];
03178 break;
03179 case FF_CMP_SATD:
03180 cmp[i]= c->hadamard8_diff[i];
03181 break;
03182 case FF_CMP_SSE:
03183 cmp[i]= c->sse[i];
03184 break;
03185 case FF_CMP_DCT:
03186 cmp[i]= c->dct_sad[i];
03187 break;
03188 case FF_CMP_DCT264:
03189 cmp[i]= c->dct264_sad[i];
03190 break;
03191 case FF_CMP_DCTMAX:
03192 cmp[i]= c->dct_max[i];
03193 break;
03194 case FF_CMP_PSNR:
03195 cmp[i]= c->quant_psnr[i];
03196 break;
03197 case FF_CMP_BIT:
03198 cmp[i]= c->bit[i];
03199 break;
03200 case FF_CMP_RD:
03201 cmp[i]= c->rd[i];
03202 break;
03203 case FF_CMP_VSAD:
03204 cmp[i]= c->vsad[i];
03205 break;
03206 case FF_CMP_VSSE:
03207 cmp[i]= c->vsse[i];
03208 break;
03209 case FF_CMP_ZERO:
03210 cmp[i]= zero_cmp;
03211 break;
03212 case FF_CMP_NSSE:
03213 cmp[i]= c->nsse[i];
03214 break;
03215 #ifdef CONFIG_SNOW_ENCODER
03216 case FF_CMP_W53:
03217 cmp[i]= c->w53[i];
03218 break;
03219 case FF_CMP_W97:
03220 cmp[i]= c->w97[i];
03221 break;
03222 #endif
03223 default:
03224 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
03225 }
03226 }
03227 }
03228
/* Zero 6 consecutive blocks of 64 DCTELEM coefficients starting at blocks. */
03232 static void clear_blocks_c(DCTELEM *blocks)
03233 {
03234 memset(blocks, 0, sizeof(DCTELEM)*6*64);
03235 }
03236
/**
 * Add src to dst byte by byte, with 8-bit wrap-around.
 *
 * @param dst bytes to update in place
 * @param src bytes to add
 * @param w   number of bytes; nothing is done when w <= 0
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos] += src[pos];
}
03252
/**
 * Store the byte-wise difference src1 - src2 in dst, with 8-bit wrap-around.
 *
 * @param dst  destination
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes; nothing is done when w <= 0
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int pos;

    for(pos=0; pos<w; pos++)
        dst[pos] = src1[pos]-src2[pos];
}
03268
/**
 * Subtract the median prediction from a row of samples.
 *
 * For every position the prediction is mid_pred() of the left sample, the
 * sample from src1 and (left + src1 sample - top-left) masked to 8 bits; the
 * value written is the src2 sample minus that prediction. The running left
 * and top-left samples are carried in and out through left / left_top.
 *
 * @param dst      residual output, w bytes
 * @param src1     row supplying the top and top-left samples
 * @param src2     row being predicted
 * @param w        number of samples
 * @param left     in: initial left sample, out: last sample of src2
 * @param left_top in: initial top-left sample, out: last sample of src1
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left    = *left;
    uint8_t cur_topleft = *left_top;
    int pos;

    for(pos=0; pos<w; pos++){
        const int top = src1[pos];
        const int pred= mid_pred(cur_left, top, (cur_left + top - cur_topleft)&0xFF);

        cur_topleft= top;
        cur_left   = src2[pos];
        dst[pos]   = cur_left - pred;
    }

    *left    = cur_left;
    *left_top= cur_topleft;
}
03286
/* BUTTERFLY2: o1 = i1 + i2 and o2 = i1 - i2, for separate inputs and outputs.
 * Expands to two statements, so it must not be used as the unbraced body of
 * an if/for. */
03287 #define BUTTERFLY2(o1,o2,i1,i2) \
03288 o1= (i1)+(i2);\
03289 o2= (i1)-(i2);
03290
/* BUTTERFLY1: in-place variant, x becomes x + y and y becomes x - y.
 * Expands to a braced block declaring temporaries named a and b, so the
 * arguments must not themselves refer to variables called a or b. */
03291 #define BUTTERFLY1(x,y) \
03292 {\
03293 int a,b;\
03294 a= x;\
03295 b= y;\
03296 x= a+b;\
03297 y= a-b;\
03298 }
03299
/* BUTTERFLYA: |x + y| + |x - y| as an expression; nothing is stored. */
03300 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03301
/**
 * Sum of absolute values of the 8x8 Hadamard transform of src - dst.
 *
 * The rows are transformed first (three butterfly stages each), then the
 * columns; the last column stage is folded into the summation.
 *
 * @param s      unused
 * @param dst    block subtracted from src
 * @param src    block to compare
 * @param stride distance between rows in both blocks
 * @param h      must be 8
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int total=0;
    int i;

    assert(h==8);

    /* horizontal transform of the difference, one row at a time */
    for(i=0; i<8; i++){
        int * const row = temp + 8*i;
        const uint8_t * const cur = src + stride*i;
        const uint8_t * const ref = dst + stride*i;

        BUTTERFLY2(row[0], row[1], cur[0]-ref[0], cur[1]-ref[1]);
        BUTTERFLY2(row[2], row[3], cur[2]-ref[2], cur[3]-ref[3]);
        BUTTERFLY2(row[4], row[5], cur[4]-ref[4], cur[5]-ref[5]);
        BUTTERFLY2(row[6], row[7], cur[6]-ref[6], cur[7]-ref[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical transform, one column at a time */
    for(i=0; i<8; i++){
        int * const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        total += BUTTERFLYA(col[8*0], col[8*4])
                +BUTTERFLYA(col[8*1], col[8*5])
                +BUTTERFLYA(col[8*2], col[8*6])
                +BUTTERFLYA(col[8*3], col[8*7]);
    }
    return total;
}
03353
/**
 * Sum of absolute values of the 8x8 Hadamard transform of src itself, with
 * the |temp[0] + temp[32]| term (taken after the column butterflies)
 * subtracted again at the end.
 *
 * @param s      unused
 * @param src    block to transform
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      must be 8
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int total=0;
    int i;

    assert(h==8);

    /* horizontal transform, one row at a time */
    for(i=0; i<8; i++){
        int * const row = temp + 8*i;
        const uint8_t * const cur = src + stride*i;

        BUTTERFLY2(row[0], row[1], cur[0], cur[1]);
        BUTTERFLY2(row[2], row[3], cur[2], cur[3]);
        BUTTERFLY2(row[4], row[5], cur[4], cur[5]);
        BUTTERFLY2(row[6], row[7], cur[6], cur[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical transform, one column at a time */
    for(i=0; i<8; i++){
        int * const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        total += BUTTERFLYA(col[8*0], col[8*4])
                +BUTTERFLYA(col[8*1], col[8*5])
                +BUTTERFLYA(col[8*2], col[8*6])
                +BUTTERFLYA(col[8*3], col[8*7]);
    }

    /* take the first summed term back out */
    total -= FFABS(temp[8*0] + temp[8*4]);

    return total;
}
03401
03402 static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03403 MpegEncContext * const s= (MpegEncContext *)c;
03404 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
03405 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03406
03407 assert(h==8);
03408
03409 s->dsp.diff_pixels(temp, src1, src2, stride);
03410 s->dsp.fdct(temp);
03411 return s->dsp.sum_abs_dctelem(temp);
03412 }
03413
03414 #ifdef CONFIG_GPL
/*
 * One 8-point integer transform pass, expanded as a braced block.
 * The user supplies SRC(n) to read input n and DST(n, v) to consume output n.
 * All eight inputs are read before the first DST() is issued, so SRC and DST
 * may refer to the same storage.
 */
#define DCT8_1D {\
    const int in0 = SRC(0), in1 = SRC(1), in2 = SRC(2), in3 = SRC(3);\
    const int in4 = SRC(4), in5 = SRC(5), in6 = SRC(6), in7 = SRC(7);\
    const int sum07 = in0 + in7;\
    const int sum16 = in1 + in6;\
    const int sum25 = in2 + in5;\
    const int sum34 = in3 + in4;\
    const int dif07 = in0 - in7;\
    const int dif16 = in1 - in6;\
    const int dif25 = in2 - in5;\
    const int dif34 = in3 - in4;\
    const int even0 = sum07 + sum34;\
    const int even1 = sum16 + sum25;\
    const int even2 = sum07 - sum34;\
    const int even3 = sum16 - sum25;\
    const int odd4  = dif16 + dif25 + (dif07 + (dif07>>1));\
    const int odd5  = dif07 - dif34 - (dif25 + (dif25>>1));\
    const int odd6  = dif07 + dif34 - (dif16 + (dif16>>1));\
    const int odd7  = dif16 - dif25 + (dif34 + (dif34>>1));\
    DST(0,  even0 + even1      ) ;\
    DST(1,  odd4  + (odd7>>2)  ) ;\
    DST(2,  even2 + (even3>>1) ) ;\
    DST(3,  odd5  + (odd6>>2)  ) ;\
    DST(4,  even0 - even1      ) ;\
    DST(5,  odd6  - (odd5>>2)  ) ;\
    DST(6, (even2>>1) - even3  ) ;\
    DST(7, (odd4>>2)  - odd7   ) ;\
}
03441
03442 static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03443 MpegEncContext * const s= (MpegEncContext *)c;
03444 DCTELEM dct[8][8];
03445 int i;
03446 int sum=0;
03447
03448 s->dsp.diff_pixels(dct[0], src1, src2, stride);
03449
03450 #define SRC(x) dct[i][x]
03451 #define DST(x,v) dct[i][x]= v
03452 for( i = 0; i < 8; i++ )
03453 DCT8_1D
03454 #undef SRC
03455 #undef DST
03456
03457 #define SRC(x) dct[x][i]
03458 #define DST(x,v) sum += FFABS(v)
03459 for( i = 0; i < 8; i++ )
03460 DCT8_1D
03461 #undef SRC
03462 #undef DST
03463 return sum;
03464 }
03465 #endif
03466
03467 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03468 MpegEncContext * const s= (MpegEncContext *)c;
03469 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
03470 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03471 int sum=0, i;
03472
03473 assert(h==8);
03474
03475 s->dsp.diff_pixels(temp, src1, src2, stride);
03476 s->dsp.fdct(temp);
03477
03478 for(i=0; i<64; i++)
03479 sum= FFMAX(sum, FFABS(temp[i]));
03480
03481 return sum;
03482 }
03483
03484 static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03485 MpegEncContext * const s= (MpegEncContext *)c;
03486 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
03487 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03488 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
03489 int sum=0, i;
03490
03491 assert(h==8);
03492 s->mb_intra=0;
03493
03494 s->dsp.diff_pixels(temp, src1, src2, stride);
03495
03496 memcpy(bak, temp, 64*sizeof(DCTELEM));
03497
03498 s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03499 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03500 simple_idct(temp);
03501
03502 for(i=0; i<64; i++)
03503 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
03504
03505 return sum;
03506 }
03507
03508 static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03509 MpegEncContext * const s= (MpegEncContext *)c;
03510 const uint8_t *scantable= s->intra_scantable.permutated;
03511 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
03512 DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
03513 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03514 uint8_t * const bak= (uint8_t*)aligned_bak;
03515 int i, last, run, bits, level, distoration, start_i;
03516 const int esc_length= s->ac_esc_length;
03517 uint8_t * length;
03518 uint8_t * last_length;
03519
03520 assert(h==8);
03521
03522 for(i=0; i<8; i++){
03523 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
03524 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
03525 }
03526
03527 s->dsp.diff_pixels(temp, src1, src2, stride);
03528
03529 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03530
03531 bits=0;
03532
03533 if (s->mb_intra) {
03534 start_i = 1;
03535 length = s->intra_ac_vlc_length;
03536 last_length= s->intra_ac_vlc_last_length;
03537 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03538 } else {
03539 start_i = 0;
03540 length = s->inter_ac_vlc_length;
03541 last_length= s->inter_ac_vlc_last_length;
03542 }
03543
03544 if(last>=start_i){
03545 run=0;
03546 for(i=start_i; i<last; i++){
03547 int j= scantable[i];
03548 level= temp[j];
03549
03550 if(level){
03551 level+=64;
03552 if((level&(~127)) == 0){
03553 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03554 }else
03555 bits+= esc_length;
03556 run=0;
03557 }else
03558 run++;
03559 }
03560 i= scantable[last];
03561
03562 level= temp[i] + 64;
03563
03564 assert(level - 64);
03565
03566 if((level&(~127)) == 0){
03567 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03568 }else
03569 bits+= esc_length;
03570
03571 }
03572
03573 if(last>=0){
03574 if(s->mb_intra)
03575 s->dct_unquantize_intra(s, temp, 0, s->qscale);
03576 else
03577 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03578 }
03579
03580 s->dsp.idct_add(bak, stride, temp);
03581
03582 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
03583
03584 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
03585 }
03586
03587 static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03588 MpegEncContext * const s= (MpegEncContext *)c;
03589 const uint8_t *scantable= s->intra_scantable.permutated;
03590 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
03591 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03592 int i, last, run, bits, level, start_i;
03593 const int esc_length= s->ac_esc_length;
03594 uint8_t * length;
03595 uint8_t * last_length;
03596
03597 assert(h==8);
03598
03599 s->dsp.diff_pixels(temp, src1, src2, stride);
03600
03601 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03602
03603 bits=0;
03604
03605 if (s->mb_intra) {
03606 start_i = 1;
03607 length = s->intra_ac_vlc_length;
03608 last_length= s->intra_ac_vlc_last_length;
03609 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03610 } else {
03611 start_i = 0;
03612 length = s->inter_ac_vlc_length;
03613 last_length= s->inter_ac_vlc_last_length;
03614 }
03615
03616 if(last>=start_i){
03617 run=0;
03618 for(i=start_i; i<last; i++){
03619 int j= scantable[i];
03620 level= temp[j];
03621
03622 if(level){
03623 level+=64;
03624 if((level&(~127)) == 0){
03625 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03626 }else
03627 bits+= esc_length;
03628 run=0;
03629 }else
03630 run++;
03631 }
03632 i= scantable[last];
03633
03634 level= temp[i] + 64;
03635
03636 assert(level - 64);
03637
03638 if((level&(~127)) == 0){
03639 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03640 }else
03641 bits+= esc_length;
03642 }
03643
03644 return bits;
03645 }
03646
/**
 * Vertical sum of absolute differences inside one 16-pixel-wide block:
 * every pixel is compared with the pixel directly below it.
 *
 * @param c      unused
 * @param s      top-left pixel of the block
 * @param dummy  unused
 * @param stride distance in bytes between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return sum of |s[x] - s[x+stride]| over 16 columns and h-1 row pairs
 */
static int vsad_intra16_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h)
{
    const uint8_t *row = s;
    int total = 0;
    int line, col;

    for (line = 1; line < h; line++) {
        const uint8_t *below = row + stride;

        for (col = 0; col < 16; col++) {
            const int delta = row[col] - below[col];

            total += delta < 0 ? -delta : delta;
        }
        row = below;
    }

    return total;
}
03661
/**
 * Vertical sum of absolute differences of the residual s1-s2 over a
 * 16-pixel-wide block: the residual of every pixel is compared with the
 * residual of the pixel directly below it.
 *
 * @param c      unused
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance in bytes between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return sum of |s1[x] - s2[x] - s1[x+stride] + s2[x+stride]|
 */
static int vsad16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    const uint8_t *a = s1;
    const uint8_t *b = s2;
    int total = 0;
    int line, col;

    for (line = 1; line < h; line++) {
        for (col = 0; col < 16; col++) {
            const int delta = a[col] - b[col] - a[col + stride] + b[col + stride];

            total += delta < 0 ? -delta : delta;
        }
        a += stride;
        b += stride;
    }

    return total;
}
03676
/** Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))

/**
 * Vertical sum of squared differences inside one 16-pixel-wide block:
 * every pixel is compared with the pixel directly below it.
 *
 * @param c      unused
 * @param s      top-left pixel of the block
 * @param dummy  unused
 * @param stride distance in bytes between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return sum of (s[x] - s[x+stride])^2 over 16 columns and h-1 row pairs
 */
static int vsse_intra16_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h)
{
    const uint8_t *row = s;
    int total = 0;
    int line, col;

    for (line = 1; line < h; line++) {
        const uint8_t *below = row + stride;

        for (col = 0; col < 16; col++)
            total += SQ(row[col] - below[col]);
        row = below;
    }

    return total;
}
03692
/**
 * Vertical sum of squared differences of the residual s1-s2 over a
 * 16-pixel-wide block: the residual of every pixel is compared with the
 * residual of the pixel directly below it.
 *
 * @param c      unused
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance in bytes between vertically adjacent pixels
 * @param h      number of rows; h-1 row pairs are compared
 * @return sum of (s1[x] - s2[x] - s1[x+stride] + s2[x+stride])^2
 */
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    const uint8_t *a = s1;
    const uint8_t *b = s2;
    int total = 0;
    int line, col;

    for (line = 1; line < h; line++) {
        for (col = 0; col < 16; col++) {
            const int delta = a[col] - b[col] - a[col + stride] + b[col + stride];

            total += delta * delta;
        }
        a += stride;
        b += stride;
    }

    return total;
}
03707
/**
 * Sum of squared differences between an int8_t vector and an int16_t vector.
 *
 * @param pix1 first vector, size elements
 * @param pix2 second vector, size elements
 * @param size number of elements compared; nothing is read when size <= 0
 * @return sum of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k = 0;

    while (k < size) {
        const int delta = pix1[k] - pix2[k];

        total += delta * delta;
        k++;
    }

    return total;
}
03716
/* Instantiations of WARPER8_16_SQ, a macro defined earlier in this file
 * (its body is not visible from here).  The second argument of each line is
 * a name that dsputil_init() later installs into the DSPContext comparison
 * tables (directly or through SET_CMP_FUNC).
 * NOTE(review): presumably each line defines the 16-wide function named by
 * the second argument in terms of the 8x8 function named by the first one --
 * confirm against the macro definition. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
/* dct264_sad8x8_c only exists when CONFIG_GPL is defined */
#ifdef CONFIG_GPL
WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
03727
/**
 * Multiply a float vector in place by another one: dst[i] *= src[i].
 *
 * @param dst vector that is read and overwritten, len elements
 * @param src multiplier vector, len elements
 * @param len number of elements
 */
static void vector_fmul_c(float *dst, const float *src, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = dst[k] * src[k];
        k++;
    }
}
03733
/**
 * Multiply a float vector by a second vector read back to front:
 * dst[i] = src0[i] * src1[len-1-i].
 *
 * @param dst  output vector, len elements
 * @param src0 first factor, read front to back
 * @param src1 second factor, read back to front
 * @param len  number of elements
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
03740
/**
 * Strided multiply-add: dst[i*step] = src0[i] * src1[i] + src2[i] + src3.
 *
 * @param dst  output; only every step-th element is written
 * @param src0 first factor, len elements
 * @param src1 second factor, len elements
 * @param src2 per-element addend, len elements
 * @param src3 integer constant added to every element
 * @param len  number of elements produced
 * @param step distance between consecutive outputs in dst
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step)
{
    int k;
    int out = 0;   /* index into dst, advances by step per element */

    for (k = 0; k < len; k++) {
        dst[out] = src0[k] * src1[k] + src2[k] + src3;
        out += step;
    }
}
03746
/**
 * Convert floats to int16_t by inspecting their 32-bit representation.
 *
 * When bits 16..19 of the representation are clear, the output is the
 * representation minus 0x8000, truncated to 16 bits.  Otherwise the value is
 * first replaced by (0x43c0ffff - representation) >> 31 before 0x8000 is
 * subtracted, which saturates inputs outside the expected range.
 * (0x43c00000..0x43c0ffff is the IEEE-754 single-precision encoding of
 * 384.0 up to just below 386.0, so 385.0 maps to 0.)
 *
 * The representation is fetched with memcpy rather than through an
 * int32_t* cast of src, so no float object is accessed through an
 * incompatible lvalue type and the const qualifier of src is kept.
 *
 * @param dst output samples, len elements
 * @param src input floats, len elements
 * @param len number of samples
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++) {
        int32_t bits;
        int_fast32_t tmp;

        memcpy(&bits, &src[i], sizeof(bits));
        tmp = bits;
        if(tmp & 0xf0000){
            tmp = (0x43c0ffff - tmp)>>31;
        }
        dst[i] = tmp - 0x8000;
    }
}
03760
/* Integer multipliers used by the WMV2 inverse DCT passes below. */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/**
 * Row pass of the WMV2 inverse DCT: transforms 8 consecutive coefficients
 * in place.  Results are rounded with 1<<7 and shifted right by 8.
 */
static void wmv2_idct_row(short *b)
{
    /* weighted input pairs */
    const int r1 = W1*b[1] + W7*b[7];
    const int r7 = W7*b[1] - W1*b[7];
    const int r5 = W5*b[5] + W3*b[3];
    const int r3 = W3*b[5] - W5*b[3];
    const int r2 = W2*b[2] + W6*b[6];
    const int r6 = W6*b[2] - W2*b[6];
    const int r0 = W0*b[0] + W0*b[4];
    const int r4 = W0*b[0] - W0*b[4];
    /* combined odd terms scaled by 181/256 */
    const int m1 = (181*(r1-r5+r7-r3)+128)>>8;
    const int m2 = (181*(r1-r5-r7+r3)+128)>>8;
    const int rnd = 1<<7;

    b[0] = (r0+r2+r1+r5 + rnd)>>8;
    b[1] = (r4+r6 +m1   + rnd)>>8;
    b[2] = (r4-r6 +m2   + rnd)>>8;
    b[3] = (r0-r2+r7+r3 + rnd)>>8;
    b[4] = (r0-r2-r7-r3 + rnd)>>8;
    b[5] = (r4-r6 -m2   + rnd)>>8;
    b[6] = (r4+r6 -m1   + rnd)>>8;
    b[7] = (r0+r2-r1-r5 + rnd)>>8;
}

/**
 * Column pass of the WMV2 inverse DCT: transforms the 8 coefficients
 * b[0], b[8], ..., b[56] in place.  The weighted pairs are pre-shifted
 * right by 3 (with a +4 rounding term on all but the two W0 pairs); results
 * are rounded with 1<<13 and shifted right by 14.
 */
static void wmv2_idct_col(short *b)
{
    /* weighted input pairs, reduced by 3 bits */
    const int c1 = (W1*b[8*1] + W7*b[8*7] + 4)>>3;
    const int c7 = (W7*b[8*1] - W1*b[8*7] + 4)>>3;
    const int c5 = (W5*b[8*5] + W3*b[8*3] + 4)>>3;
    const int c3 = (W3*b[8*5] - W5*b[8*3] + 4)>>3;
    const int c2 = (W2*b[8*2] + W6*b[8*6] + 4)>>3;
    const int c6 = (W6*b[8*2] - W2*b[8*6] + 4)>>3;
    const int c0 = (W0*b[8*0] + W0*b[8*4]    )>>3;
    const int c4 = (W0*b[8*0] - W0*b[8*4]    )>>3;
    /* combined odd terms scaled by 181/256 */
    const int m1 = (181*(c1-c5+c7-c3)+128)>>8;
    const int m2 = (181*(c1-c5-c7+c3)+128)>>8;
    const int rnd = 1<<13;

    b[8*0] = (c0+c2+c1+c5 + rnd)>>14;
    b[8*1] = (c4+c6 +m1   + rnd)>>14;
    b[8*2] = (c4-c6 +m2   + rnd)>>14;
    b[8*3] = (c0-c2+c7+c3 + rnd)>>14;

    b[8*4] = (c0-c2-c7-c3 + rnd)>>14;
    b[8*5] = (c4-c6 -m2   + rnd)>>14;
    b[8*6] = (c4+c6 -m1   + rnd)>>14;
    b[8*7] = (c0+c2-c1-c5 + rnd)>>14;
}

/**
 * In-place WMV2 inverse DCT of a 64-entry block: the row pass is applied
 * to each group of 8 consecutive entries, then the column pass to each of
 * the 8 columns.
 */
void ff_wmv2_idct_c(short *block)
{
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8*row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
03833
03834
03835 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
03836 {
03837 ff_wmv2_idct_c(block);
03838 put_pixels_clamped_c(block, dest, line_size);
03839 }
03840 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
03841 {
03842 ff_wmv2_idct_c(block);
03843 add_pixels_clamped_c(block, dest, line_size);
03844 }
03845 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
03846 {
03847 j_rev_dct (block);
03848 put_pixels_clamped_c(block, dest, line_size);
03849 }
03850 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
03851 {
03852 j_rev_dct (block);
03853 add_pixels_clamped_c(block, dest, line_size);
03854 }
03855
03856 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
03857 {
03858 j_rev_dct4 (block);
03859 put_pixels_clamped4_c(block, dest, line_size);
03860 }
03861 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
03862 {
03863 j_rev_dct4 (block);
03864 add_pixels_clamped4_c(block, dest, line_size);
03865 }
03866
03867 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
03868 {
03869 j_rev_dct2 (block);
03870 put_pixels_clamped2_c(block, dest, line_size);
03871 }
03872 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
03873 {
03874 j_rev_dct2 (block);
03875 add_pixels_clamped2_c(block, dest, line_size);
03876 }
03877
03878 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
03879 {
03880 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
03881
03882 dest[0] = cm[(block[0] + 4)>>3];
03883 }
03884 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
03885 {
03886 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
03887
03888 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
03889 }
03890
03891 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
03892
03893
03894 void dsputil_static_init(void)
03895 {
03896 int i;
03897
03898 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
03899 for(i=0;i<MAX_NEG_CROP;i++) {
03900 ff_cropTbl[i] = 0;
03901 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
03902 }
03903
03904 for(i=0;i<512;i++) {
03905 ff_squareTbl[i] = (i - 256) * (i - 256);
03906 }
03907
03908 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
03909 }
03910
03911 int ff_check_alignment(void){
03912 static int did_fail=0;
03913 DECLARE_ALIGNED_16(int, aligned);
03914
03915 if((long)&aligned & 15){
03916 if(!did_fail){
03917 #if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
03918 av_log(NULL, AV_LOG_ERROR,
03919 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
03920 "and may be very slow or crash. This is not a bug in libavcodec,\n"
03921 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
03922 "Do not report crashes to FFmpeg developers.\n");
03923 #endif
03924 did_fail=1;
03925 }
03926 return -1;
03927 }
03928 return 0;
03929 }
03930
03931 void dsputil_init(DSPContext* c, AVCodecContext *avctx)
03932 {
03933 int i;
03934
03935 ff_check_alignment();
03936
03937 #ifdef CONFIG_ENCODERS
03938 if(avctx->dct_algo==FF_DCT_FASTINT) {
03939 c->fdct = fdct_ifast;
03940 c->fdct248 = fdct_ifast248;
03941 }
03942 else if(avctx->dct_algo==FF_DCT_FAAN) {
03943 c->fdct = ff_faandct;
03944 c->fdct248 = ff_faandct248;
03945 }
03946 else {
03947 c->fdct = ff_jpeg_fdct_islow;
03948 c->fdct248 = ff_fdct248_islow;
03949 }
03950 #endif //CONFIG_ENCODERS
03951
03952 if(avctx->lowres==1){
03953 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
03954 c->idct_put= ff_jref_idct4_put;
03955 c->idct_add= ff_jref_idct4_add;
03956 }else{
03957 c->idct_put= ff_h264_lowres_idct_put_c;
03958 c->idct_add= ff_h264_lowres_idct_add_c;
03959 }
03960 c->idct = j_rev_dct4;
03961 c->idct_permutation_type= FF_NO_IDCT_PERM;
03962 }else if(avctx->lowres==2){
03963 c->idct_put= ff_jref_idct2_put;
03964 c->idct_add= ff_jref_idct2_add;
03965 c->idct = j_rev_dct2;
03966 c->idct_permutation_type= FF_NO_IDCT_PERM;
03967 }else if(avctx->lowres==3){
03968 c->idct_put= ff_jref_idct1_put;
03969 c->idct_add= ff_jref_idct1_add;
03970 c->idct = j_rev_dct1;
03971 c->idct_permutation_type= FF_NO_IDCT_PERM;
03972 }else{
03973 if(avctx->idct_algo==FF_IDCT_INT){
03974 c->idct_put= ff_jref_idct_put;
03975 c->idct_add= ff_jref_idct_add;
03976 c->idct = j_rev_dct;
03977 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
03978 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) &&
03979 avctx->idct_algo==FF_IDCT_VP3){
03980 c->idct_put= ff_vp3_idct_put_c;
03981 c->idct_add= ff_vp3_idct_add_c;
03982 c->idct = ff_vp3_idct_c;
03983 c->idct_permutation_type= FF_NO_IDCT_PERM;
03984 }else if(avctx->idct_algo==FF_IDCT_WMV2){
03985 c->idct_put= ff_wmv2_idct_put_c;
03986 c->idct_add= ff_wmv2_idct_add_c;
03987 c->idct = ff_wmv2_idct_c;
03988 c->idct_permutation_type= FF_NO_IDCT_PERM;
03989 }else{
03990 c->idct_put= simple_idct_put;
03991 c->idct_add= simple_idct_add;
03992 c->idct = simple_idct;
03993 c->idct_permutation_type= FF_NO_IDCT_PERM;
03994 }
03995 }
03996
03997 if (ENABLE_H264_DECODER) {
03998 c->h264_idct_add= ff_h264_idct_add_c;
03999 c->h264_idct8_add= ff_h264_idct8_add_c;
04000 c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
04001 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
04002 }
04003
04004 c->get_pixels = get_pixels_c;
04005 c->diff_pixels = diff_pixels_c;
04006 c->put_pixels_clamped = put_pixels_clamped_c;
04007 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
04008 c->add_pixels_clamped = add_pixels_clamped_c;
04009 c->add_pixels8 = add_pixels8_c;
04010 c->add_pixels4 = add_pixels4_c;
04011 c->sum_abs_dctelem = sum_abs_dctelem_c;
04012 c->gmc1 = gmc1_c;
04013 c->gmc = ff_gmc_c;
04014 c->clear_blocks = clear_blocks_c;
04015 c->pix_sum = pix_sum_c;
04016 c->pix_norm1 = pix_norm1_c;
04017
04018
04019 c->pix_abs[0][0] = pix_abs16_c;
04020 c->pix_abs[0][1] = pix_abs16_x2_c;
04021 c->pix_abs[0][2] = pix_abs16_y2_c;
04022 c->pix_abs[0][3] = pix_abs16_xy2_c;
04023 c->pix_abs[1][0] = pix_abs8_c;
04024 c->pix_abs[1][1] = pix_abs8_x2_c;
04025 c->pix_abs[1][2] = pix_abs8_y2_c;
04026 c->pix_abs[1][3] = pix_abs8_xy2_c;
04027
04028 #define dspfunc(PFX, IDX, NUM) \
04029 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
04030 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
04031 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
04032 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
04033
04034 dspfunc(put, 0, 16);
04035 dspfunc(put_no_rnd, 0, 16);
04036 dspfunc(put, 1, 8);
04037 dspfunc(put_no_rnd, 1, 8);
04038 dspfunc(put, 2, 4);
04039 dspfunc(put, 3, 2);
04040
04041 dspfunc(avg, 0, 16);
04042 dspfunc(avg_no_rnd, 0, 16);
04043 dspfunc(avg, 1, 8);
04044 dspfunc(avg_no_rnd, 1, 8);
04045 dspfunc(avg, 2, 4);
04046 dspfunc(avg, 3, 2);
04047 #undef dspfunc
04048
04049 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
04050 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
04051
04052 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
04053 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
04054 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
04055 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
04056 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
04057 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
04058 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
04059 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
04060 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
04061
04062 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
04063 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
04064 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
04065 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
04066 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
04067 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
04068 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
04069 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
04070 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
04071
04072 #define dspfunc(PFX, IDX, NUM) \
04073 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
04074 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
04075 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
04076 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
04077 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
04078 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
04079 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
04080 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
04081 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
04082 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
04083 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
04084 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
04085 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
04086 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
04087 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
04088 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
04089
04090 dspfunc(put_qpel, 0, 16);
04091 dspfunc(put_no_rnd_qpel, 0, 16);
04092
04093 dspfunc(avg_qpel, 0, 16);
04094
04095
04096 dspfunc(put_qpel, 1, 8);
04097 dspfunc(put_no_rnd_qpel, 1, 8);
04098
04099 dspfunc(avg_qpel, 1, 8);
04100
04101
04102 dspfunc(put_h264_qpel, 0, 16);
04103 dspfunc(put_h264_qpel, 1, 8);
04104 dspfunc(put_h264_qpel, 2, 4);
04105 dspfunc(put_h264_qpel, 3, 2);
04106 dspfunc(avg_h264_qpel, 0, 16);
04107 dspfunc(avg_h264_qpel, 1, 8);
04108 dspfunc(avg_h264_qpel, 2, 4);
04109
04110 #undef dspfunc
04111 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
04112 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
04113 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
04114 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
04115 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
04116 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
04117 c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;
04118
04119 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
04120 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
04121 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
04122 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
04123 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
04124 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
04125 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
04126 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
04127 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
04128 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
04129 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
04130 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
04131 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
04132 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
04133 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
04134 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
04135 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
04136 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
04137 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
04138 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
04139
04140 #ifdef CONFIG_CAVS_DECODER
04141 ff_cavsdsp_init(c,avctx);
04142 #endif
04143 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
04144 ff_vc1dsp_init(c,avctx);
04145 #endif
04146 #if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
04147 ff_intrax8dsp_init(c,avctx);
04148 #endif
04149 #if defined(CONFIG_H264_ENCODER)
04150 ff_h264dspenc_init(c,avctx);
04151 #endif
04152
04153 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
04154 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
04155 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
04156 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
04157 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
04158 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
04159 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
04160 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
04161
04162 #define SET_CMP_FUNC(name) \
04163 c->name[0]= name ## 16_c;\
04164 c->name[1]= name ## 8x8_c;
04165
04166 SET_CMP_FUNC(hadamard8_diff)
04167 c->hadamard8_diff[4]= hadamard8_intra16_c;
04168 SET_CMP_FUNC(dct_sad)
04169 SET_CMP_FUNC(dct_max)
04170 #ifdef CONFIG_GPL
04171 SET_CMP_FUNC(dct264_sad)
04172 #endif
04173 c->sad[0]= pix_abs16_c;
04174 c->sad[1]= pix_abs8_c;
04175 c->sse[0]= sse16_c;
04176 c->sse[1]= sse8_c;
04177 c->sse[2]= sse4_c;
04178 SET_CMP_FUNC(quant_psnr)
04179 SET_CMP_FUNC(rd)
04180 SET_CMP_FUNC(bit)
04181 c->vsad[0]= vsad16_c;
04182 c->vsad[4]= vsad_intra16_c;
04183 c->vsse[0]= vsse16_c;
04184 c->vsse[4]= vsse_intra16_c;
04185 c->nsse[0]= nsse16_c;
04186 c->nsse[1]= nsse8_c;
04187 #ifdef CONFIG_SNOW_ENCODER
04188 c->w53[0]= w53_16_c;
04189 c->w53[1]= w53_8_c;
04190 c->w97[0]= w97_16_c;
04191 c->w97[1]= w97_8_c;
04192 #endif
04193
04194 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
04195
04196 c->add_bytes= add_bytes_c;
04197 c->diff_bytes= diff_bytes_c;
04198 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
04199 c->bswap_buf= bswap_buf;
04200
04201 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
04202 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
04203 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
04204 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
04205 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
04206 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
04207 c->h264_loop_filter_strength= NULL;
04208
04209 if (ENABLE_ANY_H263) {
04210 c->h263_h_loop_filter= h263_h_loop_filter_c;
04211 c->h263_v_loop_filter= h263_v_loop_filter_c;
04212 }
04213
04214 c->h261_loop_filter= h261_loop_filter_c;
04215
04216 c->try_8x8basis= try_8x8basis_c;
04217 c->add_8x8basis= add_8x8basis_c;
04218
04219 #ifdef CONFIG_SNOW_DECODER
04220 c->vertical_compose97i = ff_snow_vertical_compose97i;
04221 c->horizontal_compose97i = ff_snow_horizontal_compose97i;
04222 c->inner_add_yblock = ff_snow_inner_add_yblock;
04223 #endif
04224
04225 #ifdef CONFIG_VORBIS_DECODER
04226 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
04227 #endif
04228 #ifdef CONFIG_FLAC_ENCODER
04229 c->flac_compute_autocorr = ff_flac_compute_autocorr;
04230 #endif
04231 c->vector_fmul = vector_fmul_c;
04232 c->vector_fmul_reverse = vector_fmul_reverse_c;
04233 c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
04234 c->float_to_int16 = ff_float_to_int16_c;
04235
04236 c->shrink[0]= ff_img_copy_plane;
04237 c->shrink[1]= ff_shrink22;
04238 c->shrink[2]= ff_shrink44;
04239 c->shrink[3]= ff_shrink88;
04240
04241 c->prefetch= just_return;
04242
04243 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
04244 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
04245
04246 if (ENABLE_MMX) dsputil_init_mmx (c, avctx);
04247 if (ENABLE_ARMV4L) dsputil_init_armv4l(c, avctx);
04248 if (ENABLE_MLIB) dsputil_init_mlib (c, avctx);
04249 if (ENABLE_VIS) dsputil_init_vis (c, avctx);
04250 if (ENABLE_ALPHA) dsputil_init_alpha (c, avctx);
04251 if (ENABLE_POWERPC) dsputil_init_ppc (c, avctx);
04252 if (ENABLE_MMI) dsputil_init_mmi (c, avctx);
04253 if (ENABLE_SH4) dsputil_init_sh4 (c, avctx);
04254 if (ENABLE_BFIN) dsputil_init_bfin (c, avctx);
04255
04256 for(i=0; i<64; i++){
04257 if(!c->put_2tap_qpel_pixels_tab[0][i])
04258 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
04259 if(!c->avg_2tap_qpel_pixels_tab[0][i])
04260 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
04261 }
04262
04263 switch(c->idct_permutation_type){
04264 case FF_NO_IDCT_PERM:
04265 for(i=0; i<64; i++)
04266 c->idct_permutation[i]= i;
04267 break;
04268 case FF_LIBMPEG2_IDCT_PERM:
04269 for(i=0; i<64; i++)
04270 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
04271 break;
04272 case FF_SIMPLE_IDCT_PERM:
04273 for(i=0; i<64; i++)
04274 c->idct_permutation[i]= simple_mmx_permutation[i];
04275 break;
04276 case FF_TRANSPOSE_IDCT_PERM:
04277 for(i=0; i<64; i++)
04278 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
04279 break;
04280 case FF_PARTTRANS_IDCT_PERM:
04281 for(i=0; i<64; i++)
04282 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
04283 break;
04284 default:
04285 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
04286 }
04287 }
04288