00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 #ifdef HAVE_MALLOC_H
00083 #include <malloc.h>
00084 #endif
00085
00086
00087
00088
00089
00090 #include "postprocess.h"
00091 #include "postprocess_internal.h"
00092
00093 #ifdef HAVE_ALTIVEC_H
00094 #include <altivec.h>
00095 #endif
00096
/* Size in bytes of the scratch buffer into which
 * pp_get_mode_by_name_and_quality() copies (and expands) the mode string. */
#define GET_MODE_BUFFER_SIZE 500
/* Number of slots in the per-filter array of option strings that the mode
 * parser could not interpret itself; the last used slot holds a NULL. */
#define OPTIONS_ARRAY_SIZE 10
/* Number of rows / columns walked by the C filter loops in this file. */
#define BLOCK_SIZE 8
/* Not referenced in this part of the file; presumably used by the included
 * templates -- TODO confirm. */
#define TEMP_STRIDE 8
00101
00102
#if defined(ARCH_X86)
/* 64-bit constants that exist only on x86 builds.
 * wNN: the 16-bit value 0x00NN repeated four times.
 * bNN: the byte value 0xNN repeated eight times.
 * Nothing in the C code visible here reads them; attribute_used is presumably
 * there because the inline assembly of the included templates refers to them
 * by name -- TODO confirm. */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
#endif
00113
/* Lookup table of 3*256 bytes, filled by global_init(). */
static uint8_t clip_table[3*256];
/* Points 256 entries into clip_table, so indices -256..511 are valid. */
static uint8_t * const clip_tab= clip_table + 256;

/* Fixed threshold; not read by the C code visible in this part of the file
 * (presumably used by the dering filter in the templates -- TODO confirm). */
static const int attribute_used deringThreshold= 20;
00118
00119
/* Table of all filters the mode parser knows, terminated by an entry whose
 * short name is NULL.
 * pp_get_mode_by_name_and_quality() accepts either name of an entry and then
 * uses its chromDefault, minLumQuality, minChromQuality and mask fields.
 * struct PPFilter is declared elsewhere; the column order is presumably
 *   short name, long name, chromDefault, minLumQuality, minChromQuality, mask
 * -- TODO confirm against the struct declaration. */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock",       1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",       1, 2, 4, V_DEBLOCK},


    {"h1", "x1hdeblock",     1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",     1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",      1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",      1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",         1, 5, 6, DERING},
    {"al", "autolevels",     0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",  1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",   1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",    1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",    1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",       1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",       1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",     1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} // terminator: the parser stops at shortName == NULL
};
00142
/* Alias table used by pp_get_mode_by_name_and_quality(): entries come in
 * pairs (alias name, filter string that replaces it in the parse buffer).
 * The list ends at the first NULL in an alias position. */
static const char *replaceTable[]=
{
    "default", "hdeblock:a,vdeblock:a,dering:a",
    "de",      "hdeblock:a,vdeblock:a,dering:a",
    "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",      "ha:a:128:7,va:a,dering:a",
    NULL // terminator
};
00152
00153
#if defined(ARCH_X86)
/* Thin wrappers, compiled on x86 only, that each emit one prefetch
 * instruction for the address p. They have no outputs and do not touch *p
 * from the C point of view. */

/* Emit "prefetchnta" for address p. */
static inline void prefetchnta(void *p)
{
    asm volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht0" for address p. */
static inline void prefetcht0(void *p)
{
    asm volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht1" for address p. */
static inline void prefetcht1(void *p)
{
    asm volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/* Emit "prefetcht2" for address p. */
static inline void prefetcht2(void *p)
{
    asm volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
00183
00184
00185
00189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00190 {
00191 int numEq= 0;
00192 int y;
00193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00194 const int dcThreshold= dcOffset*2 + 1;
00195
00196 for(y=0; y<BLOCK_SIZE; y++)
00197 {
00198 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00199 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00200 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00201 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00202 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00203 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00204 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00205 src+= stride;
00206 }
00207 return numEq > c->ppMode.flatnessThreshold;
00208 }
00209
00213 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
00214 int numEq= 0;
00215 int y;
00216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00217 const int dcThreshold= dcOffset*2 + 1;
00218
00219 src+= stride*4;
00220 for(y=0; y<BLOCK_SIZE-1; y++)
00221 {
00222 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00223 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00224 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00225 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00226 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00227 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00228 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00229 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00230 src+= stride;
00231 }
00232 return numEq > c->ppMode.flatnessThreshold;
00233 }
00234
/**
 * Cheap range check on an 8x8 block: on every row one pair of pixels that
 * lie 5 columns apart (modulo 8) is compared, and the block is rejected as
 * soon as one pair differs by more than 2*QP.
 *
 * Sampled column pairs, repeating every 4 rows: (0,5) (2,7) (4,1) (6,3).
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer; the allowed difference is +-2*QP
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    int row;

    for(row=0; row<8; row++)
    {
        const int colA= 2*(row & 3);
        const int colB= (colA + 5) & 7;

        // unsigned compare rejects both too-large and too-small differences
        if((unsigned)(src[colA] - src[colB] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
00257
00258 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00259 {
00260 #if 1
00261 #if 1
00262 int x;
00263 src+= stride*4;
00264 for(x=0; x<BLOCK_SIZE; x+=4)
00265 {
00266 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00267 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00268 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00269 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00270 }
00271 #else
00272 int x;
00273 src+= stride*3;
00274 for(x=0; x<BLOCK_SIZE; x++)
00275 {
00276 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00277 }
00278 #endif
00279 return 1;
00280 #else
00281 int x;
00282 src+= stride*4;
00283 for(x=0; x<BLOCK_SIZE; x++)
00284 {
00285 int min=255;
00286 int max=0;
00287 int y;
00288 for(y=0; y<8; y++){
00289 int v= src[x + y*stride];
00290 if(v>max) max=v;
00291 if(v<min) min=v;
00292 }
00293 if(max-min > 2*QP) return 0;
00294 }
00295 return 1;
00296 #endif
00297 }
00298
00299 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
00300 if( isHorizDC_C(src, stride, c) ){
00301 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00302 return 1;
00303 else
00304 return 0;
00305 }else{
00306 return 2;
00307 }
00308 }
00309
00310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
00311 if( isVertDC_C(src, stride, c) ){
00312 if( isVertMinMaxOk_C(src, stride, c->QP) )
00313 return 1;
00314 else
00315 return 0;
00316 }else{
00317 return 2;
00318 }
00319 }
00320
00321 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00322 {
00323 int y;
00324 for(y=0; y<BLOCK_SIZE; y++)
00325 {
00326 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00327
00328 if(FFABS(middleEnergy) < 8*c->QP)
00329 {
00330 const int q=(dst[3] - dst[4])/2;
00331 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00332 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00333
00334 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00335 d= FFMAX(d, 0);
00336
00337 d= (5*d + 32) >> 6;
00338 d*= FFSIGN(-middleEnergy);
00339
00340 if(q>0)
00341 {
00342 d= d<0 ? 0 : d;
00343 d= d>q ? q : d;
00344 }
00345 else
00346 {
00347 d= d>0 ? 0 : d;
00348 d= d<q ? q : d;
00349 }
00350
00351 dst[3]-= d;
00352 dst[4]+= d;
00353 }
00354 dst+= stride;
00355 }
00356 }
00357
00362 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00363 {
00364 int y;
00365 for(y=0; y<BLOCK_SIZE; y++)
00366 {
00367 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00368 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00369
00370 int sums[10];
00371 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00372 sums[1] = sums[0] - first + dst[3];
00373 sums[2] = sums[1] - first + dst[4];
00374 sums[3] = sums[2] - first + dst[5];
00375 sums[4] = sums[3] - first + dst[6];
00376 sums[5] = sums[4] - dst[0] + dst[7];
00377 sums[6] = sums[5] - dst[1] + last;
00378 sums[7] = sums[6] - dst[2] + last;
00379 sums[8] = sums[7] - dst[3] + last;
00380 sums[9] = sums[8] - dst[4] + last;
00381
00382 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00383 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00384 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00385 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00386 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00387 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00388 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00389 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00390
00391 dst+= stride;
00392 }
00393 }
00394
00403 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00404 {
00405 int y;
00406 static uint64_t *lut= NULL;
00407 if(lut==NULL)
00408 {
00409 int i;
00410 lut = av_malloc(256*8);
00411 for(i=0; i<256; i++)
00412 {
00413 int v= i < 128 ? 2*i : 2*(i-256);
00414
00415
00416
00417
00418
00419
00420
00421
00422 uint64_t a= (v/16) & 0xFF;
00423 uint64_t b= (v*3/16) & 0xFF;
00424 uint64_t c= (v*5/16) & 0xFF;
00425 uint64_t d= (7*v/16) & 0xFF;
00426 uint64_t A= (0x100 - a)&0xFF;
00427 uint64_t B= (0x100 - b)&0xFF;
00428 uint64_t C= (0x100 - c)&0xFF;
00429 uint64_t D= (0x100 - c)&0xFF;
00430
00431 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00432 (D<<24) | (C<<16) | (B<<8) | (A);
00433
00434 }
00435 }
00436
00437 for(y=0; y<BLOCK_SIZE; y++)
00438 {
00439 int a= src[1] - src[2];
00440 int b= src[3] - src[4];
00441 int c= src[5] - src[6];
00442
00443 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00444
00445 if(d < QP)
00446 {
00447 int v = d * FFSIGN(-b);
00448
00449 src[1] +=v/8;
00450 src[2] +=v/4;
00451 src[3] +=3*v/8;
00452 src[4] -=3*v/8;
00453 src[5] -=v/4;
00454 src[6] -=v/8;
00455
00456 }
00457 src+=stride;
00458 }
00459 }
00460
00464 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00465 int y;
00466 const int QP= c->QP;
00467 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00468 const int dcThreshold= dcOffset*2 + 1;
00469
00470 src+= step*4;
00471 for(y=0; y<8; y++){
00472 int numEq= 0;
00473
00474 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00475 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00476 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00477 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00478 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00479 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00480 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00481 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00482 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00483 if(numEq > c->ppMode.flatnessThreshold){
00484 int min, max, x;
00485
00486 if(src[0] > src[step]){
00487 max= src[0];
00488 min= src[step];
00489 }else{
00490 max= src[step];
00491 min= src[0];
00492 }
00493 for(x=2; x<8; x+=2){
00494 if(src[x*step] > src[(x+1)*step]){
00495 if(src[x *step] > max) max= src[ x *step];
00496 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00497 }else{
00498 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00499 if(src[ x *step] < min) min= src[ x *step];
00500 }
00501 }
00502 if(max-min < 2*QP){
00503 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00504 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00505
00506 int sums[10];
00507 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00508 sums[1] = sums[0] - first + src[3*step];
00509 sums[2] = sums[1] - first + src[4*step];
00510 sums[3] = sums[2] - first + src[5*step];
00511 sums[4] = sums[3] - first + src[6*step];
00512 sums[5] = sums[4] - src[0*step] + src[7*step];
00513 sums[6] = sums[5] - src[1*step] + last;
00514 sums[7] = sums[6] - src[2*step] + last;
00515 sums[8] = sums[7] - src[3*step] + last;
00516 sums[9] = sums[8] - src[4*step] + last;
00517
00518 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00519 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00520 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00521 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00522 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00523 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00524 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00525 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00526 }
00527 }else{
00528 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00529
00530 if(FFABS(middleEnergy) < 8*QP)
00531 {
00532 const int q=(src[3*step] - src[4*step])/2;
00533 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00534 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00535
00536 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00537 d= FFMAX(d, 0);
00538
00539 d= (5*d + 32) >> 6;
00540 d*= FFSIGN(-middleEnergy);
00541
00542 if(q>0)
00543 {
00544 d= d<0 ? 0 : d;
00545 d= d>q ? q : d;
00546 }
00547 else
00548 {
00549 d= d>0 ? 0 : d;
00550 d= d<q ? q : d;
00551 }
00552
00553 src[3*step]-= d;
00554 src[4*step]+= d;
00555 }
00556 }
00557
00558 src += stride;
00559 }
00560
00561
00562
00563
00564
00565 }
00566
00567
00568
/* Decide which variants of postprocess_template.c get compiled.
 * With RUNTIME_CPUDETECT every variant available for the architecture is
 * built; otherwise only the one matching the configured HAVE_* macros. */

// plain C variant: needed without MMX, or as fallback for runtime detection
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif

/* From here on the HAVE_* macros no longer describe the build configuration:
 * they are cleared and then set per template instance, so that each include
 * of the template sees exactly the feature set of the variant being built.
 * Code after this section that tests HAVE_* sees whatever the last
 * instantiated variant left defined. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

/* Each instance defines RENAME() so that the template's symbols get a
 * variant-specific suffix (_C, _altivec, _MMX, _MMX2, _3DNow). */
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC


#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif


#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif


#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
00646
00647
00648
/**
 * Filter one plane by forwarding to one of the postProcess_<variant>()
 * functions generated from the template.
 *
 * The mode *vm is first copied into the context (c->ppMode); all other
 * arguments are passed through unchanged.
 *
 * With RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps
 * (x86: MMX2, then 3DNow, then MMX, else C; PowerPC: AltiVec if compiled in
 * and flagged, else C; other architectures: C). Without it the choice is
 * made at compile time from the HAVE_* macros as they are defined at this
 * point of the file.
 *
 * @param vm  really a PPMode
 * @param vc  really a PPContext
 */
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; // the variants read the mode from the context




#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)

    // first matching capability wins
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
        postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
#endif
#endif
        // also the body of the dangling "else" above when AltiVec is compiled in
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
    postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
    postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
    postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
    postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif
}
00694
00695
00696
00697
00698
00699
/**
 * Help text describing the filter string syntax accepted by
 * pp_get_mode_by_name_and_quality().
 *
 * The filter names listed here must match the short / long names in the
 * filters[] table, because the parser compares them with strcmp()
 * (case-sensitive): "ahdeblock", "avdeblock" and "forcequant" are the
 * spellings the parser accepts.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00743
00744 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
00745 {
00746 char temp[GET_MODE_BUFFER_SIZE];
00747 char *p= temp;
00748 const char *filterDelimiters= ",/";
00749 const char *optionDelimiters= ":";
00750 struct PPMode *ppMode;
00751 char *filterToken;
00752
00753 ppMode= av_malloc(sizeof(PPMode));
00754
00755 ppMode->lumMode= 0;
00756 ppMode->chromMode= 0;
00757 ppMode->maxTmpNoise[0]= 700;
00758 ppMode->maxTmpNoise[1]= 1500;
00759 ppMode->maxTmpNoise[2]= 3000;
00760 ppMode->maxAllowedY= 234;
00761 ppMode->minAllowedY= 16;
00762 ppMode->baseDcDiff= 256/8;
00763 ppMode->flatnessThreshold= 56-16-1;
00764 ppMode->maxClippedThreshold= 0.01;
00765 ppMode->error=0;
00766
00767 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
00768
00769 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00770
00771 for(;;){
00772 char *filterName;
00773 int q= 1000000;
00774 int chrom=-1;
00775 int luma=-1;
00776 char *option;
00777 char *options[OPTIONS_ARRAY_SIZE];
00778 int i;
00779 int filterNameOk=0;
00780 int numOfUnknownOptions=0;
00781 int enable=1;
00782
00783 filterToken= strtok(p, filterDelimiters);
00784 if(filterToken == NULL) break;
00785 p+= strlen(filterToken) + 1;
00786 filterName= strtok(filterToken, optionDelimiters);
00787 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00788
00789 if(*filterName == '-')
00790 {
00791 enable=0;
00792 filterName++;
00793 }
00794
00795 for(;;){
00796 option= strtok(NULL, optionDelimiters);
00797 if(option == NULL) break;
00798
00799 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00800 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00801 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00802 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00803 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00804 else
00805 {
00806 options[numOfUnknownOptions] = option;
00807 numOfUnknownOptions++;
00808 }
00809 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00810 }
00811 options[numOfUnknownOptions] = NULL;
00812
00813
00814 for(i=0; replaceTable[2*i]!=NULL; i++)
00815 {
00816 if(!strcmp(replaceTable[2*i], filterName))
00817 {
00818 int newlen= strlen(replaceTable[2*i + 1]);
00819 int plen;
00820 int spaceLeft;
00821
00822 if(p==NULL) p= temp, *p=0;
00823 else p--, *p=',';
00824
00825 plen= strlen(p);
00826 spaceLeft= p - temp + plen;
00827 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
00828 {
00829 ppMode->error++;
00830 break;
00831 }
00832 memmove(p + newlen, p, plen+1);
00833 memcpy(p, replaceTable[2*i + 1], newlen);
00834 filterNameOk=1;
00835 }
00836 }
00837
00838 for(i=0; filters[i].shortName!=NULL; i++)
00839 {
00840 if( !strcmp(filters[i].longName, filterName)
00841 || !strcmp(filters[i].shortName, filterName))
00842 {
00843 ppMode->lumMode &= ~filters[i].mask;
00844 ppMode->chromMode &= ~filters[i].mask;
00845
00846 filterNameOk=1;
00847 if(!enable) break;
00848
00849 if(q >= filters[i].minLumQuality && luma)
00850 ppMode->lumMode|= filters[i].mask;
00851 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00852 if(q >= filters[i].minChromQuality)
00853 ppMode->chromMode|= filters[i].mask;
00854
00855 if(filters[i].mask == LEVEL_FIX)
00856 {
00857 int o;
00858 ppMode->minAllowedY= 16;
00859 ppMode->maxAllowedY= 234;
00860 for(o=0; options[o]!=NULL; o++)
00861 {
00862 if( !strcmp(options[o],"fullyrange")
00863 ||!strcmp(options[o],"f"))
00864 {
00865 ppMode->minAllowedY= 0;
00866 ppMode->maxAllowedY= 255;
00867 numOfUnknownOptions--;
00868 }
00869 }
00870 }
00871 else if(filters[i].mask == TEMP_NOISE_FILTER)
00872 {
00873 int o;
00874 int numOfNoises=0;
00875
00876 for(o=0; options[o]!=NULL; o++)
00877 {
00878 char *tail;
00879 ppMode->maxTmpNoise[numOfNoises]=
00880 strtol(options[o], &tail, 0);
00881 if(tail!=options[o])
00882 {
00883 numOfNoises++;
00884 numOfUnknownOptions--;
00885 if(numOfNoises >= 3) break;
00886 }
00887 }
00888 }
00889 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00890 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
00891 {
00892 int o;
00893
00894 for(o=0; options[o]!=NULL && o<2; o++)
00895 {
00896 char *tail;
00897 int val= strtol(options[o], &tail, 0);
00898 if(tail==options[o]) break;
00899
00900 numOfUnknownOptions--;
00901 if(o==0) ppMode->baseDcDiff= val;
00902 else ppMode->flatnessThreshold= val;
00903 }
00904 }
00905 else if(filters[i].mask == FORCE_QUANT)
00906 {
00907 int o;
00908 ppMode->forcedQuant= 15;
00909
00910 for(o=0; options[o]!=NULL && o<1; o++)
00911 {
00912 char *tail;
00913 int val= strtol(options[o], &tail, 0);
00914 if(tail==options[o]) break;
00915
00916 numOfUnknownOptions--;
00917 ppMode->forcedQuant= val;
00918 }
00919 }
00920 }
00921 }
00922 if(!filterNameOk) ppMode->error++;
00923 ppMode->error += numOfUnknownOptions;
00924 }
00925
00926 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00927 if(ppMode->error)
00928 {
00929 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00930 av_free(ppMode);
00931 return NULL;
00932 }
00933 return ppMode;
00934 }
00935
/**
 * Release a mode returned by pp_get_mode_by_name_and_quality().
 * The pointer is handed straight to av_free().
 */
void pp_free_mode(pp_mode_t *mode){
    av_free(mode);
}
00939
/**
 * Replace the buffer *p by a fresh, zero-filled one of size bytes.
 * The old contents are not preserved. The alignment argument is not used
 * by this implementation. On allocation failure *p ends up holding whatever
 * av_mallocz() returned.
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p= av_mallocz(size);
}
00944
00945 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00946 int mbWidth = (width+15)>>4;
00947 int mbHeight= (height+15)>>4;
00948 int i;
00949
00950 c->stride= stride;
00951 c->qpStride= qpStride;
00952
00953 reallocAlign((void **)&c->tempDst, 8, stride*24);
00954 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00955 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00956 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00957 for(i=0; i<256; i++)
00958 c->yHistogram[i]= width*height/64*15/256;
00959
00960 for(i=0; i<3; i++)
00961 {
00962
00963 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
00964 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00965 }
00966
00967 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00968 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00969 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00970 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00971 }
00972
00973 static void global_init(void){
00974 int i;
00975 memset(clip_table, 0, 256);
00976 for(i=256; i<512; i++)
00977 clip_table[i]= i;
00978 memset(clip_table+512, 0, 256);
00979 }
00980
/* Name callback stored in av_codec_context_class; ignores its argument and
 * always returns "postproc". */
static const char * context_to_name(void * ptr) {
    return "postproc";
}

/* Class record that pp_get_context() stores in every context (c->av_class);
 * the contexts are later passed to av_log() as its first argument. */
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00986
00987 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
00988 PPContext *c= av_malloc(sizeof(PPContext));
00989 int stride= (width+15)&(~15);
00990 int qpStride= (width+15)/16 + 2;
00991
00992 global_init();
00993
00994 memset(c, 0, sizeof(PPContext));
00995 c->av_class = &av_codec_context_class;
00996 c->cpuCaps= cpuCaps;
00997 if(cpuCaps&PP_FORMAT){
00998 c->hChromaSubSample= cpuCaps&0x3;
00999 c->vChromaSubSample= (cpuCaps>>4)&0x3;
01000 }else{
01001 c->hChromaSubSample= 1;
01002 c->vChromaSubSample= 1;
01003 }
01004
01005 reallocBuffers(c, width, height, stride, qpStride);
01006
01007 c->frameNum=-1;
01008
01009 return c;
01010 }
01011
01012 void pp_free_context(void *vc){
01013 PPContext *c = (PPContext*)vc;
01014 int i;
01015
01016 for(i=0; i<3; i++) av_free(c->tempBlured[i]);
01017 for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
01018
01019 av_free(c->tempBlocks);
01020 av_free(c->yHistogram);
01021 av_free(c->tempDst);
01022 av_free(c->tempSrc);
01023 av_free(c->deintTemp);
01024 av_free(c->stdQPTable);
01025 av_free(c->nonBQPTable);
01026 av_free(c->forcedQPTable);
01027
01028 memset(c, 0, sizeof(PPContext));
01029
01030 av_free(c);
01031 }
01032
01033 void pp_postprocess(uint8_t * src[3], int srcStride[3],
01034 uint8_t * dst[3], int dstStride[3],
01035 int width, int height,
01036 QP_STORE_T *QP_store, int QPStride,
01037 pp_mode_t *vm, void *vc, int pict_type)
01038 {
01039 int mbWidth = (width+15)>>4;
01040 int mbHeight= (height+15)>>4;
01041 PPMode *mode = (PPMode*)vm;
01042 PPContext *c = (PPContext*)vc;
01043 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01044 int absQPStride = FFABS(QPStride);
01045
01046
01047 if(c->stride < minStride || c->qpStride < absQPStride)
01048 reallocBuffers(c, width, height,
01049 FFMAX(minStride, c->stride),
01050 FFMAX(c->qpStride, absQPStride));
01051
01052 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
01053 {
01054 int i;
01055 QP_store= c->forcedQPTable;
01056 absQPStride = QPStride = 0;
01057 if(mode->lumMode & FORCE_QUANT)
01058 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
01059 else
01060 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
01061 }
01062
01063 if(pict_type & PP_PICT_TYPE_QP2){
01064 int i;
01065 const int count= mbHeight * absQPStride;
01066 for(i=0; i<(count>>2); i++){
01067 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01068 }
01069 for(i<<=2; i<count; i++){
01070 c->stdQPTable[i] = QP_store[i]>>1;
01071 }
01072 QP_store= c->stdQPTable;
01073 QPStride= absQPStride;
01074 }
01075
01076 if(0){
01077 int x,y;
01078 for(y=0; y<mbHeight; y++){
01079 for(x=0; x<mbWidth; x++){
01080 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01081 }
01082 av_log(c, AV_LOG_INFO, "\n");
01083 }
01084 av_log(c, AV_LOG_INFO, "\n");
01085 }
01086
01087 if((pict_type&7)!=3)
01088 {
01089 if (QPStride >= 0) {
01090 int i;
01091 const int count= mbHeight * QPStride;
01092 for(i=0; i<(count>>2); i++){
01093 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01094 }
01095 for(i<<=2; i<count; i++){
01096 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01097 }
01098 } else {
01099 int i,j;
01100 for(i=0; i<mbHeight; i++) {
01101 for(j=0; j<absQPStride; j++) {
01102 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01103 }
01104 }
01105 }
01106 }
01107
01108 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01109 mode->lumMode, mode->chromMode);
01110
01111 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01112 width, height, QP_store, QPStride, 0, mode, c);
01113
01114 width = (width )>>c->hChromaSubSample;
01115 height = (height)>>c->vChromaSubSample;
01116
01117 if(mode->chromMode)
01118 {
01119 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01120 width, height, QP_store, QPStride, 1, mode, c);
01121 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01122 width, height, QP_store, QPStride, 2, mode, c);
01123 }
01124 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
01125 {
01126 linecpy(dst[1], src[1], height, srcStride[1]);
01127 linecpy(dst[2], src[2], height, srcStride[2]);
01128 }
01129 else
01130 {
01131 int y;
01132 for(y=0; y<height; y++)
01133 {
01134 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01135 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01136 }
01137 }
01138 }
01139