00001
00002
00003 #include <stdlib.h>
00004 #include <stdio.h>
00005
00006 #include "mythconfig.h"
00007 #if HAVE_STDINT_H
00008 #include <stdint.h>
00009 #endif
00010
00011 #if HAVE_MMX || HAVE_AMD3DNOW
00012 #include "ffmpeg-mmx.h"
00013 #endif
00014
00015 #include "../mm_arch.h"
00016 #if HAVE_ALTIVEC_H
00017 #include <altivec.h>
00018 #endif
00019
/*
 * Build one line of inline-assembly text for a packed byte average.
 * The arguments are stringified verbatim and emitted in AT&T operand
 * order ("op src, dst"), followed by the newline/tab separator used
 * between instructions in the asm blocks of this file.
 */
#define PAVGB(src, dst)   "pavgb " #src ", " #dst " \n\t"
#define PAVGUSB(src, dst) "pavgusb " #src ", " #dst " \n\t"
00022
00023 #include "filter.h"
00024 #include "frame.h"
00025
00026 typedef struct LBFilter
00027 {
00028 VideoFilter vf;
00029
00030
00031 int mm_flags;
00032 void (*subfilter)(unsigned char *, int);
00033 TF_STRUCT;
00034 } LBFilter;
00035
00036 void linearBlend(unsigned char *src, int stride);
00037 void linearBlendMMX(unsigned char *src, int stride);
00038 void linearBlend3DNow(unsigned char *src, int stride);
00039 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field);
00040
00041 #if HAVE_ALTIVEC
00042 inline void linearBlendAltivec(unsigned char *src, int stride);
00043 #endif
00044
00045 #ifdef MMX
00046
00047 void linearBlendMMX(unsigned char *src, int stride)
00048 {
00049
00050 __asm__ volatile(
00051 "lea (%0, %1), %%"REG_a" \n\t"
00052 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
00053
00054 "movq (%0), %%mm0 \n\t"
00055 "movq (%%"REG_a", %1), %%mm1 \n\t"
00056 PAVGB(%%mm1, %%mm0)
00057 "movq (%%"REG_a"), %%mm2 \n\t"
00058 PAVGB(%%mm2, %%mm0)
00059 "movq %%mm0, (%0) \n\t"
00060 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00061 PAVGB(%%mm0, %%mm2)
00062 PAVGB(%%mm1, %%mm2)
00063 "movq %%mm2, (%%"REG_a") \n\t"
00064 "movq (%0, %1, 4), %%mm2 \n\t"
00065 PAVGB(%%mm2, %%mm1)
00066 PAVGB(%%mm0, %%mm1)
00067 "movq %%mm1, (%%"REG_a", %1) \n\t"
00068 "movq (%%"REG_d"), %%mm1 \n\t"
00069 PAVGB(%%mm1, %%mm0)
00070 PAVGB(%%mm2, %%mm0)
00071 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00072 "movq (%%"REG_d", %1), %%mm0 \n\t"
00073 PAVGB(%%mm0, %%mm2)
00074 PAVGB(%%mm1, %%mm2)
00075 "movq %%mm2, (%0, %1, 4) \n\t"
00076 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
00077 PAVGB(%%mm2, %%mm1)
00078 PAVGB(%%mm0, %%mm1)
00079 "movq %%mm1, (%%"REG_d") \n\t"
00080 "movq (%0, %1, 8), %%mm1 \n\t"
00081 PAVGB(%%mm1, %%mm0)
00082 PAVGB(%%mm2, %%mm0)
00083 "movq %%mm0, (%%"REG_d", %1) \n\t"
00084 "movq (%%"REG_d", %1, 4), %%mm0 \n\t"
00085 PAVGB(%%mm0, %%mm2)
00086 PAVGB(%%mm1, %%mm2)
00087 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
00088
00089 : : "r" (src), "r" ((long)stride)
00090 : "%"REG_a, "%"REG_d
00091 );
00092 }
00093
00094 void linearBlend3DNow(unsigned char *src, int stride)
00095 {
00096
00097 __asm__ volatile(
00098 "lea (%0, %1), %%"REG_a" \n\t"
00099 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
00100
00101 "movq (%0), %%mm0 \n\t"
00102 "movq (%%"REG_a", %1), %%mm1 \n\t"
00103 PAVGUSB(%%mm1, %%mm0)
00104 "movq (%%"REG_a"), %%mm2 \n\t"
00105 PAVGUSB(%%mm2, %%mm0)
00106 "movq %%mm0, (%0) \n\t"
00107 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00108 PAVGUSB(%%mm0, %%mm2)
00109 PAVGUSB(%%mm1, %%mm2)
00110 "movq %%mm2, (%%"REG_a") \n\t"
00111 "movq (%0, %1, 4), %%mm2 \n\t"
00112 PAVGUSB(%%mm2, %%mm1)
00113 PAVGUSB(%%mm0, %%mm1)
00114 "movq %%mm1, (%%"REG_a", %1) \n\t"
00115 "movq (%%"REG_d"), %%mm1 \n\t"
00116 PAVGUSB(%%mm1, %%mm0)
00117 PAVGUSB(%%mm2, %%mm0)
00118 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00119 "movq (%%"REG_d", %1), %%mm0 \n\t"
00120 PAVGUSB(%%mm0, %%mm2)
00121 PAVGUSB(%%mm1, %%mm2)
00122 "movq %%mm2, (%0, %1, 4) \n\t"
00123 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
00124 PAVGUSB(%%mm2, %%mm1)
00125 PAVGUSB(%%mm0, %%mm1)
00126 "movq %%mm1, (%%"REG_d") \n\t"
00127 "movq (%0, %1, 8), %%mm1 \n\t"
00128 PAVGUSB(%%mm1, %%mm0)
00129 PAVGUSB(%%mm2, %%mm0)
00130 "movq %%mm0, (%%"REG_d", %1) \n\t"
00131 "movq (%%"REG_d", %1, 4), %%mm0 \n\t"
00132 PAVGUSB(%%mm0, %%mm2)
00133 PAVGUSB(%%mm1, %%mm2)
00134 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
00135
00136 : : "r" (src), "r" ((long)stride)
00137 : "%"REG_a, "%"REG_d
00138 );
00139 }
00140
00141 #endif
00142
00143 #if HAVE_ALTIVEC
00144
00145 inline void linearBlendAltivec(unsigned char *src, int stride)
00146 {
00147 vector unsigned char a, b, c;
00148 int i;
00149
00150 b = vec_ld(0, src);
00151 c = vec_ld(stride, src);
00152
00153 for (i = 2; i < 10; i++)
00154 {
00155 a = b;
00156 b = c;
00157 c = vec_ld(stride * i, src);
00158 vec_st(vec_avg(vec_avg(a, c), b), stride * (i - 2), src);
00159 }
00160 }
00161
00162 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame, int field)
00163 {
00164 (void)field;
00165 (void)f;
00166 int height = frame->height;
00167 unsigned char *yptr = frame->buf + frame->offsets[0];
00168 int stride = frame->pitches[0];
00169 int ymax = height - 8;
00170 int x,y;
00171 unsigned char *src = 0;
00172 unsigned char *uoff = frame->buf + frame->offsets[1];
00173 unsigned char *voff = frame->buf + frame->offsets[2];
00174 TF_VARS;
00175
00176 TF_START;
00177
00178 if ((stride & 0xf) || ((unsigned int)yptr & 0xf))
00179 {
00180 for (y = 0; y < ymax; y += 8)
00181 {
00182 for (x = 0; x < stride; x += 8)
00183 {
00184 src = yptr + x + y * stride;
00185 linearBlend(src, stride);
00186 }
00187 }
00188 }
00189 else
00190 {
00191 src = yptr;
00192 for (y = 0; y < ymax; y += 8)
00193 {
00194 for (x = 0; x < stride; x += 16)
00195 {
00196 linearBlendAltivec(src, stride);
00197 src += 16;
00198 }
00199 src += stride * 7;
00200 }
00201 }
00202
00203 stride = frame->pitches[1];
00204 ymax = height / 2 - 8;
00205
00206 if ((stride & 0xf) || ((unsigned int)uoff & 0xf))
00207 {
00208 for (y = 0; y < ymax; y += 8)
00209 {
00210 for (x = 0; x < stride; x += 8)
00211 {
00212 src = uoff + x + y * stride;
00213 linearBlend(src, stride);
00214
00215 src = voff + x + y * stride;
00216 linearBlend(src, stride);
00217 }
00218 }
00219 }
00220 else
00221 {
00222 for (y = 0; y < ymax; y += 8)
00223 {
00224 for (x = 0; x < stride; x += 16)
00225 {
00226 linearBlendAltivec(src, stride);
00227 uoff += 16;
00228
00229 linearBlendAltivec(src, stride);
00230 voff += 16;
00231 }
00232 uoff += stride * 7;
00233 voff += stride * 7;
00234 }
00235 }
00236
00237 TF_END(vf, "LinearBlendAltivec: ");
00238 return 0;
00239 }
00240
00241 #endif
00242
/*
 * Pack four consecutive bytes into one 32-bit word.  Assembling the word
 * byte by byte avoids dereferencing a uint32_t pointer into a byte buffer
 * (alignment and aliasing problems).  The byte order is irrelevant to the
 * blend because every byte lane is processed independently.
 */
static inline uint32_t lb_load32(const unsigned char *p)
{
    return  (uint32_t)p[0]        |
           ((uint32_t)p[1] << 8)  |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
}

/* Inverse of lb_load32(): write the four byte lanes of v back to p[0..3]. */
static inline void lb_store32(unsigned char *p, uint32_t v)
{
    p[0] = (unsigned char)(v & 0xFF);
    p[1] = (unsigned char)((v >> 8) & 0xFF);
    p[2] = (unsigned char)((v >> 16) & 0xFF);
    p[3] = (unsigned char)((v >> 24) & 0xFF);
}

/* Per-byte average of four packed bytes, rounding down. */
static inline uint32_t lb_avg_floor(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & 0xFEFEFEFEU) >> 1);
}

/* Per-byte average of four packed bytes, rounding up. */
static inline uint32_t lb_avg_ceil(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEU) >> 1);
}

/*
 * Portable C kernel: blend an 8-byte wide, 8-row block in place.
 *
 * For every column, output row r (0..7) becomes
 *     ceil_avg(floor_avg(row[r], row[r + 2]), row[r + 1])
 * computed from the unfiltered input rows.  Rows 8 and 9 are read only.
 *
 * src    - first byte of the block; no alignment is required
 * stride - distance in bytes between consecutive rows
 */
void linearBlend(unsigned char *src, int stride)
{
    int half, row;

    /* two 4-byte wide columns make up the 8-byte wide block */
    for (half = 0; half < 2; half++)
    {
        unsigned char *col = src + 4 * half;
        uint32_t prev = lb_load32(col);
        uint32_t cur  = lb_load32(col + stride);

        for (row = 0; row < 8; row++)
        {
            /* row + 2 has not been overwritten yet, so this is input data */
            uint32_t next = lb_load32(col + stride * (row + 2));

            lb_store32(col + stride * row,
                       lb_avg_ceil(lb_avg_floor(prev, next), cur));

            prev = cur;
            cur  = next;
        }
    }
}
00286
00287 static int linearBlendFilter(VideoFilter *f, VideoFrame *frame, int field)
00288 {
00289 (void)field;
00290 int height = frame->height;
00291 unsigned char *yptr = frame->buf + frame->offsets[0];
00292 int stride = frame->pitches[0];
00293 int ymax = height - 8;
00294 int x,y;
00295 unsigned char *src;
00296 unsigned char *uoff = frame->buf + frame->offsets[1];
00297 unsigned char *voff = frame->buf + frame->offsets[2];
00298 LBFilter *vf = (LBFilter *)f;
00299 TF_VARS;
00300
00301 TF_START;
00302
00303 for (y = 0; y < ymax; y+=8)
00304 {
00305 for (x = 0; x < stride; x+=8)
00306 {
00307 src = yptr + x + y * stride;
00308 (vf->subfilter)(src, stride);
00309 }
00310 }
00311
00312 stride = frame->pitches[1];
00313 ymax = height / 2 - 8;
00314
00315 for (y = 0; y < ymax; y += 8)
00316 {
00317 for (x = 0; x < stride; x += 8)
00318 {
00319 src = uoff + x + y * stride;
00320 (vf->subfilter)(src, stride);
00321
00322 src = voff + x + y * stride;
00323 (vf->subfilter)(src, stride);
00324 }
00325 }
00326
00327 #if HAVE_MMX || HAVE_AMD3DNOW
00328 if ((vf->mm_flags & AV_CPU_FLAG_MMX2) || (vf->mm_flags & AV_CPU_FLAG_3DNOW))
00329 emms();
00330 #endif
00331
00332 TF_END(vf, "LinearBlend: ");
00333 return 0;
00334 }
00335
00336 static VideoFilter *new_filter(VideoFrameType inpixfmt,
00337 VideoFrameType outpixfmt,
00338 int *width, int *height, char *options,
00339 int threads)
00340 {
00341 LBFilter *filter;
00342 (void)width;
00343 (void)height;
00344 (void)options;
00345 (void)threads;
00346 if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
00347 return NULL;
00348
00349 filter = malloc(sizeof(LBFilter));
00350
00351 if (filter == NULL)
00352 {
00353 fprintf(stderr,"Couldn't allocate memory for filter\n");
00354 return NULL;
00355 }
00356
00357 filter->vf.filter = &linearBlendFilter;
00358 filter->subfilter = &linearBlend;
00359 filter->mm_flags = av_get_cpu_flags();
00360 if (HAVE_MMX && filter->mm_flags & AV_CPU_FLAG_MMX2)
00361 filter->subfilter = &linearBlendMMX;
00362 else if (HAVE_AMD3DNOW && filter->mm_flags & AV_CPU_FLAG_3DNOW)
00363 filter->subfilter = &linearBlend3DNow;
00364 else if (HAVE_ALTIVEC && filter->mm_flags & AV_CPU_FLAG_ALTIVEC)
00365 filter->vf.filter = &linearBlendFilterAltivec;
00366
00367 filter->vf.cleanup = NULL;
00368 TF_INIT(filter);
00369 return (VideoFilter *)filter;
00370 }
00371
00372 static FmtConv FmtList[] =
00373 {
00374 { FMT_YV12, FMT_YV12 },
00375 FMT_NULL
00376 };
00377
00378 ConstFilterInfo filter_table[] =
00379 {
00380 {
00381 filter_init: &new_filter,
00382 name: "linearblend",
00383 descript: "fast blending deinterlace filter",
00384 formats: FmtList,
00385 libname: NULL
00386 },
00387 FILT_NULL
00388 };