00001
00002
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
00005
00006 #ifdef HAVE_STDINT_H
00007 #include <stdint.h>
00008 #endif
00009
00010 #include "config.h"
00011 #include "dsputil.h"
00012 #include "../mm_arch.h"
00013 #ifdef HAVE_ALTIVEC_H
00014 #include <altivec.h>
00015 #endif
00016
/*
 * Assembly-text builders used inside the inline asm blocks below.
 * Each one stringifies its two operands and yields a single instruction
 * line ("<mnemonic> <first>, <second> \n\t"):
 *   PAVGB   - "pavgb"   instruction
 *   PAVGUSB - "pavgusb" instruction
 */
#define PAVGB(first, second) "pavgb " #first ", " #second " \n\t"
#define PAVGUSB(first, second) "pavgusb " #first ", " #second " \n\t"
00019
00020 #include "filter.h"
00021 #include "frame.h"
00022
00023 typedef struct LBFilter
00024 {
00025 int (*filter)(VideoFilter *, VideoFrame *);
00026 void (*cleanup)(VideoFilter *);
00027
00028 void *handle;
00029 VideoFrameType inpixfmt;
00030 VideoFrameType outpixfmt;
00031 char *opts;
00032 FilterInfo *info;
00033
00034
00035 int mm_flags;
00036 void (*subfilter)(unsigned char *, int);
00037 TF_STRUCT;
00038 } LBFilter;
00039
00040 #ifdef MMX
00041
00042 void linearBlendMMX(unsigned char *src, int stride)
00043 {
00044
00045 asm volatile(
00046 "lea (%0, %1), %%"REG_a" \n\t"
00047 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
00048
00049 "movq (%0), %%mm0 \n\t"
00050 "movq (%%"REG_a", %1), %%mm1 \n\t"
00051 PAVGB(%%mm1, %%mm0)
00052 "movq (%%"REG_a"), %%mm2 \n\t"
00053 PAVGB(%%mm2, %%mm0)
00054 "movq %%mm0, (%0) \n\t"
00055 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00056 PAVGB(%%mm0, %%mm2)
00057 PAVGB(%%mm1, %%mm2)
00058 "movq %%mm2, (%%"REG_a") \n\t"
00059 "movq (%0, %1, 4), %%mm2 \n\t"
00060 PAVGB(%%mm2, %%mm1)
00061 PAVGB(%%mm0, %%mm1)
00062 "movq %%mm1, (%%"REG_a", %1) \n\t"
00063 "movq (%%"REG_d"), %%mm1 \n\t"
00064 PAVGB(%%mm1, %%mm0)
00065 PAVGB(%%mm2, %%mm0)
00066 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00067 "movq (%%"REG_d", %1), %%mm0 \n\t"
00068 PAVGB(%%mm0, %%mm2)
00069 PAVGB(%%mm1, %%mm2)
00070 "movq %%mm2, (%0, %1, 4) \n\t"
00071 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
00072 PAVGB(%%mm2, %%mm1)
00073 PAVGB(%%mm0, %%mm1)
00074 "movq %%mm1, (%%"REG_d") \n\t"
00075 "movq (%0, %1, 8), %%mm1 \n\t"
00076 PAVGB(%%mm1, %%mm0)
00077 PAVGB(%%mm2, %%mm0)
00078 "movq %%mm0, (%%"REG_d", %1) \n\t"
00079 "movq (%%"REG_d", %1, 4), %%mm0 \n\t"
00080 PAVGB(%%mm0, %%mm2)
00081 PAVGB(%%mm1, %%mm2)
00082 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
00083
00084 : : "r" (src), "r" ((long)stride)
00085 : "%"REG_a, "%"REG_d
00086 );
00087 }
00088
00089 void linearBlend3DNow(unsigned char *src, int stride)
00090 {
00091
00092 asm volatile(
00093 "lea (%0, %1), %%"REG_a" \n\t"
00094 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
00095
00096 "movq (%0), %%mm0 \n\t"
00097 "movq (%%"REG_a", %1), %%mm1 \n\t"
00098 PAVGUSB(%%mm1, %%mm0)
00099 "movq (%%"REG_a"), %%mm2 \n\t"
00100 PAVGUSB(%%mm2, %%mm0)
00101 "movq %%mm0, (%0) \n\t"
00102 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00103 PAVGUSB(%%mm0, %%mm2)
00104 PAVGUSB(%%mm1, %%mm2)
00105 "movq %%mm2, (%%"REG_a") \n\t"
00106 "movq (%0, %1, 4), %%mm2 \n\t"
00107 PAVGUSB(%%mm2, %%mm1)
00108 PAVGUSB(%%mm0, %%mm1)
00109 "movq %%mm1, (%%"REG_a", %1) \n\t"
00110 "movq (%%"REG_d"), %%mm1 \n\t"
00111 PAVGUSB(%%mm1, %%mm0)
00112 PAVGUSB(%%mm2, %%mm0)
00113 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00114 "movq (%%"REG_d", %1), %%mm0 \n\t"
00115 PAVGUSB(%%mm0, %%mm2)
00116 PAVGUSB(%%mm1, %%mm2)
00117 "movq %%mm2, (%0, %1, 4) \n\t"
00118 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
00119 PAVGUSB(%%mm2, %%mm1)
00120 PAVGUSB(%%mm0, %%mm1)
00121 "movq %%mm1, (%%"REG_d") \n\t"
00122 "movq (%0, %1, 8), %%mm1 \n\t"
00123 PAVGUSB(%%mm1, %%mm0)
00124 PAVGUSB(%%mm2, %%mm0)
00125 "movq %%mm0, (%%"REG_d", %1) \n\t"
00126 "movq (%%"REG_d", %1, 4), %%mm0 \n\t"
00127 PAVGUSB(%%mm0, %%mm2)
00128 PAVGUSB(%%mm1, %%mm2)
00129 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
00130
00131 : : "r" (src), "r" ((long)stride)
00132 : "%"REG_a, "%"REG_d
00133 );
00134 }
00135
00136 #endif
00137
00138 #ifdef HAVE_ALTIVEC
00139
00140
00141 void linearBlend(unsigned char *src, int stride);
00142
00143 inline void linearBlendAltivec(unsigned char *src, int stride)
00144 {
00145 vector unsigned char a, b, c;
00146 int i;
00147
00148 b = vec_ld(0, src);
00149 c = vec_ld(stride, src);
00150
00151 for (i = 2; i < 10; i++)
00152 {
00153 a = b;
00154 b = c;
00155 c = vec_ld(stride * i, src);
00156 vec_st(vec_avg(vec_avg(a, c), b), stride * (i - 2), src);
00157 }
00158 }
00159
00160 int linearBlendFilterAltivec(VideoFilter *f, VideoFrame *frame)
00161 {
00162 (void)f;
00163 int height = frame->height;
00164 unsigned char *yptr = frame->buf + frame->offsets[0];
00165 int stride = frame->pitches[0];
00166 int ymax = height - 8;
00167 int x,y;
00168 unsigned char *src = 0;
00169 unsigned char *uoff = frame->buf + frame->offsets[1];
00170 unsigned char *voff = frame->buf + frame->offsets[2];
00171 TF_VARS;
00172
00173 TF_START;
00174
00175 if ((stride & 0xf) || ((unsigned int)yptr & 0xf))
00176 {
00177 for (y = 0; y < ymax; y += 8)
00178 {
00179 for (x = 0; x < stride; x += 8)
00180 {
00181 src = yptr + x + y * stride;
00182 linearBlend(src, stride);
00183 }
00184 }
00185 }
00186 else
00187 {
00188 src = yptr;
00189 for (y = 0; y < ymax; y += 8)
00190 {
00191 for (x = 0; x < stride; x += 16)
00192 {
00193 linearBlendAltivec(src, stride);
00194 src += 16;
00195 }
00196 src += stride * 7;
00197 }
00198 }
00199
00200 stride = frame->pitches[1];
00201 ymax = height / 2 - 8;
00202
00203 if ((stride & 0xf) || ((unsigned int)uoff & 0xf))
00204 {
00205 for (y = 0; y < ymax; y += 8)
00206 {
00207 for (x = 0; x < stride; x += 8)
00208 {
00209 src = uoff + x + y * stride;
00210 linearBlend(src, stride);
00211
00212 src = voff + x + y * stride;
00213 linearBlend(src, stride);
00214 }
00215 }
00216 }
00217 else
00218 {
00219 for (y = 0; y < ymax; y += 8)
00220 {
00221 for (x = 0; x < stride; x += 16)
00222 {
00223 linearBlendAltivec(src, stride);
00224 uoff += 16;
00225
00226 linearBlendAltivec(src, stride);
00227 voff += 16;
00228 }
00229 uoff += stride * 7;
00230 voff += stride * 7;
00231 }
00232 }
00233
00234 TF_END(vf, "LinearBlendAltivec: ");
00235 return 0;
00236 }
00237
00238 #endif
00239
/* Read four consecutive bytes as one word; safe for any alignment. */
static uint32_t lb_load32(const unsigned char *p)
{
    uint32_t word;

    memcpy(&word, p, sizeof(word));
    return word;
}

/* Write one word back as four consecutive bytes; safe for any alignment. */
static void lb_store32(unsigned char *p, uint32_t word)
{
    memcpy(p, &word, sizeof(word));
}

/* Per-byte average of two packed words, each byte rounded down. */
static uint32_t lb_avg4_floor(uint32_t p, uint32_t q)
{
    return (p & q) + (((p ^ q) & 0xFEFEFEFEu) >> 1);
}

/* Per-byte average of two packed words, each byte rounded up. */
static uint32_t lb_avg4_ceil(uint32_t p, uint32_t q)
{
    return (p | q) - (((p ^ q) & 0xFEFEFEFEu) >> 1);
}

/*
 * Portable C block routine: blend an 8-pixel-wide, 8-row block in place.
 *
 * For every output row k (0..7) each byte becomes
 *     ceil_avg(floor_avg(row k, row k+2), row k+1)
 * computed four bytes at a time. Rows 0..9 are read and rows 0..7 are
 * written; a row is always read before any row depending on it is stored,
 * so the blend uses the original pixel values throughout.
 *
 * Pixels are moved with memcpy and kept in uint32_t, so the routine has no
 * alignment requirement on src or stride and does no signed arithmetic on
 * pixel data.
 *
 * src    - top-left pixel of the block
 * stride - distance in bytes between consecutive rows
 */
void linearBlend(unsigned char *src, int stride)
{
    int col, row;

    for (col = 0; col < 8; col += 4)
    {
        unsigned char *base = src + col;
        uint32_t above  = lb_load32(base);
        uint32_t middle = lb_load32(base + stride);

        for (row = 0; row < 8; row++)
        {
            uint32_t below = lb_load32(base + (row + 2) * stride);
            uint32_t outer = lb_avg4_floor(above, below);

            lb_store32(base + row * stride, lb_avg4_ceil(outer, middle));

            /* Slide the three-row window down by one row. */
            above  = middle;
            middle = below;
        }
    }
}
00283
00284 int linearBlendFilter(VideoFilter *f, VideoFrame *frame)
00285 {
00286 int height = frame->height;
00287 unsigned char *yptr = frame->buf + frame->offsets[0];
00288 int stride = frame->pitches[0];
00289 int ymax = height - 8;
00290 int x,y;
00291 unsigned char *src;
00292 unsigned char *uoff = frame->buf + frame->offsets[1];
00293 unsigned char *voff = frame->buf + frame->offsets[2];
00294 LBFilter *vf = (LBFilter *)f;
00295 TF_VARS;
00296
00297 TF_START;
00298
00299 for (y = 0; y < ymax; y+=8)
00300 {
00301 for (x = 0; x < stride; x+=8)
00302 {
00303 src = yptr + x + y * stride;
00304 (vf->subfilter)(src, stride);
00305 }
00306 }
00307
00308 stride = frame->pitches[1];
00309 ymax = height / 2 - 8;
00310
00311 for (y = 0; y < ymax; y += 8)
00312 {
00313 for (x = 0; x < stride; x += 8)
00314 {
00315 src = uoff + x + y * stride;
00316 (vf->subfilter)(src, stride);
00317
00318 src = voff + x + y * stride;
00319 (vf->subfilter)(src, stride);
00320 }
00321 }
00322
00323 #ifdef MMX
00324 if ((vf->mm_flags & MM_MMXEXT) || (vf->mm_flags & MM_3DNOW))
00325 emms();
00326 #endif
00327
00328 TF_END(vf, "LinearBlend: ");
00329 return 0;
00330 }
00331
00332 VideoFilter *new_filter(VideoFrameType inpixfmt, VideoFrameType outpixfmt,
00333 int *width, int *height, char *options)
00334 {
00335 LBFilter *filter;
00336 (void)width;
00337 (void)height;
00338 (void)options;
00339
00340 if (inpixfmt != FMT_YV12 || outpixfmt != FMT_YV12)
00341 return NULL;
00342
00343 filter = malloc(sizeof(LBFilter));
00344
00345 if (filter == NULL)
00346 {
00347 fprintf(stderr,"Couldn't allocate memory for filter\n");
00348 return NULL;
00349 }
00350
00351 filter->filter = &linearBlendFilter;
00352 filter->subfilter = &linearBlend;
00353 filter->mm_flags = mm_support();
00354 #ifdef MMX
00355 if (filter->mm_flags & MM_MMXEXT)
00356 filter->subfilter = &linearBlendMMX;
00357 else if (filter->mm_flags & MM_3DNOW)
00358 filter->subfilter = &linearBlend3DNow;
00359 #endif
00360 #ifdef HAVE_ALTIVEC
00361 if (filter->mm_flags & MM_ALTIVEC)
00362 filter->filter = &linearBlendFilterAltivec;
00363 #endif
00364
00365 filter->cleanup = NULL;
00366 TF_INIT(filter);
00367 return (VideoFilter *)filter;
00368 }
00369
00370 static FmtConv FmtList[] =
00371 {
00372 { FMT_YV12, FMT_YV12 },
00373 FMT_NULL
00374 };
00375
00376 FilterInfo filter_table[] =
00377 {
00378 {
00379 symbol: "new_filter",
00380 name: "linearblend",
00381 descript: "fast blending deinterlace filter",
00382 formats: FmtList,
00383 libname: NULL
00384 },
00385 FILT_NULL
00386 };