00001 #include <mythtv/mythconfig.h>
00002
00003
/*
 * Entry points of the MMX zoom filter; both are defined further down in
 * this file (a real implementation for 32-bit x86 MMX builds, stubs
 * otherwise).
 *
 * zoom_filter_xmmx:
 *   prevX, prevY  dimensions, in pixels, of the expix1 / expix2 buffers
 *   expix1        source pixels (one unsigned int per pixel)
 *   expix2        destination pixels, written one per source coordinate pair
 *   lbruS, lbruD  two tables of coordinate pairs (two ints per output pixel)
 *   buffratio     blend factor applied between lbruS and lbruD
 *   precalCoef    16x16 table of packed per-neighbour weights
 *
 * zoom_filter_xmmx_supported:
 *   returns non-zero when zoom_filter_xmmx may be used on the running CPU.
 */
void zoom_filter_xmmx (int prevX, int prevY, unsigned int *expix1, unsigned int *expix2, int *lbruS, int *lbruD, int buffratio, int precalCoef[16][16]);
int zoom_filter_xmmx_supported (void);
00006
00007
00008 #if defined(MMX) && !defined(ARCH_X86_64)
00009
00010
00011
/*
 * When defined, the clipping step of zoom_filter_xmmx zeroes BOTH
 * coordinates of a pair as soon as either one is out of range, instead of
 * masking x and y independently.
 */
#define STRICT_COMPAT

/*
 * Fixed-point layout constants.  BUFFPOINTMASK is (1 << BUFFPOINTNB) - 1.
 * NOTE(review): these three are not referenced in the code visible here;
 * the filter hard-codes the 16-bit shifts instead.
 */
#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff

/* Number of sub-pixel steps per pixel along each axis. */
#define sqrtperte 16

/* sqrtperte - 1, so that (a & PERTEMASK) == (a % sqrtperte). */
#define PERTEMASK 0xf

/* log2(sqrtperte), so that (a >> PERTEDEC) == (a / sqrtperte). */
#define PERTEDEC 4
00024
00025
00026
00027 #include "mmx.h"
00028
00029 int zoom_filter_xmmx_supported () {
00030 return (av_get_cpu_flags() & AV_CPU_FLAG_SSE) >> 3;
00031 }
00032
00033 void zoom_filter_xmmx (int prevX, int prevY,
00034 unsigned int *expix1, unsigned int *expix2,
00035 int *lbruS, int *lbruD, int buffratio,
00036 int precalCoef[16][16])
00037 {
00038 int bufsize = prevX * prevY;
00039 volatile int loop;
00040
00041 mmx_t *brutS = (mmx_t*)lbruS;
00042 mmx_t *brutD = (mmx_t*)lbruD;
00043
00044 volatile mmx_t prevXY;
00045 volatile mmx_t ratiox;
00046
00047
00048 expix1[0]=expix1[prevX-1]=expix1[prevX*prevY-1]=expix1[prevX*prevY-prevX]=0;
00049
00050 prevXY.ud[0] = (prevX-1)<<PERTEDEC;
00051 prevXY.ud[1] = (prevY-1)<<PERTEDEC;
00052
00053 ratiox.d[0] = buffratio;
00054 ratiox.d[1] = buffratio;
00055 movq_m2r (ratiox, mm6);
00056 pslld_i2r (16,mm6);
00057
00058 pxor_r2r (mm7,mm7);
00059
00060 loop=0;
00061
00062
00063
00064
00065 while (loop < bufsize)
00066 {
00067
00068
00069
00070
00071
00072
00073 __asm__ __volatile__ (
00074 "movq %0,%%mm0\n"
00075 "movq %1,%%mm1\n"
00076 : :"m"(brutS[loop]),"m"(brutD[loop])
00077 );
00078
00079 psubd_r2r (mm0,mm1);
00080 movq_r2r (mm1, mm2);
00081
00082 pslld_i2r (16,mm1);
00083 mmx_r2r (pmulhuw, mm6, mm1);
00084 pmullw_r2r (mm6, mm2);
00085
00086 paddd_r2r (mm2, mm1);
00087 pslld_i2r (16,mm0);
00088
00089 paddd_r2r (mm1, mm0);
00090 psrld_i2r (16, mm0);
00091
00092
00093
00094
00095
00096
00097
00098 movq_m2r (prevXY,mm1);
00099 pcmpgtd_r2r (mm0, mm1);
00100
00101
00102
00103 #ifdef STRICT_COMPAT
00104 movq_r2r (mm1,mm2);
00105 punpckhdq_r2r (mm2,mm2);
00106 punpckldq_r2r (mm1,mm1);
00107 pand_r2r (mm2, mm0);
00108 #endif
00109 pand_r2r (mm1, mm0);
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119 __asm__ __volatile__ (
00120 "movd %%mm0,%%ecx\n"
00121 "movq %%mm0,%%mm1\n"
00122
00123 "andl $15,%%ecx\n"
00124 "psrlq $32,%%mm1\n"
00125
00126 "shll $6,%%ecx\n"
00127 "movd %%mm1,%%eax\n"
00128
00129 "addl %0,%%ecx\n"
00130 "andl $15,%%eax\n"
00131
00132 "movd (%%ecx,%%eax,4),%%mm3\n"
00133
00134 ::"m"(precalCoef):"eax","ecx");
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165 psrld_i2r (PERTEDEC,mm0);
00166 psrld_i2r (PERTEDEC,mm1);
00167 __asm__ __volatile__ (
00168 "movd %%mm1,%%eax\n"
00169 "movq %%mm3,%%mm5\n"
00170
00171 "mull %1\n"
00172 "movd %%mm0,%%ecx\n"
00173 "punpcklbw %%mm5, %%mm3\n"
00174
00175 "addl %%ecx,%%eax\n"
00176 "movq %%mm3,%%mm4\n"
00177 "movq %%mm3,%%mm5\n"
00178
00179 "movl %0,%%ecx\n"
00180 "punpcklbw %%mm5,%%mm3\n"
00181
00182 "movq (%%ecx,%%eax,4),%%mm0\n"
00183 "punpckhbw %%mm5,%%mm4\n"
00184
00185 "addl %1,%%eax\n"
00186 "movq (%%ecx,%%eax,4),%%mm2\n"
00187
00188 : : "X"(expix1), "X"(prevX):"eax","ecx"
00189 );
00190
00191
00192
00193
00194
00195
00196
00197
00198 movq_r2r (mm0, mm1);
00199
00200
00201 punpcklbw_r2r (mm7, mm0);
00202
00203
00204
00205 movq_r2r (mm3, mm5);
00206
00207
00208 punpckhbw_r2r (mm7, mm1);
00209
00210 punpcklbw_r2r (mm7, mm5);
00211 punpckhbw_r2r (mm7, mm3);
00212
00213
00214 pmullw_r2r (mm5, mm0);
00215 pmullw_r2r (mm3, mm1);
00216 paddw_r2r (mm1, mm0);
00217
00218
00219 movq_r2r (mm4, mm5);
00220 punpcklbw_r2r (mm7, mm4);
00221 punpckhbw_r2r (mm7, mm5);
00222
00223
00224 movq_r2r (mm2, mm1);
00225
00226
00227 punpcklbw_r2r (mm7, mm1);
00228 punpckhbw_r2r (mm7, mm2);
00229
00230
00231 pmullw_r2r (mm4, mm1);
00232 pmullw_r2r (mm5, mm2);
00233
00234
00235 paddw_r2r (mm1, mm0);
00236 paddw_r2r (mm2, mm0);
00237
00238
00239 psrlw_i2r (8, mm0);
00240 packuswb_r2r (mm7, mm0);
00241
00242 movd_r2m (mm0,expix2[loop]);
00243
00244 ++loop;
00245 }
00246 #ifdef HAVE_ATHLON
00247 __asm__ __volatile__ ("femms\n");
00248 #else
00249 emms();
00250 #endif
00251 }
00252 #else
/*
 * Fallback used when the MMX implementation is not compiled in:
 * always reports the filter as unavailable (returns 0).
 */
int zoom_filter_xmmx_supported (void)
{
    return 0;
}
/*
 * Fallback used when the MMX implementation is not compiled in.
 *
 * Intentionally does nothing: no buffer is read or written.  Callers are
 * expected to check zoom_filter_xmmx_supported(), which returns 0 in this
 * configuration.  The casts only silence unused-parameter warnings.
 */
void zoom_filter_xmmx (int prevX, int prevY,
                       unsigned int *expix1, unsigned int *expix2,
                       int *lbruS, int *lbruD, int buffratio,
                       int precalCoef[16][16])
{
    (void) prevX;
    (void) prevY;
    (void) expix1;
    (void) expix2;
    (void) lbruS;
    (void) lbruD;
    (void) buffratio;
    (void) precalCoef;
}
00267 #endif