00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/**
 * Chroma motion compensation for an 8-pixel-wide block, written as MMX
 * inline asm.
 *
 * This is a template: the function name is supplied by H264_CHROMA_MC8_TMPL,
 * the unfiltered case is delegated to H264_CHROMA_MC8_MV0, and
 * H264_CHROMA_OP is expanded just before each 8-byte store. All three macros
 * are defined outside this listing.
 *
 * @param dst    destination; 8 bytes are stored per row
 * @param src    source; the filtered paths load 8 bytes at src[0] and at
 *               src[1] and/or one row further down, so the vertical and
 *               2-D paths read one row beyond the last output row
 * @param stride byte step between rows, applied to both src and dst
 * @param h      number of rows to produce
 * @param x      horizontal weight, asserted to lie in 0..7
 * @param y      vertical weight, asserted to lie in 0..7
 *
 * ff_pw_8, ff_pw_32 and ff_pw_64 are defined elsewhere; the comments below
 * assume each holds four 16-bit words of 8 / 32 / 64 -- TODO confirm.
 *
 * NOTE(review): MMX registers loaded by one asm statement are consumed by
 * later statements, and no statement declares register clobbers. The
 * statements have to stay in this order with nothing using mm0-mm7 between
 * them.
 * NOTE(review): the "m" / "=m" operands name a single uint8_t while movq
 * transfers 8 bytes, and dst[0] is output-only although it is also handed to
 * H264_CHROMA_OP as an operand -- confirm this is acceptable for the
 * compilers in use.
 */
00028 static void H264_CHROMA_MC8_TMPL(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
00029 {
00030 DECLARE_ALIGNED_8(uint64_t, AA); /* memory copy of the 2-D weight A (set up below) */
00031 DECLARE_ALIGNED_8(uint64_t, DD); /* memory copy of the 2-D weight D = x*y */
00032 int i;
00033
00034 if(y==0 && x==0) {
00035 /* both weights are zero: no filtering, hand off to the MV0 routine */
00036 H264_CHROMA_MC8_MV0(dst, src, stride, h);
00037 return;
00038 }
00039
00040 assert(x<8 && y<8 && x>=0 && y>=0);
00041
00042 if(y==0 || x==0)
00043 {
00044 /* exactly one weight is non-zero: two-tap filter along a single axis */
00045 const int dxy = x ? 1 : stride; /* offset of the second tap: next pixel or next row */
00046
00047 asm volatile(
00048 "movd %0, %%mm5\n\t"
00049 "movq %1, %%mm4\n\t"
00050 "punpcklwd %%mm5, %%mm5\n\t"
00051 "punpckldq %%mm5, %%mm5\n\t" /* mm5 = x+y in all four words (the non-zero weight) */
00052 "movq %%mm4, %%mm6\n\t"
00053 "pxor %%mm7, %%mm7\n\t" /* mm7 = 0, used to widen bytes to words */
00054 "psubw %%mm5, %%mm4\n\t" /* mm4 = ff_pw_8 - (x+y), weight of the first tap */
00055 "psrlw $1, %%mm6\n\t" /* mm6 = ff_pw_8 >> 1, the rounding term */
00056 :: "rm"(x+y), "m"(ff_pw_8));
00057
00058 for(i=0; i<h; i++) {
00059 asm volatile(
00060 /* mm0 = 8 bytes at src, mm2 = 8 bytes at src+dxy */
00061 "movq %0, %%mm0\n\t"
00062 "movq %1, %%mm2\n\t"
00063 :: "m"(src[0]), "m"(src[dxy]));
00064
00065 asm volatile(
00066 /* widen both taps to words: mm0/mm1 = low/high half of the first tap, */
00067 /* mm2/mm3 = low/high half of the second tap */
00068 "movq %%mm0, %%mm1\n\t"
00069 "movq %%mm2, %%mm3\n\t"
00070 "punpcklbw %%mm7, %%mm0\n\t"
00071 "punpckhbw %%mm7, %%mm1\n\t"
00072 "punpcklbw %%mm7, %%mm2\n\t"
00073 "punpckhbw %%mm7, %%mm3\n\t"
00074 "pmullw %%mm4, %%mm0\n\t"
00075 "pmullw %%mm4, %%mm1\n\t"
00076 "pmullw %%mm5, %%mm2\n\t"
00077 "pmullw %%mm5, %%mm3\n\t"
00078
00079 /* per pixel: (mm4 * src[0] + mm5 * src[dxy] + mm6) >> 3 */
00080 "paddw %%mm6, %%mm0\n\t"
00081 "paddw %%mm6, %%mm1\n\t"
00082 "paddw %%mm2, %%mm0\n\t"
00083 "paddw %%mm3, %%mm1\n\t"
00084 "psrlw $3, %%mm0\n\t"
00085 "psrlw $3, %%mm1\n\t"
00086 "packuswb %%mm1, %%mm0\n\t" /* back to 8 unsigned bytes with saturation */
00087 H264_CHROMA_OP(%0, %%mm0)
00088 "movq %%mm0, %0\n\t"
00089 : "=m" (dst[0]));
00090
00091 src += stride;
00092 dst += stride;
00093 }
00094 return;
00095 }
00096
00097 /* general case: both weights non-zero, four-tap (2x2) filter */
00098 asm volatile("movd %2, %%mm4\n\t" /* mm4 = x */
00099 "movd %3, %%mm6\n\t" /* mm6 = y */
00100 "punpcklwd %%mm4, %%mm4\n\t"
00101 "punpcklwd %%mm6, %%mm6\n\t"
00102 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x in all four words */
00103 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y in all four words */
00104 "movq %%mm4, %%mm5\n\t"
00105 "pmullw %%mm6, %%mm4\n\t" /* mm4 = x*y */
00106 "psllw $3, %%mm5\n\t" /* mm5 = 8*x */
00107 "psllw $3, %%mm6\n\t" /* mm6 = 8*y */
00108 "movq %%mm5, %%mm7\n\t"
00109 "paddw %%mm6, %%mm7\n\t" /* mm7 = 8*x + 8*y */
00110 "movq %%mm4, %1\n\t" /* DD = x*y */
00111 "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8*x - x*y */
00112 "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8*y - x*y */
00113 "paddw %4, %%mm4\n\t"
00114 "psubw %%mm7, %%mm4\n\t" /* mm4 = x*y + ff_pw_64 - 8*x - 8*y */
00115 "pxor %%mm7, %%mm7\n\t" /* mm7 = 0 again, for byte->word unpacking */
00116 "movq %%mm4, %0\n\t" /* AA = mm4; mm4 is reused as scratch inside the loop */
00117 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
00118
00119 asm volatile(
00120 /* prime the loop: mm0 = src[0..7], mm1 = src[1..8] of the first row */
00121 "movq %0, %%mm0\n\t"
00122 "movq %1, %%mm1\n\t"
00123 : : "m" (src[0]), "m" (src[1]));
00124
00125 for(i=0; i<h; i++) {
00126 src += stride; /* src now points at the lower of the two rows being blended */
00127
00128 asm volatile(
00129 /* upper row: mm2 = AA * pix[0..3] + B * pix[1..4] */
00130 /* mm3 = AA * pix[4..7] + B * pix[5..8] */
00131 "movq %%mm0, %%mm2\n\t"
00132 "movq %%mm1, %%mm3\n\t"
00133 "punpckhbw %%mm7, %%mm0\n\t"
00134 "punpcklbw %%mm7, %%mm1\n\t"
00135 "punpcklbw %%mm7, %%mm2\n\t"
00136 "punpckhbw %%mm7, %%mm3\n\t"
00137 "pmullw %0, %%mm0\n\t"
00138 "pmullw %0, %%mm2\n\t"
00139 "pmullw %%mm5, %%mm1\n\t"
00140 "pmullw %%mm5, %%mm3\n\t"
00141 "paddw %%mm1, %%mm2\n\t"
00142 "paddw %%mm0, %%mm3\n\t"
00143 : : "m" (AA));
00144
00145 asm volatile(
00146 /* lower row, same column: mm2 += C * src[0..3], mm3 += C * src[4..7] */
00147 "movq %0, %%mm0\n\t"
00148 "movq %%mm0, %%mm1\n\t"
00149 "punpcklbw %%mm7, %%mm0\n\t"
00150 "punpckhbw %%mm7, %%mm1\n\t"
00151 "pmullw %%mm6, %%mm0\n\t"
00152 "pmullw %%mm6, %%mm1\n\t"
00153 "paddw %%mm0, %%mm2\n\t"
00154 "paddw %%mm1, %%mm3\n\t"
00155 : : "m" (src[0]));
00156
00157 asm volatile(
00158 /* lower row, next column: mm2 += DD * src[1..4], mm3 += DD * src[5..8] */
00159 "movq %1, %%mm1\n\t" /* mm1 keeps the packed src[1..8] for the next iteration */
00160 "movq %%mm1, %%mm0\n\t"
00161 "movq %%mm1, %%mm4\n\t"
00162 "punpcklbw %%mm7, %%mm0\n\t"
00163 "punpckhbw %%mm7, %%mm4\n\t"
00164 "pmullw %2, %%mm0\n\t"
00165 "pmullw %2, %%mm4\n\t"
00166 "paddw %%mm0, %%mm2\n\t"
00167 "paddw %%mm4, %%mm3\n\t"
00168 "movq %0, %%mm0\n\t" /* reload packed src[0..7]; it becomes the upper row next time */
00169 : : "m" (src[0]), "m" (src[1]), "m" (DD));
00170
00171 asm volatile(
00172 /* add the ff_pw_32 rounding term, scale down by 64 and repack to bytes */
00173 "paddw %1, %%mm2\n\t"
00174 "paddw %1, %%mm3\n\t"
00175 "psrlw $6, %%mm2\n\t"
00176 "psrlw $6, %%mm3\n\t"
00177 "packuswb %%mm3, %%mm2\n\t"
00178 H264_CHROMA_OP(%0, %%mm2)
00179 "movq %%mm2, %0\n\t"
00180 : "=m" (dst[0]) : "m" (ff_pw_32));
00181 dst+= stride;
00182 }
00183 }
00184
/**
 * Chroma motion compensation for a 4-pixel-wide block, as one MMX inline-asm
 * loop.
 *
 * The function name comes from H264_CHROMA_MC4_TMPL and H264_CHROMA_OP4 is
 * expanded before each 4-byte store; both macros, MANGLE() and the ff_pw_*
 * constants are defined outside this listing. The comments assume ff_pw_8 and
 * ff_pw_32 hold four 16-bit words of 8 and 32 -- TODO confirm.
 *
 * Each source row is first filtered horizontally with weights (ff_pw_8 - x)
 * and x; two consecutive filtered rows are then blended with weights
 * (ff_pw_8 - y) and y, ff_pw_32 is added and the sum is shifted right by 6.
 *
 * @param dst    destination; 4 bytes are stored per row
 * @param src    source; rows 0..h are read, 5 bytes each (loads at src and src+1)
 * @param stride byte step between rows for both src and dst
 * @param h      row count; the loop emits two rows per pass and ends when
 *               h - 2 reaches exactly zero, so h must be a positive even number
 * @param x      horizontal weight (not range-checked here; presumably 0..7)
 * @param y      vertical weight (not range-checked here; presumably 0..7)
 */
00185 static void H264_CHROMA_MC4_TMPL(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
00186 {
00187 asm volatile(
00188 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for byte->word unpacking */
00189 "movd %5, %%mm2 \n\t" /* mm2 = x */
00190 "movd %6, %%mm3 \n\t" /* mm3 = y */
00191 "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
00192 "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
00193 "punpcklwd %%mm2, %%mm2 \n\t"
00194 "punpcklwd %%mm3, %%mm3 \n\t"
00195 "punpcklwd %%mm2, %%mm2 \n\t" /* mm2 = x in all four words */
00196 "punpcklwd %%mm3, %%mm3 \n\t" /* mm3 = y in all four words */
00197 "psubw %%mm2, %%mm4 \n\t" /* mm4 = ff_pw_8 - x */
00198 "psubw %%mm3, %%mm5 \n\t" /* mm5 = ff_pw_8 - y */
00199 /* first row: mm6 = mm4 * src[0..3] + mm2 * src[1..4] */
00200 "movd (%1), %%mm0 \n\t"
00201 "movd 1(%1), %%mm6 \n\t"
00202 "add %3, %1 \n\t"
00203 "punpcklbw %%mm7, %%mm0 \n\t"
00204 "punpcklbw %%mm7, %%mm6 \n\t"
00205 "pmullw %%mm4, %%mm0 \n\t"
00206 "pmullw %%mm2, %%mm6 \n\t"
00207 "paddw %%mm0, %%mm6 \n\t"
00208 /* loop body, first half: previous filtered row is in mm6 */
00209 "1: \n\t"
00210 "movd (%1), %%mm0 \n\t"
00211 "movd 1(%1), %%mm1 \n\t"
00212 "add %3, %1 \n\t"
00213 "punpcklbw %%mm7, %%mm0 \n\t"
00214 "punpcklbw %%mm7, %%mm1 \n\t"
00215 "pmullw %%mm4, %%mm0 \n\t"
00216 "pmullw %%mm2, %%mm1 \n\t"
00217 "paddw %%mm0, %%mm1 \n\t" /* mm1 = horizontally filtered current row */
00218 "movq %%mm1, %%mm0 \n\t" /* keep it in mm0 for the second half */
00219 "pmullw %%mm5, %%mm6 \n\t" /* previous row * (ff_pw_8 - y) */
00220 "pmullw %%mm3, %%mm1 \n\t" /* current row * y */
00221 "paddw %4, %%mm6 \n\t" /* + ff_pw_32 rounding term */
00222 "paddw %%mm6, %%mm1 \n\t"
00223 "psrlw $6, %%mm1 \n\t"
00224 "packuswb %%mm1, %%mm1 \n\t"
00225 H264_CHROMA_OP4((%0), %%mm1, %%mm6) /* mm6 is dead here; it is reloaded just below */
00226 "movd %%mm1, (%0) \n\t"
00227 "add %3, %0 \n\t"
00228 "movd (%1), %%mm6 \n\t" /* second half: previous filtered row is in mm0 */
00229 "movd 1(%1), %%mm1 \n\t"
00230 "add %3, %1 \n\t"
00231 "punpcklbw %%mm7, %%mm6 \n\t"
00232 "punpcklbw %%mm7, %%mm1 \n\t"
00233 "pmullw %%mm4, %%mm6 \n\t"
00234 "pmullw %%mm2, %%mm1 \n\t"
00235 "paddw %%mm6, %%mm1 \n\t" /* mm1 = horizontally filtered current row */
00236 "movq %%mm1, %%mm6 \n\t" /* keep it in mm6 for the next pass */
00237 "pmullw %%mm5, %%mm0 \n\t" /* previous row * (ff_pw_8 - y) */
00238 "pmullw %%mm3, %%mm1 \n\t" /* current row * y */
00239 "paddw %4, %%mm0 \n\t" /* + ff_pw_32 rounding term */
00240 "paddw %%mm0, %%mm1 \n\t"
00241 "psrlw $6, %%mm1 \n\t"
00242 "packuswb %%mm1, %%mm1 \n\t"
00243 H264_CHROMA_OP4((%0), %%mm1, %%mm0) /* mm0 is dead here; it is reloaded at label 1 */
00244 "movd %%mm1, (%0) \n\t"
00245 "add %3, %0 \n\t"
00246 "sub $2, %2 \n\t" /* two rows were written in this pass */
00247 "jnz 1b \n\t"
00248 : "+r"(dst), "+r"(src), "+r"(h)
00249 : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
/* The asm loads from src and stores to dst through pointer registers only, so
   no operand describes those accesses; the "memory" clobber tells the compiler
   that memory is read and written here. */
00250 : "memory");
00251 }
00252
00253 #ifdef H264_CHROMA_MC2_TMPL
/**
 * Chroma motion compensation for a 2-pixel-wide block using MMX plus pshufw.
 *
 * The function name comes from H264_CHROMA_MC2_TMPL and H264_CHROMA_OP4 is
 * expanded before each store; both macros and ff_pw_32 are defined outside
 * this listing (ff_pw_32 is assumed to be four 16-bit words of 32 -- TODO
 * confirm).
 *
 * The four filter weights are packed two per 32-bit integer so that a single
 * pmaddwd yields one row's contribution for both output pixels. The packing
 * described below holds for x and y in 0..7, which is assumed and not
 * checked here.
 *
 * The first asm statement leaves mm2, mm5, mm6 and mm7 set up for the second
 * one; nothing may touch the MMX registers between the two statements.
 *
 * @param dst    destination; 2 bytes are stored per row
 * @param src    source; 4 bytes are loaded from each of rows 0..h
 * @param stride byte step between rows for both src and dst
 * @param h      number of rows; must be positive (the loop ends when h - 1
 *               reaches zero)
 * @param x      horizontal weight
 * @param y      vertical weight
 */
00254 static void H264_CHROMA_MC2_TMPL(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
00255 {
00256 int tmp = ((1<<16)-1)*x + 8; /* == (x << 16) + (8 - x): low word 8-x, high word x */
00257 int CD= tmp*y; /* low word (8-x)*y, high word x*y */
00258 int AB= (tmp<<3) - CD; /* low word (8-x)*(8-y), high word x*(8-y) */
00259 asm volatile(
00260 /* mm5 = AB in both dwords, mm6 = CD in both dwords, mm7 = 0 */
00261
00262 "movd %0, %%mm5\n\t"
00263 "movd %1, %%mm6\n\t"
00264 "punpckldq %%mm5, %%mm5\n\t"
00265 "punpckldq %%mm6, %%mm6\n\t"
00266 "pxor %%mm7, %%mm7\n\t"
00267 /* mm2 = first row widened to words and arranged as (p0, p1, p1, p2) */
00268 "movd %2, %%mm2\n\t"
00269 "punpcklbw %%mm7, %%mm2\n\t"
00270 "pshufw $0x94, %%mm2, %%mm2\n\t"
00271 :: "r"(AB), "r"(CD), "m"(src[0]));
00272
00273
00274 asm volatile(
00275 "1:\n\t"
00276 "add %4, %1\n\t" /* advance src to the lower row */
00277 /* upper row contribution: each dword = low(AB)*left + high(AB)*right */
00278 "movq %%mm2, %%mm1\n\t"
00279 "pmaddwd %%mm5, %%mm1\n\t"
00280 /* load the lower row and arrange it as (p0, p1, p1, p2) */
00281 "movd (%1), %%mm0\n\t"
00282 "punpcklbw %%mm7, %%mm0\n\t"
00283 "pshufw $0x94, %%mm0, %%mm0\n\t"
00284 /* keep the arranged row in mm2: it is the upper row of the next pass */
00285 "movq %%mm0, %%mm2\n\t"
00286 "pmaddwd %%mm6, %%mm0\n\t" /* lower row contribution using CD */
00287 "paddw %3, %%mm1\n\t" /* add ff_pw_32 (word-wise) */
00288 "paddw %%mm0, %%mm1\n\t"
00289
00290 "psrlw $6, %%mm1\n\t"
00291 "packssdw %%mm7, %%mm1\n\t" /* two dword sums -> two words */
00292 "packuswb %%mm7, %%mm1\n\t" /* -> two bytes in the low 16 bits */
00293 H264_CHROMA_OP4((%0), %%mm1, %%mm3) /* mm3 is not used anywhere else in this function */
00294 "movd %%mm1, %%esi\n\t"
00295 "movw %%si, (%0)\n\t" /* store exactly the two output pixels */
00296 "add %4, %0\n\t"
00297 "sub $1, %2\n\t"
00298 "jnz 1b\n\t"
00299 : "+r" (dst), "+r"(src), "+r"(h)
00300 : "m" (ff_pw_32), "r"((long)stride)
/* The loop loads from src and stores to dst through pointer registers only, so
   the "memory" clobber is needed to tell the compiler memory is accessed. */
00301 : "%esi", "memory");
00302
00303 }
00304 #endif
00305