00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <stddef.h>
00031 #include <inttypes.h>
00032
/* Use the project-provided word size when the platform headers do not
 * already define __WORDSIZE. */
#if !defined(__WORDSIZE)
#define __WORDSIZE MP_WORDSIZE
#endif

/* Drop any earlier definitions of the instruction-selection macros so they
 * can be redefined below for the currently selected HAVE_* feature set. */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

/* Width in bytes of the SIMD register the selected instruction set offers. */
#if defined(HAVE_SSE2)
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/* Prefetch, byte-average and "leave MMX state" mnemonics.
 * 3DNow! takes precedence over MMX2; without either, the prefetch macros
 * expand to an assembler comment so the asm templates stay valid.
 * PAVGB is deliberately left undefined when neither extension is present. */
#if defined(HAVE_3DNOW)
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#define EMMS      "femms"
#else
#define EMMS      "emms"
#if defined(HAVE_MMX2)
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#elif defined(__APPLE__)
#define PREFETCH  "#"
#define PREFETCHW "#"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif
#endif

/* Non-temporal store and store fence exist only with MMX2; otherwise fall
 * back to a plain movq and an assembler comment. */
#if defined(HAVE_MMX2)
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
00084
/**
 * Widen packed 24-bit pixels to 32 bits per pixel.
 *
 * Every 3 source bytes produce 4 destination bytes. The scalar path copies
 * the three bytes in order and appends a zero byte; when WORDS_BIGENDIAN is
 * defined it writes a zero byte followed by the three source bytes reversed.
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination buffer; 4 bytes are written per source pixel
 * @param src_size number of source bytes; the tail loop always consumes
 *                 whole 3-byte pixels, so this should be a multiple of 3
 */
static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
    const uint8_t *mm_end;

    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* One iteration converts 24 source bytes, but the last punpckldq loads
     * 4 bytes starting at offset 21 and therefore touches s[24]. Only enter
     * the loop while 25 bytes are readable so the load never crosses the end
     * of the source buffer; whatever is left is handled by the scalar loop. */
    mm_end = end - 24;
    /* mask32 is defined outside this block; presumably it clears the fourth
     * byte of every 32-bit pixel -- confirm against its definition. */
    __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "punpckldq 3%1, %%mm0 \n\t"
            "movd 6%1, %%mm1 \n\t"
            "punpckldq 9%1, %%mm1 \n\t"
            "movd 12%1, %%mm2 \n\t"
            "punpckldq 15%1, %%mm2 \n\t"
            "movd 18%1, %%mm3 \n\t"
            "punpckldq 21%1, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm2, 16%0 \n\t"
            MOVNTQ" %%mm3, 24%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 32;
        s += 24;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels. */
    while (s < end)
    {
#ifdef WORDS_BIGENDIAN
        *dest++ = 0;
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s+=3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 0;
#endif
    }
}
00144
/**
 * Pack 32-bit pixels down to 24 bits per pixel by dropping one byte.
 *
 * Every 4 source bytes produce 3 destination bytes. The scalar path keeps
 * the first three bytes of each pixel; when WORDS_BIGENDIAN is defined it
 * skips the first byte and stores the remaining three in reverse order.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
#ifdef HAVE_MMX
    /* Each MMX iteration reads 32 source bytes and stores 24. */
    const uint8_t *mm_end = end - 31;

    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    for (; s < mm_end; dest += 24, s += 32)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 16%1, %%mm4 \n\t"
            "movq 24%1, %%mm5 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"
            :"=m"(*dest)
            :"m"(*s),"m"(mask24l),
             "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one pixel (4 bytes in, 3 bytes out) per iteration. */
    for (; s < end; s += 4, dest += 3)
    {
#ifdef WORDS_BIGENDIAN
        dest[2] = s[1];
        dest[1] = s[2];
        dest[0] = s[3];
#else
        dest[0] = s[0];
        dest[1] = s[1];
        dest[2] = s[2];
#endif
    }
}
00230
00231
00232
00233
00234
00235
00236
/**
 * Convert 15-bit pixels to the 16-bit layout.
 *
 * For every 16-bit word x the result is (x & 0x7FFF) + (x & 0x7FE0): the low
 * five bits are kept and bits 5..14 are doubled, i.e. moved up by one bit.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size)
{
    const uint8_t *s = src;
    uint8_t *d = dst;
    const uint8_t *end = s + src_size;
    const uint8_t *mm_end;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    /* mask15s is defined outside this block. */
    __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
    mm_end = end - 15;
    while (s<mm_end)
    {
        /* The asm reads 16 bytes from s and stores 16 bytes at d, but the
         * operands only describe *s and *d; the "memory" clobber tells the
         * compiler about the rest, as the sibling converters already do. */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "pand %%mm4, %%mm0 \n\t"
            "pand %%mm4, %%mm2 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            :"memory"
        );
        d+=16;
        s+=16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time through one 32-bit word. */
    mm_end = end - 3;
    while (s < mm_end)
    {
        const uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
    }
    /* At most one pixel is left over. */
    if (s < end)
    {
        const uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
00285
/**
 * Convert 16-bit pixels to the 15-bit layout.
 *
 * For every 16-bit word x the result is ((x >> 1) & 0x7FE0) | (x & 0x001F):
 * the low five bits are kept, the upper fields move down by one bit and the
 * bit that lands on position 4 is discarded.
 *
 * @param src      source pixels, 2 bytes each
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size)
{
    const uint8_t *s = src;
    uint8_t *d = dst;
    const uint8_t *end = s + src_size;
    const uint8_t *mm_end;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    /* mask15rg and mask15b are defined outside this block. */
    __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
    mm_end = end - 15;
    while (s<mm_end)
    {
        /* 16 bytes are read and written per iteration while the operands
         * only name *s and *d; "memory" covers the remaining bytes, matching
         * the sibling converters. */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $1, %%mm0 \n\t"
            "psrlq $1, %%mm2 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm3 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            :"memory"
        );
        d+=16;
        s+=16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time through one 32-bit word. */
    mm_end = end - 3;
    while (s < mm_end)
    {
        const uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
    }
    /* At most one pixel is left over; the pointers are not used afterwards,
     * so they are not advanced. */
    if (s < end)
    {
        const uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}
00341
/**
 * Convert 32-bit pixels to 16-bit 5-6-5 pixels.
 *
 * Reading each pixel as a native 32-bit word, the scalar path computes
 * ((rgb & 0xFF) >> 3) + ((rgb & 0xFC00) >> 5) + ((rgb & 0xF80000) >> 8):
 * the top 5/6/5 bits of the three low bytes, with the lowest byte ending up
 * in the low bits of the result. The highest byte is ignored.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* The whole loop lives inside one asm statement: it advances s and d
     * itself and stores through d. The compiler cannot see those stores from
     * the operand list, hence the "memory" clobber. */
    asm volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels.
     * The pixel is kept unsigned so a set top byte never goes through a
     * signed conversion. */
    while (s < end)
    {
        const uint32_t rgb = *(const uint32_t *)s;
        s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
00439
/**
 * Convert 32-bit pixels to 16-bit 5-6-5 pixels with the outer channels
 * swapped.
 *
 * Reading each pixel as a native 32-bit word, the scalar path computes
 * ((rgb & 0xF8) << 8) + ((rgb & 0xFC00) >> 5) + ((rgb & 0xF80000) >> 19):
 * the lowest byte ends up in the high bits of the result and the third byte
 * in the low bits. The highest byte is ignored.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 4 pixels (16 bytes in, 8 bytes out). */
    const uint8_t *mm_end = end - 15;
#endif
    uint16_t *d = (uint16_t *)dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    for (; s < mm_end; d += 4, s += 16)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one pixel per iteration. */
    for (; s < end; s += 4)
    {
        const int pixel = *(uint32_t*)s;
        const int hi  = (pixel&0xF8)<<8;
        const int mid = (pixel&0xFC00)>>5;
        const int lo  = (pixel&0xF80000)>>19;
        *d++ = hi + mid + lo;
    }
}
00500
/**
 * Convert 32-bit pixels to 15-bit 5-5-5 pixels.
 *
 * Reading each pixel as a native 32-bit word, the scalar path computes
 * ((rgb & 0xFF) >> 3) + ((rgb & 0xF800) >> 6) + ((rgb & 0xF80000) >> 9):
 * the top 5 bits of each of the three low bytes, with the lowest byte ending
 * up in the low bits of the result. The highest byte is ignored.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* The whole loop lives inside one asm statement: it advances s and d
     * itself and stores through d. The compiler cannot see those stores from
     * the operand list, hence the "memory" clobber. */
    asm volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $6, %%mm0 \n\t"
        "pslld $10, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels.
     * The pixel is kept unsigned so a set top byte never goes through a
     * signed conversion. */
    while (s < end)
    {
        const uint32_t rgb = *(const uint32_t *)s;
        s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
}
00598
/**
 * Convert 32-bit pixels to 15-bit 5-5-5 pixels with the outer channels
 * swapped.
 *
 * Reading each pixel as a native 32-bit word, the scalar path computes
 * ((rgb & 0xF8) << 7) + ((rgb & 0xF800) >> 6) + ((rgb & 0xF80000) >> 19):
 * the lowest byte ends up in bits 10..14 of the result and the third byte in
 * the low bits. The highest byte is ignored.
 *
 * @param src      source pixels, 4 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 4 pixels (16 bytes in, 8 bytes out). */
    const uint8_t *mm_end = end - 15;
#endif
    uint16_t *d = (uint16_t *)dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    for (; s < mm_end; d += 4, s += 16)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one pixel per iteration. */
    for (; s < end; s += 4)
    {
        const int pixel = *(uint32_t*)s;
        const int hi  = (pixel&0xF8)<<7;
        const int mid = (pixel&0xF800)>>6;
        const int lo  = (pixel&0xF80000)>>19;
        *d++ = hi + mid + lo;
    }
}
00659
/**
 * Convert packed 24-bit pixels to 16-bit 5-6-5 pixels.
 *
 * The scalar path reads the three bytes of a pixel in order as b, g, r and
 * stores (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes; the tail loop always consumes
 *                 whole 3-byte pixels, so this should be a multiple of 3
 */
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* One iteration converts 12 source bytes, but the punpckldq at offset 9
     * loads 4 bytes and therefore touches s[12]. Only enter the loop while
     * 13 bytes are readable so the load never crosses the end of the source
     * buffer; whatever is left is handled by the scalar loop. */
    mm_end = end - 12;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels. */
    while (s < end)
    {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00722
/**
 * Convert packed 24-bit pixels to 16-bit 5-6-5 pixels with the outer
 * channels swapped.
 *
 * The scalar path reads the three bytes of a pixel in order as r, g, b and
 * stores (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 4 pixels (12 bytes in, 8 bytes out). */
    const uint8_t *mm_end = end - 15;
#endif
    uint16_t *d = (uint16_t *)dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    for (; s < mm_end; d += 4, s += 12)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one 3-byte pixel per iteration. */
    for (; s < end; s += 3)
    {
        const int r = s[0];
        const int g = s[1];
        const int b = s[2];
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00785
/**
 * Convert packed 24-bit pixels to 15-bit 5-5-5 pixels.
 *
 * The scalar path reads the three bytes of a pixel in order as b, g, r and
 * stores (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes; the tail loop always consumes
 *                 whole 3-byte pixels, so this should be a multiple of 3
 */
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* One iteration converts 12 source bytes, but the punpckldq at offset 9
     * loads 4 bytes and therefore touches s[12]. Only enter the loop while
     * 13 bytes are readable so the load never crosses the end of the source
     * buffer; whatever is left is handled by the scalar loop. */
    mm_end = end - 12;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels. */
    while (s < end)
    {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00848
/**
 * Convert packed 24-bit pixels to 15-bit 5-5-5 pixels with the outer
 * channels swapped.
 *
 * The scalar path reads the three bytes of a pixel in order as r, g, b and
 * stores (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7).
 *
 * @param src      source pixels, 3 bytes each
 * @param dst      destination buffer; 2 bytes are written per source pixel
 * @param src_size number of source bytes to convert
 */
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 4 pixels (12 bytes in, 8 bytes out). */
    const uint8_t *mm_end = end - 15;
#endif
    uint16_t *d = (uint16_t *)dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    for (; s < mm_end; d += 4, s += 12)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one 3-byte pixel per iteration. */
    for (; s < end; s += 3)
    {
        const int r = s[0];
        const int g = s[1];
        const int b = s[2];
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
/**
 * Expand 15-bit 5-5-5 pixels to packed 24-bit pixels.
 *
 * For every 16-bit source word the scalar path writes three bytes:
 * (bgr & 0x1F) << 3, (bgr & 0x3E0) >> 2 and (bgr & 0x7C00) >> 7, so each
 * 5-bit field lands in the top bits of its own byte.
 *
 * @param src      source pixels, 2 bytes each (read as native uint16_t)
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#ifdef HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = (uint8_t *)dst;
    /* Keep the const qualifier when reinterpreting the source, as the
     * sibling 16-bit readers do. */
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    mm_end = end - 7;
    while (s < mm_end)
    {
        /* First statement: unpack 8 source pixels into 32-bit form, leaving
         * the results in mm6/mm7 (first four) and mm0/mm3 (last four). */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            "movq %%mm0, %%mm6 \n\t"
            "movq %%mm3, %%mm7 \n\t"

            "movq 8%1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
            :"memory");

        /* Second statement: relies on the MMX registers left by the first
         * one; packs the eight 32-bit pixels into 24 bytes and stores them. */
        __asm __volatile(
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "movq %%mm6, %%mm0 \n\t"
            "movq %%mm7, %%mm1 \n\t"

            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"

            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"

            :"=m"(*d)
            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
        d += 24;
        s += 8;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the remaining pixels. */
    while (s < end)
    {
        const uint16_t bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
    }
}
01076
/**
 * Expand 16-bit 5-6-5 pixels to packed 24-bit pixels.
 *
 * For every 16-bit source word the scalar path writes three bytes:
 * (bgr & 0x1F) << 3, (bgr & 0x7E0) >> 3 and (bgr & 0xF800) >> 8, so each
 * field lands in the top bits of its own byte.
 *
 * @param src      source pixels, 2 bytes each (read as native uint16_t)
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes; src_size/2 pixels are converted
 */
static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *s = (const uint16_t *)src;
    const uint16_t *end = s + src_size/2;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 8 pixels (16 bytes in, 24 bytes out). */
    const uint16_t *mm_end = end - 7;
#endif
    uint8_t *d = (uint8_t *)dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    for (; s < mm_end; d += 24, s += 8)
    {
        /* First statement: unpack 8 source pixels into 32-bit form, leaving
         * the results in mm6/mm7 (first four) and mm0/mm3 (last four). */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            "movq %%mm0, %%mm6 \n\t"
            "movq %%mm3, %%mm7 \n\t"

            "movq 8%1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
            :"memory");

        /* Second statement: relies on the MMX registers left by the first
         * one; packs the eight 32-bit pixels into 24 bytes and stores them. */
        __asm __volatile(
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "movq %%mm6, %%mm0 \n\t"
            "movq %%mm7, %%mm1 \n\t"

            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"

            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"

            :"=m"(*d)
            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one pixel (2 bytes in, 3 bytes out) per iteration. */
    for (; s < end; d += 3)
    {
        const uint16_t px = *s++;
        d[0] = (px&0x1F)<<3;
        d[1] = (px&0x7E0)>>3;
        d[2] = (px&0xF800)>>8;
    }
}
01217
/*
 * Expand 15-bit pixels (one uint16_t per pixel, three 5-bit fields) into
 * 4-byte pixels: the three fields scaled to the top of a byte each, plus one
 * zero byte.
 *
 * src      : input buffer, read as uint16_t words
 * dst      : output buffer, 4 bytes are written for every input word
 * src_size : input size in bytes (src_size/2 pixels are converted)
 *
 * With HAVE_MMX the inline assembly converts four pixels per iteration and
 * the scalar loop at the end handles whatever is left over.
 */
01218 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
01219 {
01220 const uint16_t *end;
01221 #ifdef HAVE_MMX
01222 const uint16_t *mm_end;
01223 #endif
01224 uint8_t *d = (uint8_t *)dst;
01225 const uint16_t *s = (const uint16_t *)src;
01226 end = s + src_size/2; /* one past the last input word; src_size is in bytes */
01227 #ifdef HAVE_MMX
01228 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
01229 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); /* mm7 = 0, zero source for the unpacks below */
01230 mm_end = end - 3; /* each MMX iteration consumes 4 whole words */
01231 while (s < mm_end)
01232 {
01233 __asm __volatile(
01234 PREFETCH" 32%1 \n\t"
01235 "movq %1, %%mm0 \n\t" /* three copies of the same 4 input words, */
01236 "movq %1, %%mm1 \n\t" /* one per colour field */
01237 "movq %1, %%mm2 \n\t"
/* mask15b/g/r are defined outside this block; presumably the per-word
 * equivalents of the 0x1F / 0x3E0 / 0x7C00 masks used by the scalar loop. */
01238 "pand %2, %%mm0 \n\t"
01239 "pand %3, %%mm1 \n\t"
01240 "pand %4, %%mm2 \n\t"
01241 "psllq $3, %%mm0 \n\t" /* same shifts as the scalar loop: <<3, >>2, >>7 */
01242 "psrlq $2, %%mm1 \n\t"
01243 "psrlq $7, %%mm2 \n\t"
01244 "movq %%mm0, %%mm3 \n\t" /* keep copies for the upper two pixels */
01245 "movq %%mm1, %%mm4 \n\t"
01246 "movq %%mm2, %%mm5 \n\t"
01247 "punpcklwd %%mm7, %%mm0 \n\t" /* widen words 0-1 to dwords (zero-extended via mm7) */
01248 "punpcklwd %%mm7, %%mm1 \n\t"
01249 "punpcklwd %%mm7, %%mm2 \n\t"
01250 "punpckhwd %%mm7, %%mm3 \n\t" /* widen words 2-3 to dwords */
01251 "punpckhwd %%mm7, %%mm4 \n\t"
01252 "punpckhwd %%mm7, %%mm5 \n\t"
01253 "psllq $8, %%mm1 \n\t" /* move second field to byte 1 of each dword */
01254 "psllq $16, %%mm2 \n\t" /* move third field to byte 2 of each dword */
01255 "por %%mm1, %%mm0 \n\t"
01256 "por %%mm2, %%mm0 \n\t" /* mm0 = output pixels 0-1 */
01257 "psllq $8, %%mm4 \n\t"
01258 "psllq $16, %%mm5 \n\t"
01259 "por %%mm4, %%mm3 \n\t"
01260 "por %%mm5, %%mm3 \n\t" /* mm3 = output pixels 2-3 */
01261 MOVNTQ" %%mm0, %0 \n\t"
01262 MOVNTQ" %%mm3, 8%0 \n\t"
01263 :"=m"(*d)
01264 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01265 :"memory");
01266 d += 16; /* 4 pixels * 4 output bytes */
01267 s += 4; /* 4 input words */
01268 }
01269 __asm __volatile(SFENCE:::"memory");
01270 __asm __volatile(EMMS:::"memory");
01271 #endif
/* Scalar path: the whole input without MMX, otherwise the remaining pixels. */
01272 while (s < end)
01273 {
01274 #if 0 //slightly slower on athlon
01275 int bgr= *s++;
01276 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
01277 #else
01278 register uint16_t bgr;
01279 bgr = *s++;
/* The byte order of the output pixel is reversed on big-endian builds. */
01280 #ifdef WORDS_BIGENDIAN
01281 *d++ = 0;
01282 *d++ = (bgr&0x7C00)>>7;
01283 *d++ = (bgr&0x3E0)>>2;
01284 *d++ = (bgr&0x1F)<<3;
01285 #else
01286 *d++ = (bgr&0x1F)<<3; /* bits 0-4 -> top 5 bits of the byte */
01287 *d++ = (bgr&0x3E0)>>2; /* bits 5-9 */
01288 *d++ = (bgr&0x7C00)>>7; /* bits 10-14 */
01289 *d++ = 0;
01290 #endif
01291
01292 #endif
01293 }
01294 }
01295
/*
 * Expand 16-bit pixels (one uint16_t per pixel, fields of 5, 6 and 5 bits)
 * into 4-byte pixels: the three fields scaled to the top of a byte each, plus
 * one zero byte.
 *
 * src      : input buffer, read as uint16_t words
 * dst      : output buffer, 4 bytes are written for every input word
 * src_size : input size in bytes (src_size/2 pixels are converted)
 *
 * With HAVE_MMX the inline assembly converts four pixels per iteration and
 * the scalar loop at the end handles whatever is left over.
 */
01296 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
01297 {
01298 const uint16_t *end;
01299 #ifdef HAVE_MMX
01300 const uint16_t *mm_end;
01301 #endif
01302 uint8_t *d = (uint8_t *)dst;
01303 const uint16_t *s = (uint16_t *)src;
01304 end = s + src_size/2; /* one past the last input word; src_size is in bytes */
01305 #ifdef HAVE_MMX
01306 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
01307 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); /* mm7 = 0, zero source for the unpacks below */
01308 mm_end = end - 3; /* each MMX iteration consumes 4 whole words */
01309 while (s < mm_end)
01310 {
01311 __asm __volatile(
01312 PREFETCH" 32%1 \n\t"
01313 "movq %1, %%mm0 \n\t" /* three copies of the same 4 input words, */
01314 "movq %1, %%mm1 \n\t" /* one per colour field */
01315 "movq %1, %%mm2 \n\t"
/* mask16b/g/r are defined outside this block; presumably the per-word
 * equivalents of the 0x1F / 0x7E0 / 0xF800 masks used by the scalar loop. */
01316 "pand %2, %%mm0 \n\t"
01317 "pand %3, %%mm1 \n\t"
01318 "pand %4, %%mm2 \n\t"
01319 "psllq $3, %%mm0 \n\t" /* same shifts as the scalar loop: <<3, >>3, >>8 */
01320 "psrlq $3, %%mm1 \n\t"
01321 "psrlq $8, %%mm2 \n\t"
01322 "movq %%mm0, %%mm3 \n\t" /* keep copies for the upper two pixels */
01323 "movq %%mm1, %%mm4 \n\t"
01324 "movq %%mm2, %%mm5 \n\t"
01325 "punpcklwd %%mm7, %%mm0 \n\t" /* widen words 0-1 to dwords (zero-extended via mm7) */
01326 "punpcklwd %%mm7, %%mm1 \n\t"
01327 "punpcklwd %%mm7, %%mm2 \n\t"
01328 "punpckhwd %%mm7, %%mm3 \n\t" /* widen words 2-3 to dwords */
01329 "punpckhwd %%mm7, %%mm4 \n\t"
01330 "punpckhwd %%mm7, %%mm5 \n\t"
01331 "psllq $8, %%mm1 \n\t" /* move second field to byte 1 of each dword */
01332 "psllq $16, %%mm2 \n\t" /* move third field to byte 2 of each dword */
01333 "por %%mm1, %%mm0 \n\t"
01334 "por %%mm2, %%mm0 \n\t" /* mm0 = output pixels 0-1 */
01335 "psllq $8, %%mm4 \n\t"
01336 "psllq $16, %%mm5 \n\t"
01337 "por %%mm4, %%mm3 \n\t"
01338 "por %%mm5, %%mm3 \n\t" /* mm3 = output pixels 2-3 */
01339 MOVNTQ" %%mm0, %0 \n\t"
01340 MOVNTQ" %%mm3, 8%0 \n\t"
01341 :"=m"(*d)
01342 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01343 :"memory");
01344 d += 16; /* 4 pixels * 4 output bytes */
01345 s += 4; /* 4 input words */
01346 }
01347 __asm __volatile(SFENCE:::"memory");
01348 __asm __volatile(EMMS:::"memory");
01349 #endif
/* Scalar path: the whole input without MMX, otherwise the remaining pixels. */
01350 while (s < end)
01351 {
01352 register uint16_t bgr;
01353 bgr = *s++;
/* The byte order of the output pixel is reversed on big-endian builds. */
01354 #ifdef WORDS_BIGENDIAN
01355 *d++ = 0;
01356 *d++ = (bgr&0xF800)>>8;
01357 *d++ = (bgr&0x7E0)>>3;
01358 *d++ = (bgr&0x1F)<<3;
01359 #else
01360 *d++ = (bgr&0x1F)<<3; /* bits 0-4 -> top 5 bits of the byte */
01361 *d++ = (bgr&0x7E0)>>3; /* bits 5-10 -> top 6 bits of the byte */
01362 *d++ = (bgr&0xF800)>>8; /* bits 11-15 -> top 5 bits of the byte */
01363 *d++ = 0;
01364 #endif
01365 }
01366 }
01367
01368 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
01369 {
01370 long idx = 15 - src_size;
01371 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx;
01372 #ifdef HAVE_MMX
01373 __asm __volatile(
01374 "test %0, %0 \n\t"
01375 "jns 2f \n\t"
01376 PREFETCH" (%1, %0) \n\t"
01377 "movq %3, %%mm7 \n\t"
01378 "pxor %4, %%mm7 \n\t"
01379 "movq %%mm7, %%mm6 \n\t"
01380 "pxor %5, %%mm7 \n\t"
01381 ASMALIGN(4)
01382 "1: \n\t"
01383 PREFETCH" 32(%1, %0) \n\t"
01384 "movq (%1, %0), %%mm0 \n\t"
01385 "movq 8(%1, %0), %%mm1 \n\t"
01386 # ifdef HAVE_MMX2
01387 "pshufw $177, %%mm0, %%mm3 \n\t"
01388 "pshufw $177, %%mm1, %%mm5 \n\t"
01389 "pand %%mm7, %%mm0 \n\t"
01390 "pand %%mm6, %%mm3 \n\t"
01391 "pand %%mm7, %%mm1 \n\t"
01392 "pand %%mm6, %%mm5 \n\t"
01393 "por %%mm3, %%mm0 \n\t"
01394 "por %%mm5, %%mm1 \n\t"
01395 # else
01396 "movq %%mm0, %%mm2 \n\t"
01397 "movq %%mm1, %%mm4 \n\t"
01398 "pand %%mm7, %%mm0 \n\t"
01399 "pand %%mm6, %%mm2 \n\t"
01400 "pand %%mm7, %%mm1 \n\t"
01401 "pand %%mm6, %%mm4 \n\t"
01402 "movq %%mm2, %%mm3 \n\t"
01403 "movq %%mm4, %%mm5 \n\t"
01404 "pslld $16, %%mm2 \n\t"
01405 "psrld $16, %%mm3 \n\t"
01406 "pslld $16, %%mm4 \n\t"
01407 "psrld $16, %%mm5 \n\t"
01408 "por %%mm2, %%mm0 \n\t"
01409 "por %%mm4, %%mm1 \n\t"
01410 "por %%mm3, %%mm0 \n\t"
01411 "por %%mm5, %%mm1 \n\t"
01412 # endif
01413 MOVNTQ" %%mm0, (%2, %0) \n\t"
01414 MOVNTQ" %%mm1, 8(%2, %0) \n\t"
01415 "add $16, %0 \n\t"
01416 "js 1b \n\t"
01417 SFENCE" \n\t"
01418 EMMS" \n\t"
01419 "2: \n\t"
01420 : "+&r"(idx)
01421 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01422 : "memory");
01423 #endif
01424 for (; idx<15; idx+=4) {
01425 register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00;
01426 v &= 0xff00ff;
01427 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01428 }
01429 }
01430
01431 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01432 {
01433 unsigned i;
01434 #ifdef HAVE_MMX
01435 long mmx_size= 23 - src_size;
01436 asm volatile (
01437 "test %%"REG_a", %%"REG_a" \n\t"
01438 "jns 2f \n\t"
01439 "movq "MANGLE(mask24r)", %%mm5 \n\t"
01440 "movq "MANGLE(mask24g)", %%mm6 \n\t"
01441 "movq "MANGLE(mask24b)", %%mm7 \n\t"
01442 ASMALIGN(4)
01443 "1: \n\t"
01444 PREFETCH" 32(%1, %%"REG_a") \n\t"
01445 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01446 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01447 "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
01448 "psllq $16, %%mm0 \n\t"
01449 "pand %%mm5, %%mm0 \n\t"
01450 "pand %%mm6, %%mm1 \n\t"
01451 "pand %%mm7, %%mm2 \n\t"
01452 "por %%mm0, %%mm1 \n\t"
01453 "por %%mm2, %%mm1 \n\t"
01454 "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
01455 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
01456 "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
01457 "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
01458 "pand %%mm7, %%mm0 \n\t"
01459 "pand %%mm5, %%mm1 \n\t"
01460 "pand %%mm6, %%mm2 \n\t"
01461 "por %%mm0, %%mm1 \n\t"
01462 "por %%mm2, %%mm1 \n\t"
01463 "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
01464 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
01465 "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
01466 "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
01467 "pand %%mm6, %%mm0 \n\t"
01468 "pand %%mm7, %%mm1 \n\t"
01469 "pand %%mm5, %%mm2 \n\t"
01470 "por %%mm0, %%mm1 \n\t"
01471 "por %%mm2, %%mm1 \n\t"
01472 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
01473 "add $24, %%"REG_a" \n\t"
01474 " js 1b \n\t"
01475 "2: \n\t"
01476 : "+a" (mmx_size)
01477 : "r" (src-mmx_size), "r"(dst-mmx_size)
01478 );
01479
01480 __asm __volatile(SFENCE:::"memory");
01481 __asm __volatile(EMMS:::"memory");
01482
01483 if (mmx_size==23) return;
01484
01485 src+= src_size;
01486 dst+= src_size;
01487 src_size= 23-mmx_size;
01488 src-= src_size;
01489 dst-= src_size;
01490 #endif
01491 for (i=0; i<src_size; i+=3)
01492 {
01493 register uint8_t x;
01494 x = src[i + 2];
01495 dst[i + 1] = src[i + 1];
01496 dst[i + 2] = src[i + 0];
01497 dst[i + 0] = x;
01498 }
01499 }
01500
01501 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01502 long width, long height,
01503 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01504 {
01505 long y;
01506 const long chromWidth= width>>1;
01507 for (y=0; y<height; y++)
01508 {
01509 #ifdef HAVE_MMX
01510
01511 asm volatile(
01512 "xor %%"REG_a", %%"REG_a" \n\t"
01513 ASMALIGN(4)
01514 "1: \n\t"
01515 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01516 PREFETCH" 32(%2, %%"REG_a") \n\t"
01517 PREFETCH" 32(%3, %%"REG_a") \n\t"
01518 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01519 "movq %%mm0, %%mm2 \n\t"
01520 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01521 "punpcklbw %%mm1, %%mm0 \n\t"
01522 "punpckhbw %%mm1, %%mm2 \n\t"
01523
01524 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01525 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01526 "movq %%mm3, %%mm4 \n\t"
01527 "movq %%mm5, %%mm6 \n\t"
01528 "punpcklbw %%mm0, %%mm3 \n\t"
01529 "punpckhbw %%mm0, %%mm4 \n\t"
01530 "punpcklbw %%mm2, %%mm5 \n\t"
01531 "punpckhbw %%mm2, %%mm6 \n\t"
01532
01533 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
01534 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01535 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
01536 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01537
01538 "add $8, %%"REG_a" \n\t"
01539 "cmp %4, %%"REG_a" \n\t"
01540 " jb 1b \n\t"
01541 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01542 : "%"REG_a
01543 );
01544 #else
01545
01546 #if defined ARCH_ALPHA && defined HAVE_MVI
01547 #define pl2yuy2(n) \
01548 y1 = yc[n]; \
01549 y2 = yc2[n]; \
01550 u = uc[n]; \
01551 v = vc[n]; \
01552 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
01553 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
01554 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
01555 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
01556 yuv1 = (u << 8) + (v << 24); \
01557 yuv2 = yuv1 + y2; \
01558 yuv1 += y1; \
01559 qdst[n] = yuv1; \
01560 qdst2[n] = yuv2;
01561
01562 int i;
01563 uint64_t *qdst = (uint64_t *) dst;
01564 uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
01565 const uint32_t *yc = (uint32_t *) ysrc;
01566 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
01567 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
01568 for (i = 0; i < chromWidth; i += 8){
01569 uint64_t y1, y2, yuv1, yuv2;
01570 uint64_t u, v;
01571
01572 asm("ldq $31,64(%0)" :: "r"(yc));
01573 asm("ldq $31,64(%0)" :: "r"(yc2));
01574 asm("ldq $31,64(%0)" :: "r"(uc));
01575 asm("ldq $31,64(%0)" :: "r"(vc));
01576
01577 pl2yuy2(0);
01578 pl2yuy2(1);
01579 pl2yuy2(2);
01580 pl2yuy2(3);
01581
01582 yc += 4;
01583 yc2 += 4;
01584 uc += 4;
01585 vc += 4;
01586 qdst += 4;
01587 qdst2 += 4;
01588 }
01589 y++;
01590 ysrc += lumStride;
01591 dst += dstStride;
01592
01593 #elif __WORDSIZE >= 64
01594 int i;
01595 uint64_t *ldst = (uint64_t *) dst;
01596 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01597 for (i = 0; i < chromWidth; i += 2){
01598 uint64_t k, l;
01599 k = yc[0] + (uc[0] << 8) +
01600 (yc[1] << 16) + (vc[0] << 24);
01601 l = yc[2] + (uc[1] << 8) +
01602 (yc[3] << 16) + (vc[1] << 24);
01603 *ldst++ = k + (l << 32);
01604 yc += 4;
01605 uc += 2;
01606 vc += 2;
01607 }
01608
01609 #else
01610 int i, *idst = (int32_t *) dst;
01611 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01612 for (i = 0; i < chromWidth; i++){
01613 #ifdef WORDS_BIGENDIAN
01614 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
01615 (yc[1] << 8) + (vc[0] << 0);
01616 #else
01617 *idst++ = yc[0] + (uc[0] << 8) +
01618 (yc[1] << 16) + (vc[0] << 24);
01619 #endif
01620 yc += 2;
01621 uc++;
01622 vc++;
01623 }
01624 #endif
01625 #endif
01626 if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
01627 {
01628 usrc += chromStride;
01629 vsrc += chromStride;
01630 }
01631 ysrc += lumStride;
01632 dst += dstStride;
01633 }
01634 #ifdef HAVE_MMX
01635 asm( EMMS" \n\t"
01636 SFENCE" \n\t"
01637 :::"memory");
01638 #endif
01639 }
01640
01646 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01647 long width, long height,
01648 long lumStride, long chromStride, long dstStride)
01649 {
01650
01651 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01652 }
01653
01654 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01655 long width, long height,
01656 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01657 {
01658 long y;
01659 const long chromWidth= width>>1;
01660 for (y=0; y<height; y++)
01661 {
01662 #ifdef HAVE_MMX
01663
01664 asm volatile(
01665 "xor %%"REG_a", %%"REG_a" \n\t"
01666 ASMALIGN(4)
01667 "1: \n\t"
01668 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01669 PREFETCH" 32(%2, %%"REG_a") \n\t"
01670 PREFETCH" 32(%3, %%"REG_a") \n\t"
01671 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01672 "movq %%mm0, %%mm2 \n\t"
01673 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01674 "punpcklbw %%mm1, %%mm0 \n\t"
01675 "punpckhbw %%mm1, %%mm2 \n\t"
01676
01677 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01678 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01679 "movq %%mm0, %%mm4 \n\t"
01680 "movq %%mm2, %%mm6 \n\t"
01681 "punpcklbw %%mm3, %%mm0 \n\t"
01682 "punpckhbw %%mm3, %%mm4 \n\t"
01683 "punpcklbw %%mm5, %%mm2 \n\t"
01684 "punpckhbw %%mm5, %%mm6 \n\t"
01685
01686 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
01687 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01688 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
01689 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01690
01691 "add $8, %%"REG_a" \n\t"
01692 "cmp %4, %%"REG_a" \n\t"
01693 " jb 1b \n\t"
01694 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01695 : "%"REG_a
01696 );
01697 #else
01698
01699
01700 #if __WORDSIZE >= 64
01701 int i;
01702 uint64_t *ldst = (uint64_t *) dst;
01703 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01704 for (i = 0; i < chromWidth; i += 2){
01705 uint64_t k, l;
01706 k = uc[0] + (yc[0] << 8) +
01707 (vc[0] << 16) + (yc[1] << 24);
01708 l = uc[1] + (yc[2] << 8) +
01709 (vc[1] << 16) + (yc[3] << 24);
01710 *ldst++ = k + (l << 32);
01711 yc += 4;
01712 uc += 2;
01713 vc += 2;
01714 }
01715
01716 #else
01717 int i, *idst = (int32_t *) dst;
01718 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01719 for (i = 0; i < chromWidth; i++){
01720 #ifdef WORDS_BIGENDIAN
01721 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
01722 (vc[0] << 8) + (yc[1] << 0);
01723 #else
01724 *idst++ = uc[0] + (yc[0] << 8) +
01725 (vc[0] << 16) + (yc[1] << 24);
01726 #endif
01727 yc += 2;
01728 uc++;
01729 vc++;
01730 }
01731 #endif
01732 #endif
01733 if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
01734 {
01735 usrc += chromStride;
01736 vsrc += chromStride;
01737 }
01738 ysrc += lumStride;
01739 dst += dstStride;
01740 }
01741 #ifdef HAVE_MMX
01742 asm( EMMS" \n\t"
01743 SFENCE" \n\t"
01744 :::"memory");
01745 #endif
01746 }
01747
01753 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01754 long width, long height,
01755 long lumStride, long chromStride, long dstStride)
01756 {
01757
01758 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01759 }
01760
01765 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01766 long width, long height,
01767 long lumStride, long chromStride, long dstStride)
01768 {
01769 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01770 }
01771
/*
 * Split packed Y0 U Y1 V data into separate Y, U and V planes.
 *
 * Input lines are consumed in pairs (y += 2): luma is extracted from both
 * lines, chroma only from the first line of each pair; the chroma bytes of
 * the second line are ignored.
 *
 * src         : packed input, 4 bytes per 2 pixels
 * ydst        : luma output, two lines written per loop iteration
 * udst, vdst  : chroma outputs, one line written per loop iteration
 * width       : pixels per line; width>>1 chroma samples are produced
 * height      : number of input lines
 * lumStride   : bytes between ydst lines
 * chromStride : bytes between udst / vdst lines
 * srcStride   : bytes between src lines
 *
 * The MMX loops handle 8 chroma samples (16 pixels) per iteration and always
 * run at least once, so they expect width to be a multiple of 16.
 */
01777 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01778 long width, long height,
01779 long lumStride, long chromStride, long srcStride)
01780 {
01781 long y;
01782 const long chromWidth= width>>1;
01783 for (y=0; y<height; y+=2)
01784 {
01785 #ifdef HAVE_MMX
/* First line of the pair: extract Y, U and V. REG_a counts chroma samples. */
01786 asm volatile(
01787 "xor %%"REG_a", %%"REG_a" \n\t"
01788 "pcmpeqw %%mm7, %%mm7 \n\t" /* mm7 = all ones ... */
01789 "psrlw $8, %%mm7 \n\t" /* ... -> 0x00FF in every word: low-byte mask */
01790 ASMALIGN(4)
01791 "1: \n\t"
01792 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01793 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" /* source bytes 0-7 */
01794 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" /* source bytes 8-15 */
01795 "movq %%mm0, %%mm2 \n\t"
01796 "movq %%mm1, %%mm3 \n\t"
01797 "psrlw $8, %%mm0 \n\t" /* odd bytes: U V U V */
01798 "psrlw $8, %%mm1 \n\t"
01799 "pand %%mm7, %%mm2 \n\t" /* even bytes: Y */
01800 "pand %%mm7, %%mm3 \n\t"
01801 "packuswb %%mm1, %%mm0 \n\t" /* mm0 = 8 interleaved U/V bytes */
01802 "packuswb %%mm3, %%mm2 \n\t" /* mm2 = 8 Y bytes */
01803
01804 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01805
01806 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" /* source bytes 16-23 */
01807 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" /* source bytes 24-31 */
01808 "movq %%mm1, %%mm3 \n\t"
01809 "movq %%mm2, %%mm4 \n\t"
01810 "psrlw $8, %%mm1 \n\t" /* odd bytes: U V U V */
01811 "psrlw $8, %%mm2 \n\t"
01812 "pand %%mm7, %%mm3 \n\t" /* even bytes: Y */
01813 "pand %%mm7, %%mm4 \n\t"
01814 "packuswb %%mm2, %%mm1 \n\t" /* mm1 = next 8 interleaved U/V bytes */
01815 "packuswb %%mm4, %%mm3 \n\t" /* mm3 = next 8 Y bytes */
01816
01817 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01818
/* Split the 16 interleaved U/V bytes held in mm0/mm1 the same way. */
01819 "movq %%mm0, %%mm2 \n\t"
01820 "movq %%mm1, %%mm3 \n\t"
01821 "psrlw $8, %%mm0 \n\t" /* odd bytes: V */
01822 "psrlw $8, %%mm1 \n\t"
01823 "pand %%mm7, %%mm2 \n\t" /* even bytes: U */
01824 "pand %%mm7, %%mm3 \n\t"
01825 "packuswb %%mm1, %%mm0 \n\t" /* mm0 = 8 V bytes */
01826 "packuswb %%mm3, %%mm2 \n\t" /* mm2 = 8 U bytes */
01827
01828 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01829 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01830
01831 "add $8, %%"REG_a" \n\t"
01832 "cmp %4, %%"REG_a" \n\t"
01833 " jb 1b \n\t"
01834 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01835 : "memory", "%"REG_a
01836 );
01837
01838 ydst += lumStride;
01839 src += srcStride;
01840
/* Second line of the pair: luma only. This statement does not reload mm7;
 * it relies on the 0x00FF mask left there by the asm statement above. */
01841 asm volatile(
01842 "xor %%"REG_a", %%"REG_a" \n\t"
01843 ASMALIGN(4)
01844 "1: \n\t"
01845 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01846 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01847 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01848 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01849 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01850 "pand %%mm7, %%mm0 \n\t" /* keep the even (Y) bytes */
01851 "pand %%mm7, %%mm1 \n\t"
01852 "pand %%mm7, %%mm2 \n\t"
01853 "pand %%mm7, %%mm3 \n\t"
01854 "packuswb %%mm1, %%mm0 \n\t"
01855 "packuswb %%mm3, %%mm2 \n\t"
01856
01857 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01858 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01859
01860 "add $8, %%"REG_a" \n\t"
01861 "cmp %4, %%"REG_a" \n\t"
01862 " jb 1b \n\t"
01863
01864 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01865 : "memory", "%"REG_a
01866 );
01867 #else
01868 long i;
/* First line of the pair: bytes 0 and 2 are luma, byte 1 is U, byte 3 is V. */
01869 for (i=0; i<chromWidth; i++)
01870 {
01871 ydst[2*i+0] = src[4*i+0];
01872 udst[i] = src[4*i+1];
01873 ydst[2*i+1] = src[4*i+2];
01874 vdst[i] = src[4*i+3];
01875 }
01876 ydst += lumStride;
01877 src += srcStride;
01878
/* Second line of the pair: luma only. */
01879 for (i=0; i<chromWidth; i++)
01880 {
01881 ydst[2*i+0] = src[4*i+0];
01882 ydst[2*i+1] = src[4*i+2];
01883 }
01884 #endif
/* Step past the second line of the pair and to the next chroma line. */
01885 udst += chromStride;
01886 vdst += chromStride;
01887 ydst += lumStride;
01888 src += srcStride;
01889 }
01890 #ifdef HAVE_MMX
01891 asm volatile( EMMS" \n\t"
01892 SFENCE" \n\t"
01893 :::"memory");
01894 #endif
01895 }
01896
01897 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
01898 uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01899 long width, long height, long lumStride, long chromStride)
01900 {
01901
01902 memcpy(ydst, ysrc, width*height);
01903
01904
01905 }
01906
01907 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
01908 {
01909 long x,y;
01910
01911 dst[0]= src[0];
01912
01913
01914 for (x=0; x<srcWidth-1; x++){
01915 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01916 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01917 }
01918 dst[2*srcWidth-1]= src[srcWidth-1];
01919
01920 dst+= dstStride;
01921
01922 for (y=1; y<srcHeight; y++){
01923 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01924 const long mmxSize= srcWidth&~15;
01925 asm volatile(
01926 "mov %4, %%"REG_a" \n\t"
01927 "1: \n\t"
01928 "movq (%0, %%"REG_a"), %%mm0 \n\t"
01929 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01930 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
01931 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
01932 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
01933 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
01934 PAVGB" %%mm0, %%mm5 \n\t"
01935 PAVGB" %%mm0, %%mm3 \n\t"
01936 PAVGB" %%mm0, %%mm5 \n\t"
01937 PAVGB" %%mm0, %%mm3 \n\t"
01938 PAVGB" %%mm1, %%mm4 \n\t"
01939 PAVGB" %%mm1, %%mm2 \n\t"
01940 PAVGB" %%mm1, %%mm4 \n\t"
01941 PAVGB" %%mm1, %%mm2 \n\t"
01942 "movq %%mm5, %%mm7 \n\t"
01943 "movq %%mm4, %%mm6 \n\t"
01944 "punpcklbw %%mm3, %%mm5 \n\t"
01945 "punpckhbw %%mm3, %%mm7 \n\t"
01946 "punpcklbw %%mm2, %%mm4 \n\t"
01947 "punpckhbw %%mm2, %%mm6 \n\t"
01948 #if 1
01949 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
01950 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01951 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
01952 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01953 #else
01954 "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
01955 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01956 "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
01957 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01958 #endif
01959 "add $8, %%"REG_a" \n\t"
01960 " js 1b \n\t"
01961 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
01962 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01963 "g" (-mmxSize)
01964 : "%"REG_a
01965
01966 );
01967 #else
01968 const long mmxSize=1;
01969 #endif
01970 dst[0 ]= (3*src[0] + src[srcStride])>>2;
01971 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
01972
01973 for (x=mmxSize-1; x<srcWidth-1; x++){
01974 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
01975 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
01976 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
01977 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
01978 }
01979 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
01980 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01981
01982 dst+=dstStride*2;
01983 src+=srcStride;
01984 }
01985
01986
01987 #if 1
01988 dst[0]= src[0];
01989
01990 for (x=0; x<srcWidth-1; x++){
01991 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01992 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01993 }
01994 dst[2*srcWidth-1]= src[srcWidth-1];
01995 #else
01996 for (x=0; x<srcWidth; x++){
01997 dst[2*x+0]=
01998 dst[2*x+1]= src[x];
01999 }
02000 #endif
02001
02002 #ifdef HAVE_MMX
02003 asm volatile( EMMS" \n\t"
02004 SFENCE" \n\t"
02005 :::"memory");
02006 #endif
02007 }
02008
02015 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02016 long width, long height,
02017 long lumStride, long chromStride, long srcStride)
02018 {
02019 long y;
02020 const long chromWidth= width>>1;
02021 for (y=0; y<height; y+=2)
02022 {
02023 #ifdef HAVE_MMX
02024 asm volatile(
02025 "xorl %%eax, %%eax \n\t"
02026 "pcmpeqw %%mm7, %%mm7 \n\t"
02027 "psrlw $8, %%mm7 \n\t"
02028 ASMALIGN(4)
02029 "1: \n\t"
02030 PREFETCH" 64(%0, %%eax, 4) \n\t"
02031 "movq (%0, %%eax, 4), %%mm0 \n\t"
02032 "movq 8(%0, %%eax, 4), %%mm1 \n\t"
02033 "movq %%mm0, %%mm2 \n\t"
02034 "movq %%mm1, %%mm3 \n\t"
02035 "pand %%mm7, %%mm0 \n\t"
02036 "pand %%mm7, %%mm1 \n\t"
02037 "psrlw $8, %%mm2 \n\t"
02038 "psrlw $8, %%mm3 \n\t"
02039 "packuswb %%mm1, %%mm0 \n\t"
02040 "packuswb %%mm3, %%mm2 \n\t"
02041
02042 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"
02043
02044 "movq 16(%0, %%eax, 4), %%mm1 \n\t"
02045 "movq 24(%0, %%eax, 4), %%mm2 \n\t"
02046 "movq %%mm1, %%mm3 \n\t"
02047 "movq %%mm2, %%mm4 \n\t"
02048 "pand %%mm7, %%mm1 \n\t"
02049 "pand %%mm7, %%mm2 \n\t"
02050 "psrlw $8, %%mm3 \n\t"
02051 "psrlw $8, %%mm4 \n\t"
02052 "packuswb %%mm2, %%mm1 \n\t"
02053 "packuswb %%mm4, %%mm3 \n\t"
02054
02055 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"
02056
02057 "movq %%mm0, %%mm2 \n\t"
02058 "movq %%mm1, %%mm3 \n\t"
02059 "psrlw $8, %%mm0 \n\t"
02060 "psrlw $8, %%mm1 \n\t"
02061 "pand %%mm7, %%mm2 \n\t"
02062 "pand %%mm7, %%mm3 \n\t"
02063 "packuswb %%mm1, %%mm0 \n\t"
02064 "packuswb %%mm3, %%mm2 \n\t"
02065
02066 MOVNTQ" %%mm0, (%3, %%eax) \n\t"
02067 MOVNTQ" %%mm2, (%2, %%eax) \n\t"
02068
02069 "addl $8, %%eax \n\t"
02070 "cmpl %4, %%eax \n\t"
02071 " jb 1b \n\t"
02072 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02073 : "memory", "%eax"
02074 );
02075
02076 ydst += lumStride;
02077 src += srcStride;
02078
02079 asm volatile(
02080 "xorl %%eax, %%eax \n\t"
02081 ASMALIGN(4)
02082 "1: \n\t"
02083 PREFETCH" 64(%0, %%eax, 4) \n\t"
02084 "movq (%0, %%eax, 4), %%mm0 \n\t"
02085 "movq 8(%0, %%eax, 4), %%mm1 \n\t"
02086 "movq 16(%0, %%eax, 4), %%mm2 \n\t"
02087 "movq 24(%0, %%eax, 4), %%mm3 \n\t"
02088 "psrlw $8, %%mm0 \n\t"
02089 "psrlw $8, %%mm1 \n\t"
02090 "psrlw $8, %%mm2 \n\t"
02091 "psrlw $8, %%mm3 \n\t"
02092 "packuswb %%mm1, %%mm0 \n\t"
02093 "packuswb %%mm3, %%mm2 \n\t"
02094
02095 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
02096 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"
02097
02098 "addl $8, %%eax \n\t"
02099 "cmpl %4, %%eax \n\t"
02100 " jb 1b \n\t"
02101
02102 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02103 : "memory", "%eax"
02104 );
02105 #else
02106 long i;
02107 for (i=0; i<chromWidth; i++)
02108 {
02109 udst[i] = src[4*i+0];
02110 ydst[2*i+0] = src[4*i+1];
02111 vdst[i] = src[4*i+2];
02112 ydst[2*i+1] = src[4*i+3];
02113 }
02114 ydst += lumStride;
02115 src += srcStride;
02116
02117 for (i=0; i<chromWidth; i++)
02118 {
02119 ydst[2*i+0] = src[4*i+1];
02120 ydst[2*i+1] = src[4*i+3];
02121 }
02122 #endif
02123 udst += chromStride;
02124 vdst += chromStride;
02125 ydst += lumStride;
02126 src += srcStride;
02127 }
02128 #ifdef HAVE_MMX
02129 asm volatile( EMMS" \n\t"
02130 SFENCE" \n\t"
02131 :::"memory");
02132 #endif
02133 }
02134
02141 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02142 long width, long height,
02143 long lumStride, long chromStride, long srcStride)
02144 {
02145 long y;
02146 const long chromWidth= width>>1;
02147 #ifdef HAVE_MMX
02148 for (y=0; y<height-2; y+=2)
02149 {
02150 long i;
02151 for (i=0; i<2; i++)
02152 {
02153 asm volatile(
02154 "mov %2, %%"REG_a" \n\t"
02155 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
02156 "movq "MANGLE(w1111)", %%mm5 \n\t"
02157 "pxor %%mm7, %%mm7 \n\t"
02158 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02159 ASMALIGN(4)
02160 "1: \n\t"
02161 PREFETCH" 64(%0, %%"REG_d") \n\t"
02162 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02163 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
02164 "punpcklbw %%mm7, %%mm0 \n\t"
02165 "punpcklbw %%mm7, %%mm1 \n\t"
02166 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
02167 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
02168 "punpcklbw %%mm7, %%mm2 \n\t"
02169 "punpcklbw %%mm7, %%mm3 \n\t"
02170 "pmaddwd %%mm6, %%mm0 \n\t"
02171 "pmaddwd %%mm6, %%mm1 \n\t"
02172 "pmaddwd %%mm6, %%mm2 \n\t"
02173 "pmaddwd %%mm6, %%mm3 \n\t"
02174 #ifndef FAST_BGR2YV12
02175 "psrad $8, %%mm0 \n\t"
02176 "psrad $8, %%mm1 \n\t"
02177 "psrad $8, %%mm2 \n\t"
02178 "psrad $8, %%mm3 \n\t"
02179 #endif
02180 "packssdw %%mm1, %%mm0 \n\t"
02181 "packssdw %%mm3, %%mm2 \n\t"
02182 "pmaddwd %%mm5, %%mm0 \n\t"
02183 "pmaddwd %%mm5, %%mm2 \n\t"
02184 "packssdw %%mm2, %%mm0 \n\t"
02185 "psraw $7, %%mm0 \n\t"
02186
02187 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02188 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
02189 "punpcklbw %%mm7, %%mm4 \n\t"
02190 "punpcklbw %%mm7, %%mm1 \n\t"
02191 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
02192 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
02193 "punpcklbw %%mm7, %%mm2 \n\t"
02194 "punpcklbw %%mm7, %%mm3 \n\t"
02195 "pmaddwd %%mm6, %%mm4 \n\t"
02196 "pmaddwd %%mm6, %%mm1 \n\t"
02197 "pmaddwd %%mm6, %%mm2 \n\t"
02198 "pmaddwd %%mm6, %%mm3 \n\t"
02199 #ifndef FAST_BGR2YV12
02200 "psrad $8, %%mm4 \n\t"
02201 "psrad $8, %%mm1 \n\t"
02202 "psrad $8, %%mm2 \n\t"
02203 "psrad $8, %%mm3 \n\t"
02204 #endif
02205 "packssdw %%mm1, %%mm4 \n\t"
02206 "packssdw %%mm3, %%mm2 \n\t"
02207 "pmaddwd %%mm5, %%mm4 \n\t"
02208 "pmaddwd %%mm5, %%mm2 \n\t"
02209 "add $24, %%"REG_d" \n\t"
02210 "packssdw %%mm2, %%mm4 \n\t"
02211 "psraw $7, %%mm4 \n\t"
02212
02213 "packuswb %%mm4, %%mm0 \n\t"
02214 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
02215
02216 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
02217 "add $8, %%"REG_a" \n\t"
02218 " js 1b \n\t"
02219 : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
02220 : "%"REG_a, "%"REG_d
02221 );
02222 ydst += lumStride;
02223 src += srcStride;
02224 }
02225 src -= srcStride*2;
02226 asm volatile(
02227 "mov %4, %%"REG_a" \n\t"
02228 "movq "MANGLE(w1111)", %%mm5 \n\t"
02229 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
02230 "pxor %%mm7, %%mm7 \n\t"
02231 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02232 "add %%"REG_d", %%"REG_d" \n\t"
02233 ASMALIGN(4)
02234 "1: \n\t"
02235 PREFETCH" 64(%0, %%"REG_d") \n\t"
02236 PREFETCH" 64(%1, %%"REG_d") \n\t"
02237 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02238 "movq (%0, %%"REG_d"), %%mm0 \n\t"
02239 "movq (%1, %%"REG_d"), %%mm1 \n\t"
02240 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
02241 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
02242 PAVGB" %%mm1, %%mm0 \n\t"
02243 PAVGB" %%mm3, %%mm2 \n\t"
02244 "movq %%mm0, %%mm1 \n\t"
02245 "movq %%mm2, %%mm3 \n\t"
02246 "psrlq $24, %%mm0 \n\t"
02247 "psrlq $24, %%mm2 \n\t"
02248 PAVGB" %%mm1, %%mm0 \n\t"
02249 PAVGB" %%mm3, %%mm2 \n\t"
02250 "punpcklbw %%mm7, %%mm0 \n\t"
02251 "punpcklbw %%mm7, %%mm2 \n\t"
02252 #else
02253 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02254 "movd (%1, %%"REG_d"), %%mm1 \n\t"
02255 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
02256 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
02257 "punpcklbw %%mm7, %%mm0 \n\t"
02258 "punpcklbw %%mm7, %%mm1 \n\t"
02259 "punpcklbw %%mm7, %%mm2 \n\t"
02260 "punpcklbw %%mm7, %%mm3 \n\t"
02261 "paddw %%mm1, %%mm0 \n\t"
02262 "paddw %%mm3, %%mm2 \n\t"
02263 "paddw %%mm2, %%mm0 \n\t"
02264 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
02265 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
02266 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
02267 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
02268 "punpcklbw %%mm7, %%mm4 \n\t"
02269 "punpcklbw %%mm7, %%mm1 \n\t"
02270 "punpcklbw %%mm7, %%mm2 \n\t"
02271 "punpcklbw %%mm7, %%mm3 \n\t"
02272 "paddw %%mm1, %%mm4 \n\t"
02273 "paddw %%mm3, %%mm2 \n\t"
02274 "paddw %%mm4, %%mm2 \n\t"
02275 "psrlw $2, %%mm0 \n\t"
02276 "psrlw $2, %%mm2 \n\t"
02277 #endif
02278 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
02279 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
02280
02281 "pmaddwd %%mm0, %%mm1 \n\t"
02282 "pmaddwd %%mm2, %%mm3 \n\t"
02283 "pmaddwd %%mm6, %%mm0 \n\t"
02284 "pmaddwd %%mm6, %%mm2 \n\t"
02285 #ifndef FAST_BGR2YV12
02286 "psrad $8, %%mm0 \n\t"
02287 "psrad $8, %%mm1 \n\t"
02288 "psrad $8, %%mm2 \n\t"
02289 "psrad $8, %%mm3 \n\t"
02290 #endif
02291 "packssdw %%mm2, %%mm0 \n\t"
02292 "packssdw %%mm3, %%mm1 \n\t"
02293 "pmaddwd %%mm5, %%mm0 \n\t"
02294 "pmaddwd %%mm5, %%mm1 \n\t"
02295 "packssdw %%mm1, %%mm0 \n\t"
02296 "psraw $7, %%mm0 \n\t"
02297
02298 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02299 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
02300 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
02301 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
02302 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
02303 PAVGB" %%mm1, %%mm4 \n\t"
02304 PAVGB" %%mm3, %%mm2 \n\t"
02305 "movq %%mm4, %%mm1 \n\t"
02306 "movq %%mm2, %%mm3 \n\t"
02307 "psrlq $24, %%mm4 \n\t"
02308 "psrlq $24, %%mm2 \n\t"
02309 PAVGB" %%mm1, %%mm4 \n\t"
02310 PAVGB" %%mm3, %%mm2 \n\t"
02311 "punpcklbw %%mm7, %%mm4 \n\t"
02312 "punpcklbw %%mm7, %%mm2 \n\t"
02313 #else
02314 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02315 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
02316 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
02317 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
02318 "punpcklbw %%mm7, %%mm4 \n\t"
02319 "punpcklbw %%mm7, %%mm1 \n\t"
02320 "punpcklbw %%mm7, %%mm2 \n\t"
02321 "punpcklbw %%mm7, %%mm3 \n\t"
02322 "paddw %%mm1, %%mm4 \n\t"
02323 "paddw %%mm3, %%mm2 \n\t"
02324 "paddw %%mm2, %%mm4 \n\t"
02325 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
02326 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
02327 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
02328 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
02329 "punpcklbw %%mm7, %%mm5 \n\t"
02330 "punpcklbw %%mm7, %%mm1 \n\t"
02331 "punpcklbw %%mm7, %%mm2 \n\t"
02332 "punpcklbw %%mm7, %%mm3 \n\t"
02333 "paddw %%mm1, %%mm5 \n\t"
02334 "paddw %%mm3, %%mm2 \n\t"
02335 "paddw %%mm5, %%mm2 \n\t"
02336 "movq "MANGLE(w1111)", %%mm5 \n\t"
02337 "psrlw $2, %%mm4 \n\t"
02338 "psrlw $2, %%mm2 \n\t"
02339 #endif
02340 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
02341 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
02342
02343 "pmaddwd %%mm4, %%mm1 \n\t"
02344 "pmaddwd %%mm2, %%mm3 \n\t"
02345 "pmaddwd %%mm6, %%mm4 \n\t"
02346 "pmaddwd %%mm6, %%mm2 \n\t"
02347 #ifndef FAST_BGR2YV12
02348 "psrad $8, %%mm4 \n\t"
02349 "psrad $8, %%mm1 \n\t"
02350 "psrad $8, %%mm2 \n\t"
02351 "psrad $8, %%mm3 \n\t"
02352 #endif
02353 "packssdw %%mm2, %%mm4 \n\t"
02354 "packssdw %%mm3, %%mm1 \n\t"
02355 "pmaddwd %%mm5, %%mm4 \n\t"
02356 "pmaddwd %%mm5, %%mm1 \n\t"
02357 "add $24, %%"REG_d" \n\t"
02358 "packssdw %%mm1, %%mm4 \n\t"
02359 "psraw $7, %%mm4 \n\t"
02360
02361 "movq %%mm0, %%mm1 \n\t"
02362 "punpckldq %%mm4, %%mm0 \n\t"
02363 "punpckhdq %%mm4, %%mm1 \n\t"
02364 "packsswb %%mm1, %%mm0 \n\t"
02365 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"
02366 "movd %%mm0, (%2, %%"REG_a") \n\t"
02367 "punpckhdq %%mm0, %%mm0 \n\t"
02368 "movd %%mm0, (%3, %%"REG_a") \n\t"
02369 "add $4, %%"REG_a" \n\t"
02370 " js 1b \n\t"
02371 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
02372 : "%"REG_a, "%"REG_d
02373 );
02374
02375 udst += chromStride;
02376 vdst += chromStride;
02377 src += srcStride*2;
02378 }
02379
02380 asm volatile( EMMS" \n\t"
02381 SFENCE" \n\t"
02382 :::"memory");
02383 #else
02384 y=0;
02385 #endif
02386 for (; y<height; y+=2)
02387 {
02388 long i;
02389 for (i=0; i<chromWidth; i++)
02390 {
02391 unsigned int b = src[6*i+0];
02392 unsigned int g = src[6*i+1];
02393 unsigned int r = src[6*i+2];
02394
02395 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02396 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
02397 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
02398
02399 udst[i] = U;
02400 vdst[i] = V;
02401 ydst[2*i] = Y;
02402
02403 b = src[6*i+3];
02404 g = src[6*i+4];
02405 r = src[6*i+5];
02406
02407 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02408 ydst[2*i+1] = Y;
02409 }
02410 ydst += lumStride;
02411 src += srcStride;
02412
02413 for (i=0; i<chromWidth; i++)
02414 {
02415 unsigned int b = src[6*i+0];
02416 unsigned int g = src[6*i+1];
02417 unsigned int r = src[6*i+2];
02418
02419 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02420
02421 ydst[2*i] = Y;
02422
02423 b = src[6*i+3];
02424 g = src[6*i+4];
02425 r = src[6*i+5];
02426
02427 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02428 ydst[2*i+1] = Y;
02429 }
02430 udst += chromStride;
02431 vdst += chromStride;
02432 ydst += lumStride;
02433 src += srcStride;
02434 }
02435 }
02436
/**
 * Interleave two byte planes into one.
 *
 * For each of the height rows:
 *     dest[2*i]   = src1[i]
 *     dest[2*i+1] = src2[i]      for i in [0, width)
 * after which src1, src2 and dest advance by their respective strides.
 *
 * With HAVE_MMX the leading (width & ~15) bytes of a row are handled by a
 * SIMD loop and the remainder by plain C.
 */
void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
                             long width, long height, long src1Stride,
                             long src2Stride, long dstStride)
{
    long h;

    for (h = 0; h < height; h++) {
        long w = 0;

#ifdef HAVE_MMX
        /*
         * Both asm loops execute their body once before testing the counter
         * and compare it unsigned against width-15.  Entering them with
         * width < 16 would touch bytes past the end of the row, and with
         * width < 15 the bound wraps to a huge unsigned value, so the SIMD
         * path is only taken when at least one full 16-byte group exists.
         */
        if (width >= 16) {
#ifdef HAVE_SSE2
            /* movdqa/movntdq fault on addresses that are not 16-byte aligned;
             * the pointers move by the strides, so check them on every row. */
            if (!(((uintptr_t)src1 | (uintptr_t)src2 | (uintptr_t)dest) & 15)) {
                asm(
                    "xor %%"REG_a", %%"REG_a"               \n\t"
                    "1:                                     \n\t"
                    PREFETCH" 64(%1, %%"REG_a")             \n\t"
                    PREFETCH" 64(%2, %%"REG_a")             \n\t"
                    "movdqa (%1, %%"REG_a"), %%xmm0         \n\t"
                    "movdqa (%1, %%"REG_a"), %%xmm1         \n\t"
                    "movdqa (%2, %%"REG_a"), %%xmm2         \n\t"
                    "punpcklbw %%xmm2, %%xmm0               \n\t"
                    "punpckhbw %%xmm2, %%xmm1               \n\t"
                    "movntdq %%xmm0, (%0, %%"REG_a", 2)     \n\t"
                    "movntdq %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
                    "add $16, %%"REG_a"                     \n\t"
                    "cmp %3, %%"REG_a"                      \n\t"
                    " jb 1b                                 \n\t"
                    ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                    : "memory", "%"REG_a""
                );
            } else
#endif
            {
                /* 64-bit MMX loads/stores: no alignment requirement. */
                asm(
                    "xor %%"REG_a", %%"REG_a"               \n\t"
                    "1:                                     \n\t"
                    PREFETCH" 64(%1, %%"REG_a")             \n\t"
                    PREFETCH" 64(%2, %%"REG_a")             \n\t"
                    "movq (%1, %%"REG_a"), %%mm0            \n\t"
                    "movq 8(%1, %%"REG_a"), %%mm2           \n\t"
                    "movq %%mm0, %%mm1                      \n\t"
                    "movq %%mm2, %%mm3                      \n\t"
                    "movq (%2, %%"REG_a"), %%mm4            \n\t"
                    "movq 8(%2, %%"REG_a"), %%mm5           \n\t"
                    "punpcklbw %%mm4, %%mm0                 \n\t"
                    "punpckhbw %%mm4, %%mm1                 \n\t"
                    "punpcklbw %%mm5, %%mm2                 \n\t"
                    "punpckhbw %%mm5, %%mm3                 \n\t"
                    MOVNTQ" %%mm0, (%0, %%"REG_a", 2)       \n\t"
                    MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2)      \n\t"
                    MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2)     \n\t"
                    MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2)     \n\t"
                    "add $16, %%"REG_a"                     \n\t"
                    "cmp %3, %%"REG_a"                      \n\t"
                    " jb 1b                                 \n\t"
                    ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                    : "memory", "%"REG_a
                );
            }
            /* The SIMD loop consumed every complete group of 16 bytes. */
            w = width & ~15L;
        }
#endif
        /* Scalar path: whole row without MMX, otherwise only the tail. */
        for (; w < width; w++) {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }

        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state and order the non-temporal stores issued above. */
    asm(
        EMMS"   \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02517
/* Expand one plane: each source byte is written twice horizontally and
 * output row y is built from source row y/2 (w source bytes, h output rows). */
static inline void RENAME(vu9_to_vu12_plane)(const uint8_t *src, uint8_t *dst,
                                             long w, long h,
                                             long srcStride, long dstStride)
{
    long x, y;

    for (y = 0; y < h; y++) {
        const uint8_t *s = src + srcStride * (y >> 1);
        uint8_t *d = dst + dstStride * y;
        x = 0;
#ifdef HAVE_MMX
        /* 32 source bytes -> 64 destination bytes per iteration. */
        for (; x < w - 31; x += 32) {
            asm volatile(
                PREFETCH" 32%1              \n\t"
                "movq %1, %%mm0             \n\t"
                "movq 8%1, %%mm2            \n\t"
                "movq 16%1, %%mm4           \n\t"
                "movq 24%1, %%mm6           \n\t"
                "movq %%mm0, %%mm1          \n\t"
                "movq %%mm2, %%mm3          \n\t"
                "movq %%mm4, %%mm5          \n\t"
                "movq %%mm6, %%mm7          \n\t"
                /* unpacking a register with itself duplicates each byte */
                "punpcklbw %%mm0, %%mm0     \n\t"
                "punpckhbw %%mm1, %%mm1     \n\t"
                "punpcklbw %%mm2, %%mm2     \n\t"
                "punpckhbw %%mm3, %%mm3     \n\t"
                "punpcklbw %%mm4, %%mm4     \n\t"
                "punpckhbw %%mm5, %%mm5     \n\t"
                "punpcklbw %%mm6, %%mm6     \n\t"
                "punpckhbw %%mm7, %%mm7     \n\t"
                MOVNTQ" %%mm0, %0           \n\t"
                MOVNTQ" %%mm1, 8%0          \n\t"
                MOVNTQ" %%mm2, 16%0         \n\t"
                MOVNTQ" %%mm3, 24%0         \n\t"
                MOVNTQ" %%mm4, 32%0         \n\t"
                MOVNTQ" %%mm5, 40%0         \n\t"
                MOVNTQ" %%mm6, 48%0         \n\t"
                MOVNTQ" %%mm7, 56%0"
                :"=m"(d[2*x])
                :"m"(s[x])
                :"memory");
        }
#endif
        /* scalar path: everything without MMX, otherwise the row tail */
        for (; x < w; x++)
            d[2*x] = d[2*x+1] = s[x];
    }
}

/**
 * Upscale two planes by a factor of two in both directions.
 *
 * src1 is expanded into dst1 and src2 into dst2, independently and with
 * their own strides.  Per plane, width/2 source bytes are read per row and
 * each is stored twice, and height/2 output rows are produced, where output
 * row y is taken from source row y/2.
 *
 * The duplicated per-plane loops were factored into one helper; the order
 * of operations (plane 1 completely, then plane 2) is unchanged.
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       long width, long height,
                                       long srcStride1, long srcStride2,
                                       long dstStride1, long dstStride2)
{
    const long w = width / 2;
    const long h = height / 2;

#ifdef HAVE_MMX
    /* Prefetch the second source row of both planes. */
    asm volatile(
        PREFETCH" %0    \n\t"
        PREFETCH" %1    \n\t"
        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif

    RENAME(vu9_to_vu12_plane)(src1, dst1, w, h, srcStride1, dstStride1);
    RENAME(vu9_to_vu12_plane)(src2, dst2, w, h, srcStride2, dstStride2);

#ifdef HAVE_MMX
    /* Leave MMX state and order the non-temporal stores issued above. */
    asm(
        EMMS"   \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02620
/**
 * Pack three planes (src1 = Y, src2 = U, src3 = V) into one interleaved
 * Y U Y V byte stream.
 *
 * Every output row y reads luma row y and chroma row y/4.  A row is built in
 * width/2 steps; each step consumes four luma bytes plus one U and one V
 * byte and emits the eight bytes  Y0 U Y1 V Y2 U Y3 V.
 *
 * NOTE(review): a step covers four luma samples while the step count is
 * width/2 -- confirm with the callers which unit "width" is expressed in.
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        long width, long height,
                                        long srcStride1, long srcStride2,
                                        long srcStride3, long dstStride)
{
    const long steps = width / 2;
    long row;

    for (row = 0; row < height; row++) {
        const uint8_t *y_row = src1 + srcStride1 * row;
        const uint8_t *u_row = src2 + srcStride2 * (row >> 2);
        const uint8_t *v_row = src3 + srcStride3 * (row >> 2);
        uint8_t *out_row = dst + dstStride * row;
        long x = 0;

#ifdef HAVE_MMX
        /* Eight steps at a time: 32 luma + 8 U + 8 V bytes in, 64 bytes out. */
        while (x < steps - 7) {
            asm volatile(
                PREFETCH" 32(%1, %0)            \n\t"
                PREFETCH" 32(%2, %0)            \n\t"
                PREFETCH" 32(%3, %0)            \n\t"
                "movq (%1, %0, 4), %%mm0        \n\t" /* 8 luma bytes */
                "movq (%2, %0), %%mm1           \n\t" /* 8 U bytes    */
                "movq (%3, %0), %%mm2           \n\t" /* 8 V bytes    */
                "movq %%mm0, %%mm3              \n\t"
                "movq %%mm1, %%mm4              \n\t"
                "movq %%mm2, %%mm5              \n\t"
                /* duplicate every chroma byte: low halves in mm1/mm2,
                 * high halves in mm4/mm5 */
                "punpcklbw %%mm1, %%mm1         \n\t"
                "punpcklbw %%mm2, %%mm2         \n\t"
                "punpckhbw %%mm4, %%mm4         \n\t"
                "punpckhbw %%mm5, %%mm5         \n\t"

                /* output bytes 0..15 */
                "movq %%mm1, %%mm6              \n\t"
                "punpcklbw %%mm2, %%mm1         \n\t"
                "punpcklbw %%mm1, %%mm0         \n\t"
                "punpckhbw %%mm1, %%mm3         \n\t"
                MOVNTQ" %%mm0, (%4, %0, 8)      \n\t"
                MOVNTQ" %%mm3, 8(%4, %0, 8)     \n\t"

                /* output bytes 16..31 */
                "punpckhbw %%mm2, %%mm6         \n\t"
                "movq 8(%1, %0, 4), %%mm0       \n\t"
                "movq %%mm0, %%mm3              \n\t"
                "punpcklbw %%mm6, %%mm0         \n\t"
                "punpckhbw %%mm6, %%mm3         \n\t"
                MOVNTQ" %%mm0, 16(%4, %0, 8)    \n\t"
                MOVNTQ" %%mm3, 24(%4, %0, 8)    \n\t"

                /* output bytes 32..47 */
                "movq %%mm4, %%mm6              \n\t"
                "movq 16(%1, %0, 4), %%mm0      \n\t"
                "movq %%mm0, %%mm3              \n\t"
                "punpcklbw %%mm5, %%mm4         \n\t"
                "punpcklbw %%mm4, %%mm0         \n\t"
                "punpckhbw %%mm4, %%mm3         \n\t"
                MOVNTQ" %%mm0, 32(%4, %0, 8)    \n\t"
                MOVNTQ" %%mm3, 40(%4, %0, 8)    \n\t"

                /* output bytes 48..63 */
                "punpckhbw %%mm5, %%mm6         \n\t"
                "movq 24(%1, %0, 4), %%mm0      \n\t"
                "movq %%mm0, %%mm3              \n\t"
                "punpcklbw %%mm6, %%mm0         \n\t"
                "punpckhbw %%mm6, %%mm3         \n\t"
                MOVNTQ" %%mm0, 48(%4, %0, 8)    \n\t"
                MOVNTQ" %%mm3, 56(%4, %0, 8)    \n\t"

                : "+r" (x)
                : "r"(y_row), "r" (u_row), "r"(v_row), "r"(out_row)
                :"memory");
            x += 8;
        }
#endif
        /* Scalar path: the whole row without MMX, otherwise only the tail. */
        while (x < steps) {
            const uint8_t *y4 = y_row + 4 * x;
            uint8_t *o = out_row + 8 * x;

            o[0] = y4[0];
            o[1] = u_row[x];
            o[2] = y4[1];
            o[3] = v_row[x];
            o[4] = y4[2];
            o[5] = u_row[x];
            o[6] = y4[3];
            o[7] = v_row[x];
            x++;
        }
    }
#ifdef HAVE_MMX
    /* Leave MMX state and order the non-temporal stores issued above. */
    asm(
        EMMS"   \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02711
02712 static inline void RENAME(rgb2rgb_init)(void){
02713 rgb15to16 = RENAME(rgb15to16);
02714 rgb15to24 = RENAME(rgb15to24);
02715 rgb15to32 = RENAME(rgb15to32);
02716 rgb16to24 = RENAME(rgb16to24);
02717 rgb16to32 = RENAME(rgb16to32);
02718 rgb16to15 = RENAME(rgb16to15);
02719 rgb24to16 = RENAME(rgb24to16);
02720 rgb24to15 = RENAME(rgb24to15);
02721 rgb24to32 = RENAME(rgb24to32);
02722 rgb32to16 = RENAME(rgb32to16);
02723 rgb32to15 = RENAME(rgb32to15);
02724 rgb32to24 = RENAME(rgb32to24);
02725 rgb24tobgr15 = RENAME(rgb24tobgr15);
02726 rgb24tobgr16 = RENAME(rgb24tobgr16);
02727 rgb24tobgr24 = RENAME(rgb24tobgr24);
02728 rgb32tobgr32 = RENAME(rgb32tobgr32);
02729 rgb32tobgr16 = RENAME(rgb32tobgr16);
02730 rgb32tobgr15 = RENAME(rgb32tobgr15);
02731 yv12toyuy2 = RENAME(yv12toyuy2);
02732 yv12touyvy = RENAME(yv12touyvy);
02733 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02734 yuy2toyv12 = RENAME(yuy2toyv12);
02735
02736
02737 planar2x = RENAME(planar2x);
02738 rgb24toyv12 = RENAME(rgb24toyv12);
02739 interleaveBytes = RENAME(interleaveBytes);
02740 vu9_to_vu12 = RENAME(vu9_to_vu12);
02741 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02742 }