00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "config.h"
00025
00026 #ifdef ARCH_PPC
00027
00028 #ifdef HAVE_ALTIVEC_H
00029 #include <altivec.h>
00030 #endif
00031 #include <inttypes.h>
00032
00033 #include "mpeg2.h"
00034 #include "attributes.h"
00035 #include "mpeg2_internal.h"
00036
00037 typedef vector signed char vector_s8_t;
00038 typedef vector unsigned char vector_u8_t;
00039 typedef vector signed short vector_s16_t;
00040 typedef vector unsigned short vector_u16_t;
00041 typedef vector signed int vector_s32_t;
00042 typedef vector unsigned int vector_u32_t;
00043
00044 #ifndef COFFEE_BREAK
00045
00046 static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
00047 {
00048 return vec_ld (A, (uint8_t *)B);
00049 }
00050 #undef vec_ld
00051 #define vec_ld my_vec_ld
00052
00053 static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
00054 {
00055 return vec_and (A, B);
00056 }
00057 #undef vec_and
00058 #define vec_and my_vec_and
00059
00060 static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
00061 {
00062 return vec_avg (A, B);
00063 }
00064 #undef vec_avg
00065 #define vec_avg my_vec_avg
00066
00067 #endif
00068
00069 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
00070 const int stride, int height)
00071 {
00072 vector_u8_t perm, ref0, ref1, tmp;
00073
00074 perm = vec_lvsl (0, ref);
00075
00076 height = (height >> 1) - 1;
00077
00078 ref0 = vec_ld (0, ref);
00079 ref1 = vec_ld (15, ref);
00080 ref += stride;
00081 tmp = vec_perm (ref0, ref1, perm);
00082
00083 do {
00084 ref0 = vec_ld (0, ref);
00085 ref1 = vec_ld (15, ref);
00086 ref += stride;
00087 vec_st (tmp, 0, dest);
00088 tmp = vec_perm (ref0, ref1, perm);
00089
00090 ref0 = vec_ld (0, ref);
00091 ref1 = vec_ld (15, ref);
00092 ref += stride;
00093 vec_st (tmp, stride, dest);
00094 dest += 2*stride;
00095 tmp = vec_perm (ref0, ref1, perm);
00096 } while (--height);
00097
00098 ref0 = vec_ld (0, ref);
00099 ref1 = vec_ld (15, ref);
00100 vec_st (tmp, 0, dest);
00101 tmp = vec_perm (ref0, ref1, perm);
00102 vec_st (tmp, stride, dest);
00103 }
00104
00105 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
00106 const int stride, int height)
00107 {
00108 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
00109
00110 tmp0 = vec_lvsl (0, ref);
00111 tmp0 = vec_mergeh (tmp0, tmp0);
00112 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00113 tmp1 = vec_lvsl (stride, ref);
00114 tmp1 = vec_mergeh (tmp1, tmp1);
00115 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00116
00117 height = (height >> 1) - 1;
00118
00119 ref0 = vec_ld (0, ref);
00120 ref1 = vec_ld (7, ref);
00121 ref += stride;
00122 tmp0 = vec_perm (ref0, ref1, perm0);
00123
00124 do {
00125 ref0 = vec_ld (0, ref);
00126 ref1 = vec_ld (7, ref);
00127 ref += stride;
00128 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00129 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00130 dest += stride;
00131 tmp1 = vec_perm (ref0, ref1, perm1);
00132
00133 ref0 = vec_ld (0, ref);
00134 ref1 = vec_ld (7, ref);
00135 ref += stride;
00136 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00137 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00138 dest += stride;
00139 tmp0 = vec_perm (ref0, ref1, perm0);
00140 } while (--height);
00141
00142 ref0 = vec_ld (0, ref);
00143 ref1 = vec_ld (7, ref);
00144 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00145 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00146 dest += stride;
00147 tmp1 = vec_perm (ref0, ref1, perm1);
00148 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00149 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00150 }
00151
00152 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
00153 const int stride, int height)
00154 {
00155 vector_u8_t permA, permB, ref0, ref1, tmp;
00156
00157 permA = vec_lvsl (0, ref);
00158 permB = vec_add (permA, vec_splat_u8 (1));
00159
00160 height = (height >> 1) - 1;
00161
00162 ref0 = vec_ld (0, ref);
00163 ref1 = vec_ld (16, ref);
00164 ref += stride;
00165 tmp = vec_avg (vec_perm (ref0, ref1, permA),
00166 vec_perm (ref0, ref1, permB));
00167
00168 do {
00169 ref0 = vec_ld (0, ref);
00170 ref1 = vec_ld (16, ref);
00171 ref += stride;
00172 vec_st (tmp, 0, dest);
00173 tmp = vec_avg (vec_perm (ref0, ref1, permA),
00174 vec_perm (ref0, ref1, permB));
00175
00176 ref0 = vec_ld (0, ref);
00177 ref1 = vec_ld (16, ref);
00178 ref += stride;
00179 vec_st (tmp, stride, dest);
00180 dest += 2*stride;
00181 tmp = vec_avg (vec_perm (ref0, ref1, permA),
00182 vec_perm (ref0, ref1, permB));
00183 } while (--height);
00184
00185 ref0 = vec_ld (0, ref);
00186 ref1 = vec_ld (16, ref);
00187 vec_st (tmp, 0, dest);
00188 tmp = vec_avg (vec_perm (ref0, ref1, permA),
00189 vec_perm (ref0, ref1, permB));
00190 vec_st (tmp, stride, dest);
00191 }
00192
00193 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
00194 const int stride, int height)
00195 {
00196 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
00197
00198 ones = vec_splat_u8 (1);
00199 tmp0 = vec_lvsl (0, ref);
00200 tmp0 = vec_mergeh (tmp0, tmp0);
00201 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00202 perm0B = vec_add (perm0A, ones);
00203 tmp1 = vec_lvsl (stride, ref);
00204 tmp1 = vec_mergeh (tmp1, tmp1);
00205 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00206 perm1B = vec_add (perm1A, ones);
00207
00208 height = (height >> 1) - 1;
00209
00210 ref0 = vec_ld (0, ref);
00211 ref1 = vec_ld (8, ref);
00212 ref += stride;
00213 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
00214 vec_perm (ref0, ref1, perm0B));
00215
00216 do {
00217 ref0 = vec_ld (0, ref);
00218 ref1 = vec_ld (8, ref);
00219 ref += stride;
00220 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00221 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00222 dest += stride;
00223 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
00224 vec_perm (ref0, ref1, perm1B));
00225
00226 ref0 = vec_ld (0, ref);
00227 ref1 = vec_ld (8, ref);
00228 ref += stride;
00229 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00230 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00231 dest += stride;
00232 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
00233 vec_perm (ref0, ref1, perm0B));
00234 } while (--height);
00235
00236 ref0 = vec_ld (0, ref);
00237 ref1 = vec_ld (8, ref);
00238 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00239 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00240 dest += stride;
00241 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
00242 vec_perm (ref0, ref1, perm1B));
00243 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00244 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00245 }
00246
00247 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
00248 const int stride, int height)
00249 {
00250 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
00251
00252 perm = vec_lvsl (0, ref);
00253
00254 height = (height >> 1) - 1;
00255
00256 ref0 = vec_ld (0, ref);
00257 ref1 = vec_ld (15, ref);
00258 ref += stride;
00259 tmp0 = vec_perm (ref0, ref1, perm);
00260 ref0 = vec_ld (0, ref);
00261 ref1 = vec_ld (15, ref);
00262 ref += stride;
00263 tmp1 = vec_perm (ref0, ref1, perm);
00264 tmp = vec_avg (tmp0, tmp1);
00265
00266 do {
00267 ref0 = vec_ld (0, ref);
00268 ref1 = vec_ld (15, ref);
00269 ref += stride;
00270 vec_st (tmp, 0, dest);
00271 tmp0 = vec_perm (ref0, ref1, perm);
00272 tmp = vec_avg (tmp0, tmp1);
00273
00274 ref0 = vec_ld (0, ref);
00275 ref1 = vec_ld (15, ref);
00276 ref += stride;
00277 vec_st (tmp, stride, dest);
00278 dest += 2*stride;
00279 tmp1 = vec_perm (ref0, ref1, perm);
00280 tmp = vec_avg (tmp0, tmp1);
00281 } while (--height);
00282
00283 ref0 = vec_ld (0, ref);
00284 ref1 = vec_ld (15, ref);
00285 vec_st (tmp, 0, dest);
00286 tmp0 = vec_perm (ref0, ref1, perm);
00287 tmp = vec_avg (tmp0, tmp1);
00288 vec_st (tmp, stride, dest);
00289 }
00290
00291 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
00292 const int stride, int height)
00293 {
00294 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
00295
00296 tmp0 = vec_lvsl (0, ref);
00297 tmp0 = vec_mergeh (tmp0, tmp0);
00298 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00299 tmp1 = vec_lvsl (stride, ref);
00300 tmp1 = vec_mergeh (tmp1, tmp1);
00301 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00302
00303 height = (height >> 1) - 1;
00304
00305 ref0 = vec_ld (0, ref);
00306 ref1 = vec_ld (7, ref);
00307 ref += stride;
00308 tmp0 = vec_perm (ref0, ref1, perm0);
00309 ref0 = vec_ld (0, ref);
00310 ref1 = vec_ld (7, ref);
00311 ref += stride;
00312 tmp1 = vec_perm (ref0, ref1, perm1);
00313 tmp = vec_avg (tmp0, tmp1);
00314
00315 do {
00316 ref0 = vec_ld (0, ref);
00317 ref1 = vec_ld (7, ref);
00318 ref += stride;
00319 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00320 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00321 dest += stride;
00322 tmp0 = vec_perm (ref0, ref1, perm0);
00323 tmp = vec_avg (tmp0, tmp1);
00324
00325 ref0 = vec_ld (0, ref);
00326 ref1 = vec_ld (7, ref);
00327 ref += stride;
00328 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00329 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00330 dest += stride;
00331 tmp1 = vec_perm (ref0, ref1, perm1);
00332 tmp = vec_avg (tmp0, tmp1);
00333 } while (--height);
00334
00335 ref0 = vec_ld (0, ref);
00336 ref1 = vec_ld (7, ref);
00337 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00338 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00339 dest += stride;
00340 tmp0 = vec_perm (ref0, ref1, perm0);
00341 tmp = vec_avg (tmp0, tmp1);
00342 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00343 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00344 }
00345
00346 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
00347 const int stride, int height)
00348 {
00349 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
00350 vector_u8_t ones;
00351
00352 ones = vec_splat_u8 (1);
00353 permA = vec_lvsl (0, ref);
00354 permB = vec_add (permA, ones);
00355
00356 height = (height >> 1) - 1;
00357
00358 ref0 = vec_ld (0, ref);
00359 ref1 = vec_ld (16, ref);
00360 ref += stride;
00361 A = vec_perm (ref0, ref1, permA);
00362 B = vec_perm (ref0, ref1, permB);
00363 avg0 = vec_avg (A, B);
00364 xor0 = vec_xor (A, B);
00365
00366 ref0 = vec_ld (0, ref);
00367 ref1 = vec_ld (16, ref);
00368 ref += stride;
00369 A = vec_perm (ref0, ref1, permA);
00370 B = vec_perm (ref0, ref1, permB);
00371 avg1 = vec_avg (A, B);
00372 xor1 = vec_xor (A, B);
00373 tmp = vec_sub (vec_avg (avg0, avg1),
00374 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00375 vec_xor (avg0, avg1)));
00376
00377 do {
00378 ref0 = vec_ld (0, ref);
00379 ref1 = vec_ld (16, ref);
00380 ref += stride;
00381 vec_st (tmp, 0, dest);
00382 A = vec_perm (ref0, ref1, permA);
00383 B = vec_perm (ref0, ref1, permB);
00384 avg0 = vec_avg (A, B);
00385 xor0 = vec_xor (A, B);
00386 tmp = vec_sub (vec_avg (avg0, avg1),
00387 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00388 vec_xor (avg0, avg1)));
00389
00390 ref0 = vec_ld (0, ref);
00391 ref1 = vec_ld (16, ref);
00392 ref += stride;
00393 vec_st (tmp, stride, dest);
00394 dest += 2*stride;
00395 A = vec_perm (ref0, ref1, permA);
00396 B = vec_perm (ref0, ref1, permB);
00397 avg1 = vec_avg (A, B);
00398 xor1 = vec_xor (A, B);
00399 tmp = vec_sub (vec_avg (avg0, avg1),
00400 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00401 vec_xor (avg0, avg1)));
00402 } while (--height);
00403
00404 ref0 = vec_ld (0, ref);
00405 ref1 = vec_ld (16, ref);
00406 vec_st (tmp, 0, dest);
00407 A = vec_perm (ref0, ref1, permA);
00408 B = vec_perm (ref0, ref1, permB);
00409 avg0 = vec_avg (A, B);
00410 xor0 = vec_xor (A, B);
00411 tmp = vec_sub (vec_avg (avg0, avg1),
00412 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00413 vec_xor (avg0, avg1)));
00414 vec_st (tmp, stride, dest);
00415 }
00416
00417 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
00418 const int stride, int height)
00419 {
00420 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
00421 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
00422
00423 ones = vec_splat_u8 (1);
00424 perm0A = vec_lvsl (0, ref);
00425 perm0A = vec_mergeh (perm0A, perm0A);
00426 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
00427 perm0B = vec_add (perm0A, ones);
00428 perm1A = vec_lvsl (stride, ref);
00429 perm1A = vec_mergeh (perm1A, perm1A);
00430 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
00431 perm1B = vec_add (perm1A, ones);
00432
00433 height = (height >> 1) - 1;
00434
00435 ref0 = vec_ld (0, ref);
00436 ref1 = vec_ld (8, ref);
00437 ref += stride;
00438 A = vec_perm (ref0, ref1, perm0A);
00439 B = vec_perm (ref0, ref1, perm0B);
00440 avg0 = vec_avg (A, B);
00441 xor0 = vec_xor (A, B);
00442
00443 ref0 = vec_ld (0, ref);
00444 ref1 = vec_ld (8, ref);
00445 ref += stride;
00446 A = vec_perm (ref0, ref1, perm1A);
00447 B = vec_perm (ref0, ref1, perm1B);
00448 avg1 = vec_avg (A, B);
00449 xor1 = vec_xor (A, B);
00450 tmp = vec_sub (vec_avg (avg0, avg1),
00451 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00452 vec_xor (avg0, avg1)));
00453
00454 do {
00455 ref0 = vec_ld (0, ref);
00456 ref1 = vec_ld (8, ref);
00457 ref += stride;
00458 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00459 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00460 dest += stride;
00461 A = vec_perm (ref0, ref1, perm0A);
00462 B = vec_perm (ref0, ref1, perm0B);
00463 avg0 = vec_avg (A, B);
00464 xor0 = vec_xor (A, B);
00465 tmp = vec_sub (vec_avg (avg0, avg1),
00466 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00467 vec_xor (avg0, avg1)));
00468
00469 ref0 = vec_ld (0, ref);
00470 ref1 = vec_ld (8, ref);
00471 ref += stride;
00472 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00473 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00474 dest += stride;
00475 A = vec_perm (ref0, ref1, perm1A);
00476 B = vec_perm (ref0, ref1, perm1B);
00477 avg1 = vec_avg (A, B);
00478 xor1 = vec_xor (A, B);
00479 tmp = vec_sub (vec_avg (avg0, avg1),
00480 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00481 vec_xor (avg0, avg1)));
00482 } while (--height);
00483
00484 ref0 = vec_ld (0, ref);
00485 ref1 = vec_ld (8, ref);
00486 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00487 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00488 dest += stride;
00489 A = vec_perm (ref0, ref1, perm0A);
00490 B = vec_perm (ref0, ref1, perm0B);
00491 avg0 = vec_avg (A, B);
00492 xor0 = vec_xor (A, B);
00493 tmp = vec_sub (vec_avg (avg0, avg1),
00494 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00495 vec_xor (avg0, avg1)));
00496 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00497 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00498 }
00499
#if 0
/*
 * Disabled alternative for the 8-wide two-direction interpolation; the
 * "#if 0" keeps it out of the build.  It widens the four neighbouring
 * pixels to 16 bits (vec_mergeh with a zero vector), sums them, adds 2,
 * shifts right by 2 and packs back to bytes.
 *
 * NOTE(review): kept as found.  It carries the same name as the active
 * MC_put_xy_8_altivec, stores a full quadword per row with vec_st, and
 * the vec_avg result assigned at the bottom of the loop is overwritten
 * before it is ever read -- all of this would need attention before the
 * block could be enabled.
 */
static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 const int stride, int height)
{
    vector_u8_t sel_l, sel_r, lo, hi;
    vector_u8_t top_l, top_r, bot_l, bot_r, out, zero, ones;
    vector_u16_t two, sum;

    ones = vec_splat_u8 (1);
    sel_l = vec_lvsl (0, ref);
    sel_r = vec_add (sel_l, ones);

    zero = vec_splat_u8 (0);
    two = vec_splat_u16 (2);

    do {
        /* upper source row */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        ref += stride;
        top_l = vec_perm (lo, hi, sel_l);
        top_r = vec_perm (lo, hi, sel_r);

        /* lower source row (ref is not advanced past it) */
        lo = vec_ld (0, ref);
        hi = vec_ld (8, ref);
        bot_l = vec_perm (lo, hi, sel_l);
        bot_r = vec_perm (lo, hi, sel_r);

        sum = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, top_l),
                                (vector_u16_t)vec_mergeh (zero, top_r)),
                       vec_add ((vector_u16_t)vec_mergeh (zero, bot_l),
                                (vector_u16_t)vec_mergeh (zero, bot_r)));
        sum = vec_sr (vec_add (sum, two), two);
        out = vec_pack (sum, sum);

        vec_st (out, 0, dest);
        dest += stride;
        out = vec_avg (vec_perm (lo, hi, sel_l),
                       vec_perm (lo, hi, sel_r));
    } while (--height);
}
#endif
00539
00540 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
00541 const int stride, int height)
00542 {
00543 vector_u8_t perm, ref0, ref1, tmp, prev;
00544
00545 perm = vec_lvsl (0, ref);
00546
00547 height = (height >> 1) - 1;
00548
00549 ref0 = vec_ld (0, ref);
00550 ref1 = vec_ld (15, ref);
00551 ref += stride;
00552 prev = vec_ld (0, dest);
00553 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
00554
00555 do {
00556 ref0 = vec_ld (0, ref);
00557 ref1 = vec_ld (15, ref);
00558 ref += stride;
00559 prev = vec_ld (stride, dest);
00560 vec_st (tmp, 0, dest);
00561 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
00562
00563 ref0 = vec_ld (0, ref);
00564 ref1 = vec_ld (15, ref);
00565 ref += stride;
00566 prev = vec_ld (2*stride, dest);
00567 vec_st (tmp, stride, dest);
00568 dest += 2*stride;
00569 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
00570 } while (--height);
00571
00572 ref0 = vec_ld (0, ref);
00573 ref1 = vec_ld (15, ref);
00574 prev = vec_ld (stride, dest);
00575 vec_st (tmp, 0, dest);
00576 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
00577 vec_st (tmp, stride, dest);
00578 }
00579
00580 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
00581 const int stride, int height)
00582 {
00583 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
00584
00585 tmp0 = vec_lvsl (0, ref);
00586 tmp0 = vec_mergeh (tmp0, tmp0);
00587 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00588 tmp1 = vec_lvsl (stride, ref);
00589 tmp1 = vec_mergeh (tmp1, tmp1);
00590 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00591
00592 height = (height >> 1) - 1;
00593
00594 ref0 = vec_ld (0, ref);
00595 ref1 = vec_ld (7, ref);
00596 ref += stride;
00597 prev = vec_ld (0, dest);
00598 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
00599
00600 do {
00601 ref0 = vec_ld (0, ref);
00602 ref1 = vec_ld (7, ref);
00603 ref += stride;
00604 prev = vec_ld (stride, dest);
00605 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00606 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00607 dest += stride;
00608 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
00609
00610 ref0 = vec_ld (0, ref);
00611 ref1 = vec_ld (7, ref);
00612 ref += stride;
00613 prev = vec_ld (stride, dest);
00614 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00615 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00616 dest += stride;
00617 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
00618 } while (--height);
00619
00620 ref0 = vec_ld (0, ref);
00621 ref1 = vec_ld (7, ref);
00622 prev = vec_ld (stride, dest);
00623 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00624 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00625 dest += stride;
00626 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
00627 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00628 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00629 }
00630
00631 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
00632 const int stride, int height)
00633 {
00634 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
00635
00636 permA = vec_lvsl (0, ref);
00637 permB = vec_add (permA, vec_splat_u8 (1));
00638
00639 height = (height >> 1) - 1;
00640
00641 ref0 = vec_ld (0, ref);
00642 ref1 = vec_ld (16, ref);
00643 prev = vec_ld (0, dest);
00644 ref += stride;
00645 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
00646 vec_perm (ref0, ref1, permB)));
00647
00648 do {
00649 ref0 = vec_ld (0, ref);
00650 ref1 = vec_ld (16, ref);
00651 ref += stride;
00652 prev = vec_ld (stride, dest);
00653 vec_st (tmp, 0, dest);
00654 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
00655 vec_perm (ref0, ref1, permB)));
00656
00657 ref0 = vec_ld (0, ref);
00658 ref1 = vec_ld (16, ref);
00659 ref += stride;
00660 prev = vec_ld (2*stride, dest);
00661 vec_st (tmp, stride, dest);
00662 dest += 2*stride;
00663 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
00664 vec_perm (ref0, ref1, permB)));
00665 } while (--height);
00666
00667 ref0 = vec_ld (0, ref);
00668 ref1 = vec_ld (16, ref);
00669 prev = vec_ld (stride, dest);
00670 vec_st (tmp, 0, dest);
00671 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
00672 vec_perm (ref0, ref1, permB)));
00673 vec_st (tmp, stride, dest);
00674 }
00675
00676 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
00677 const int stride, int height)
00678 {
00679 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
00680 vector_u8_t prev;
00681
00682 ones = vec_splat_u8 (1);
00683 tmp0 = vec_lvsl (0, ref);
00684 tmp0 = vec_mergeh (tmp0, tmp0);
00685 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00686 perm0B = vec_add (perm0A, ones);
00687 tmp1 = vec_lvsl (stride, ref);
00688 tmp1 = vec_mergeh (tmp1, tmp1);
00689 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00690 perm1B = vec_add (perm1A, ones);
00691
00692 height = (height >> 1) - 1;
00693
00694 ref0 = vec_ld (0, ref);
00695 ref1 = vec_ld (8, ref);
00696 prev = vec_ld (0, dest);
00697 ref += stride;
00698 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
00699 vec_perm (ref0, ref1, perm0B)));
00700
00701 do {
00702 ref0 = vec_ld (0, ref);
00703 ref1 = vec_ld (8, ref);
00704 ref += stride;
00705 prev = vec_ld (stride, dest);
00706 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00707 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00708 dest += stride;
00709 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
00710 vec_perm (ref0, ref1, perm1B)));
00711
00712 ref0 = vec_ld (0, ref);
00713 ref1 = vec_ld (8, ref);
00714 ref += stride;
00715 prev = vec_ld (stride, dest);
00716 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00717 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00718 dest += stride;
00719 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
00720 vec_perm (ref0, ref1, perm0B)));
00721 } while (--height);
00722
00723 ref0 = vec_ld (0, ref);
00724 ref1 = vec_ld (8, ref);
00725 prev = vec_ld (stride, dest);
00726 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
00727 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
00728 dest += stride;
00729 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
00730 vec_perm (ref0, ref1, perm1B)));
00731 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
00732 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
00733 }
00734
00735 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
00736 const int stride, int height)
00737 {
00738 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
00739
00740 perm = vec_lvsl (0, ref);
00741
00742 height = (height >> 1) - 1;
00743
00744 ref0 = vec_ld (0, ref);
00745 ref1 = vec_ld (15, ref);
00746 ref += stride;
00747 tmp0 = vec_perm (ref0, ref1, perm);
00748 ref0 = vec_ld (0, ref);
00749 ref1 = vec_ld (15, ref);
00750 ref += stride;
00751 prev = vec_ld (0, dest);
00752 tmp1 = vec_perm (ref0, ref1, perm);
00753 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00754
00755 do {
00756 ref0 = vec_ld (0, ref);
00757 ref1 = vec_ld (15, ref);
00758 ref += stride;
00759 prev = vec_ld (stride, dest);
00760 vec_st (tmp, 0, dest);
00761 tmp0 = vec_perm (ref0, ref1, perm);
00762 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00763
00764 ref0 = vec_ld (0, ref);
00765 ref1 = vec_ld (15, ref);
00766 ref += stride;
00767 prev = vec_ld (2*stride, dest);
00768 vec_st (tmp, stride, dest);
00769 dest += 2*stride;
00770 tmp1 = vec_perm (ref0, ref1, perm);
00771 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00772 } while (--height);
00773
00774 ref0 = vec_ld (0, ref);
00775 ref1 = vec_ld (15, ref);
00776 prev = vec_ld (stride, dest);
00777 vec_st (tmp, 0, dest);
00778 tmp0 = vec_perm (ref0, ref1, perm);
00779 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00780 vec_st (tmp, stride, dest);
00781 }
00782
00783 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
00784 const int stride, int height)
00785 {
00786 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
00787
00788 tmp0 = vec_lvsl (0, ref);
00789 tmp0 = vec_mergeh (tmp0, tmp0);
00790 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
00791 tmp1 = vec_lvsl (stride, ref);
00792 tmp1 = vec_mergeh (tmp1, tmp1);
00793 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
00794
00795 height = (height >> 1) - 1;
00796
00797 ref0 = vec_ld (0, ref);
00798 ref1 = vec_ld (7, ref);
00799 ref += stride;
00800 tmp0 = vec_perm (ref0, ref1, perm0);
00801 ref0 = vec_ld (0, ref);
00802 ref1 = vec_ld (7, ref);
00803 ref += stride;
00804 prev = vec_ld (0, dest);
00805 tmp1 = vec_perm (ref0, ref1, perm1);
00806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00807
00808 do {
00809 ref0 = vec_ld (0, ref);
00810 ref1 = vec_ld (7, ref);
00811 ref += stride;
00812 prev = vec_ld (stride, dest);
00813 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00814 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00815 dest += stride;
00816 tmp0 = vec_perm (ref0, ref1, perm0);
00817 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00818
00819 ref0 = vec_ld (0, ref);
00820 ref1 = vec_ld (7, ref);
00821 ref += stride;
00822 prev = vec_ld (stride, dest);
00823 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00824 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00825 dest += stride;
00826 tmp1 = vec_perm (ref0, ref1, perm1);
00827 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00828 } while (--height);
00829
00830 ref0 = vec_ld (0, ref);
00831 ref1 = vec_ld (7, ref);
00832 prev = vec_ld (stride, dest);
00833 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00834 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00835 dest += stride;
00836 tmp0 = vec_perm (ref0, ref1, perm0);
00837 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
00838 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00839 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00840 }
00841
00842 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
00843 const int stride, int height)
00844 {
00845 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
00846 vector_u8_t ones, prev;
00847
00848 ones = vec_splat_u8 (1);
00849 permA = vec_lvsl (0, ref);
00850 permB = vec_add (permA, ones);
00851
00852 height = (height >> 1) - 1;
00853
00854 ref0 = vec_ld (0, ref);
00855 ref1 = vec_ld (16, ref);
00856 ref += stride;
00857 A = vec_perm (ref0, ref1, permA);
00858 B = vec_perm (ref0, ref1, permB);
00859 avg0 = vec_avg (A, B);
00860 xor0 = vec_xor (A, B);
00861
00862 ref0 = vec_ld (0, ref);
00863 ref1 = vec_ld (16, ref);
00864 ref += stride;
00865 prev = vec_ld (0, dest);
00866 A = vec_perm (ref0, ref1, permA);
00867 B = vec_perm (ref0, ref1, permB);
00868 avg1 = vec_avg (A, B);
00869 xor1 = vec_xor (A, B);
00870 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
00871 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00872 vec_xor (avg0, avg1))));
00873
00874 do {
00875 ref0 = vec_ld (0, ref);
00876 ref1 = vec_ld (16, ref);
00877 ref += stride;
00878 prev = vec_ld (stride, dest);
00879 vec_st (tmp, 0, dest);
00880 A = vec_perm (ref0, ref1, permA);
00881 B = vec_perm (ref0, ref1, permB);
00882 avg0 = vec_avg (A, B);
00883 xor0 = vec_xor (A, B);
00884 tmp = vec_avg (prev,
00885 vec_sub (vec_avg (avg0, avg1),
00886 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00887 vec_xor (avg0, avg1))));
00888
00889 ref0 = vec_ld (0, ref);
00890 ref1 = vec_ld (16, ref);
00891 ref += stride;
00892 prev = vec_ld (2*stride, dest);
00893 vec_st (tmp, stride, dest);
00894 dest += 2*stride;
00895 A = vec_perm (ref0, ref1, permA);
00896 B = vec_perm (ref0, ref1, permB);
00897 avg1 = vec_avg (A, B);
00898 xor1 = vec_xor (A, B);
00899 tmp = vec_avg (prev,
00900 vec_sub (vec_avg (avg0, avg1),
00901 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00902 vec_xor (avg0, avg1))));
00903 } while (--height);
00904
00905 ref0 = vec_ld (0, ref);
00906 ref1 = vec_ld (16, ref);
00907 prev = vec_ld (stride, dest);
00908 vec_st (tmp, 0, dest);
00909 A = vec_perm (ref0, ref1, permA);
00910 B = vec_perm (ref0, ref1, permB);
00911 avg0 = vec_avg (A, B);
00912 xor0 = vec_xor (A, B);
00913 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
00914 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00915 vec_xor (avg0, avg1))));
00916 vec_st (tmp, stride, dest);
00917 }
00918
00919 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
00920 const int stride, int height)
00921 {
00922 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
00923 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
00924
00925 ones = vec_splat_u8 (1);
00926 perm0A = vec_lvsl (0, ref);
00927 perm0A = vec_mergeh (perm0A, perm0A);
00928 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
00929 perm0B = vec_add (perm0A, ones);
00930 perm1A = vec_lvsl (stride, ref);
00931 perm1A = vec_mergeh (perm1A, perm1A);
00932 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
00933 perm1B = vec_add (perm1A, ones);
00934
00935 height = (height >> 1) - 1;
00936
00937 ref0 = vec_ld (0, ref);
00938 ref1 = vec_ld (8, ref);
00939 ref += stride;
00940 A = vec_perm (ref0, ref1, perm0A);
00941 B = vec_perm (ref0, ref1, perm0B);
00942 avg0 = vec_avg (A, B);
00943 xor0 = vec_xor (A, B);
00944
00945 ref0 = vec_ld (0, ref);
00946 ref1 = vec_ld (8, ref);
00947 ref += stride;
00948 prev = vec_ld (0, dest);
00949 A = vec_perm (ref0, ref1, perm1A);
00950 B = vec_perm (ref0, ref1, perm1B);
00951 avg1 = vec_avg (A, B);
00952 xor1 = vec_xor (A, B);
00953 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
00954 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00955 vec_xor (avg0, avg1))));
00956
00957 do {
00958 ref0 = vec_ld (0, ref);
00959 ref1 = vec_ld (8, ref);
00960 ref += stride;
00961 prev = vec_ld (stride, dest);
00962 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00963 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00964 dest += stride;
00965 A = vec_perm (ref0, ref1, perm0A);
00966 B = vec_perm (ref0, ref1, perm0B);
00967 avg0 = vec_avg (A, B);
00968 xor0 = vec_xor (A, B);
00969 tmp = vec_avg (prev,
00970 vec_sub (vec_avg (avg0, avg1),
00971 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00972 vec_xor (avg0, avg1))));
00973
00974 ref0 = vec_ld (0, ref);
00975 ref1 = vec_ld (8, ref);
00976 ref += stride;
00977 prev = vec_ld (stride, dest);
00978 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00979 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00980 dest += stride;
00981 A = vec_perm (ref0, ref1, perm1A);
00982 B = vec_perm (ref0, ref1, perm1B);
00983 avg1 = vec_avg (A, B);
00984 xor1 = vec_xor (A, B);
00985 tmp = vec_avg (prev,
00986 vec_sub (vec_avg (avg0, avg1),
00987 vec_and (vec_and (ones, vec_or (xor0, xor1)),
00988 vec_xor (avg0, avg1))));
00989 } while (--height);
00990
00991 ref0 = vec_ld (0, ref);
00992 ref1 = vec_ld (8, ref);
00993 prev = vec_ld (stride, dest);
00994 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
00995 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
00996 dest += stride;
00997 A = vec_perm (ref0, ref1, perm0A);
00998 B = vec_perm (ref0, ref1, perm0B);
00999 avg0 = vec_avg (A, B);
01000 xor0 = vec_xor (A, B);
01001 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
01002 vec_and (vec_and (ones, vec_or (xor0, xor1)),
01003 vec_xor (avg0, avg1))));
01004 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01005 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01006 }
01007
01008 MPEG2_MC_EXTERN (altivec)
01009
01010 #endif