00001 #include "blend.h"
00002
00003 #ifdef MMX
00004
00005 #include "ffmpeg-mmx.h"
00006
00007 static const mmx_t mm_cpool[] =
00008 {
00009 { uw: {128, 128, 128, 128} },
00010 { uw: {255, 255, 255, 255} },
00011 { uw: {514, 514, 514, 514} },
00012 { uw: {1, 1, 1, 1} },
00013 { uw: {257, 257, 257, 257} }
00014 };
00015
/*
 * blendregion_mmx - blend a source region onto the destination planes.
 *
 * Each row is handled in groups of eight pixels by the MMX sequence below;
 * any remaining pixels of the row go through the scalar loop that follows it.
 * The scalar loop spells out the arithmetic the MMX stage comments refer to.
 *
 *   ysrc/usrc/vsrc/asrc  source Y, U, V and alpha planes
 *   srcstrd              added to ysrc/asrc after every row; usrc/vsrc advance
 *                        by srcstrd >> 1 after even rows when dochroma is set
 *   ydst/udst/vdst/adst  destination planes, updated in place
 *   dststrd              destination counterpart of srcstrd
 *   width, height        size of the region in pixels
 *   alphamod             scales the source alpha: (asrc * alphamod + 0x80) >> 8
 *   dochroma             non-zero: U/V are blended too, on even rows only
 *   rec_lut              lookup table indexed by the combined alpha value
 *                        (presumably a reciprocal table -- confirm with the
 *                        code that builds it)
 *   pow_lut              unused in this variant
 *
 * The movq_* / p* macros come from ffmpeg-mmx.h; the stage comments assume
 * they wrap the MMX instructions of the same name with (source, destination)
 * operand order -- TODO confirm against that header.
 */
00016 void blendregion_mmx (uint8_t * ysrc, uint8_t * usrc, uint8_t * vsrc,
00017 uint8_t * asrc, int srcstrd,
00018 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
00019 uint8_t * adst, int dststrd,
00020 int width, int height, int alphamod, int dochroma,
00021 int16_t rec_lut[256], uint8_t pow_lut[256][256])
00022 {
00023 int x, y, i, alpha, newalpha;
      /* alphamod replicated into four 16-bit lanes */
00024 mmx_t amod = { uw: {alphamod, alphamod, alphamod, alphamod} };
      /* scratch for the eight rec_lut indices / looked-up values */
00025 int16_t wbuf[8];
00026
00027 (void) pow_lut;
00028 for (y = 0; y < height; y++)
00029 {
00030 for (x = 0; x + 7 < width; x += 8)
00031 {
      /* Load 8 source and 8 destination alpha bytes and the constants. */
00032 movq_m2r (asrc[x], mm0);
00033 movq_m2r (adst[x], mm2);
00034 movq_m2r (mm_cpool[0], mm6);
00035 pxor_r2r (mm7, mm7);
00036 movq_m2r (amod, mm5);
00037 movq_m2r (mm_cpool[1], mm4);
00038 movq_r2r (mm0, mm1);
00039 movq_r2r (mm2, mm3);
00040
      /* Widen to words: mm0/mm1 = asrc (low/high four), mm2/mm3 = adst. */
00041 punpcklbw_r2r (mm7, mm0);
00042 punpckhbw_r2r (mm7, mm1);
00043 punpcklbw_r2r (mm7, mm2);
00044 punpckhbw_r2r (mm7, mm3);
00045
      /* alpha = (asrc * alphamod + 128) >> 8 in mm0/mm1;
         interleaved with it, mm4/mm5 = 255 - adst. */
00046 pmullw_r2r (mm5, mm0);
00047 pmullw_r2r (mm5, mm1);
00048
00049 movq_r2r (mm4, mm5);
00050
00051 paddw_r2r (mm6, mm0);
00052 paddw_r2r (mm6, mm1);
00053
00054 psubw_r2r (mm2, mm4);
00055 psubw_r2r (mm3, mm5);
00056
00057 psrlw_i2r (8, mm0);
00058 psrlw_i2r (8, mm1);
00059
      /* New destination alpha in mm4/mm5:
         ((((255 - adst) * alpha + 1) >> 1) * 514 >> 16) + adst. */
00060 movq_m2r (mm_cpool[3], mm6);
00061 movq_m2r (mm_cpool[2], mm7);
00062
00063 pmullw_r2r (mm0, mm4);
00064 pmullw_r2r (mm1, mm5);
00065
00066 paddw_r2r (mm6, mm4);
00067 paddw_r2r (mm6, mm5);
00068
00069 psrlw_i2r (1, mm4);
00070 psrlw_i2r (1, mm5);
00071
00072 pmulhw_r2r (mm7, mm4);
00073 pmulhw_r2r (mm7, mm5);
00074
00075 movq_m2r (mm_cpool[1], mm6);
00076
00077 paddw_r2r (mm2, mm4);
00078 paddw_r2r (mm3, mm5);
00079
00080 movq_r2r (mm6, mm7);
00081
      /* Pack the new alpha to bytes (stored at 00087); mm6/mm7 = 255 - alpha. */
00082 packuswb_r2r (mm5, mm4);
00083
00084 psubw_r2r (mm0, mm6);
00085 psubw_r2r (mm1, mm7);
00086
00087 movq_r2m (mm4, adst[x]);
00088 movq_m2r (mm_cpool[3], mm5);
00089 movq_m2r (mm_cpool[2], mm4);
00090
      /* rec_lut index in mm6/mm7, from the OLD adst still held in mm2/mm3:
         ((((255 - alpha) * adst + 1) >> 1) * 514 >> 16) + alpha. */
00091 pmullw_r2r (mm2, mm6);
00092 pmullw_r2r (mm3, mm7);
00093
00094 paddw_r2r (mm5, mm6);
00095 paddw_r2r (mm5, mm7);
00096
00097 psrlw_i2r (1, mm6);
00098 psrlw_i2r (1, mm7);
00099
00100 pmulhw_r2r (mm4, mm6);
00101 pmulhw_r2r (mm4, mm7);
00102
00103 paddw_r2r (mm0, mm6);
00104 paddw_r2r (mm1, mm7);
00105
      /* Fetch 8 source and destination luma bytes. */
00106 movq_m2r (ysrc[x], mm2);
00107 movq_m2r (ydst[x], mm4);
00108
      /* Spill the eight indices so the table lookup can be done in C. */
00109 movq_r2m (mm6, wbuf[0]);
00110 movq_r2m (mm7, wbuf[4]);
00111 movq_m2r (mm_cpool[4], mm7);
00112 pxor_r2r (mm6, mm6);
00113
00114 movq_r2r (mm2, mm3);
00115 movq_r2r (mm4, mm5);
00116
      /* Replace each index by its rec_lut entry, in place. */
00117 for (i = 0; i < 8; i++)
00118 wbuf[i] = rec_lut[wbuf[i]];
00119
      /* newalpha = (((alpha * rec_lut[idx]) >> 7) * 257) >> 1 in mm0/mm1,
         interleaved with widening the luma and forming (ysrc - ydst) << 2
         in mm2/mm3. */
00120 pmullw_m2r (wbuf[0], mm0);
00121 pmullw_m2r (wbuf[4], mm1);
00122
00123 punpcklbw_r2r (mm6, mm2);
00124 punpckhbw_r2r (mm6, mm3);
00125 punpcklbw_r2r (mm6, mm4);
00126 punpckhbw_r2r (mm6, mm5);
00127
00128 psrlw_i2r (7, mm0);
00129 psrlw_i2r (7, mm1);
00130
00131 psubw_r2r (mm4, mm2);
00132 psubw_r2r (mm5, mm3);
00133
00134 pmullw_r2r (mm7, mm0);
00135 pmullw_r2r (mm7, mm1);
00136
00137 movq_m2r (mm_cpool[3], mm7);
00138
00139 psllw_i2r (2, mm2);
00140 psllw_i2r (2, mm3);
00141
00142 psrlw_i2r (1, mm0);
00143 psrlw_i2r (1, mm1);
00144
      /* ydst += (high word of (diff * newalpha) + 1) >> 1; this is the vector
         form of "(... + 65536) >> 17" in the scalar loop. */
00145 pmulhw_r2r (mm0, mm2);
00146 pmulhw_r2r (mm1, mm3);
00147
00148 paddw_r2r (mm7, mm2);
00149 paddw_r2r (mm7, mm3);
00150
00151 psraw_i2r (1, mm2);
00152 psraw_i2r (1, mm3);
00153
00154 paddw_r2r (mm4, mm2);
00155 paddw_r2r (mm5, mm3);
00156
00157 packuswb_r2r (mm3, mm2);
00158
00159 movq_r2m (mm2, ydst[x]);
      /* Chroma: even rows only, one U/V sample per two luma pixels. */
00160 if ((y & 1) == 0 && dochroma)
00161 {
      /* The shift pair keeps the low word of every dword of mm0/mm1, i.e. the
         newalpha of the even pixels; packssdw then gathers those four values
         into mm0.  Four U and four V bytes are loaded alongside. */
00162 pslld_i2r (16, mm0);
00163 movd_m2r (usrc[x>>1], mm2);
00164 pslld_i2r (16, mm1);
00165 movd_m2r (vsrc[x>>1], mm3);
00166 psrld_i2r (16, mm0);
00167 movd_m2r (udst[x>>1], mm4);
00168 psrld_i2r (16, mm1);
00169 movd_m2r (vdst[x>>1], mm5);
00170
00171
00172 punpcklbw_r2r (mm6, mm2);
00173 punpcklbw_r2r (mm6, mm3);
00174 punpcklbw_r2r (mm6, mm4);
00175 punpcklbw_r2r (mm6, mm5);
00176
00177 packssdw_r2r (mm1, mm0);
00178
      /* Same blend step as for luma, applied to U (mm2) and V (mm3). */
00179 psubw_r2r (mm4, mm2);
00180 psubw_r2r (mm5, mm3);
00181
00182 psllw_i2r (2, mm2);
00183 psllw_i2r (2, mm3);
00184
00185 pmulhw_r2r (mm0, mm2);
00186 pmulhw_r2r (mm0, mm3);
00187
00188 paddw_r2r (mm7, mm2);
00189 paddw_r2r (mm7, mm3);
00190
00191 psraw_i2r (1, mm2);
00192 psraw_i2r (1, mm3);
00193
00194 paddw_r2r (mm4, mm2);
00195 paddw_r2r (mm5, mm3);
00196
      /* Only the low four bytes of each packed result are stored. */
00197 packuswb_r2r (mm7, mm2);
00198 packuswb_r2r (mm7, mm3);
00199
00200 movd_r2m (mm2, udst[x>>1]);
00201 movd_r2m (mm3, vdst[x>>1]);
00202 }
00203 }
      /* Scalar loop for the pixels left over after the 8-wide groups. */
00204 for (; x < width; x++)
00205 {
00206
00207
00208
00209 alpha = (asrc[x] * alphamod + 0x80) >> 8;
00210 newalpha = ((((((255 - alpha) * adst[x]) + 1) >> 1) * 514) >> 16) + alpha;
00211 newalpha = (((alpha * rec_lut[newalpha]) >> 7) * 257) >> 1;
00212 adst[x] = ((((((255 - adst[x]) * alpha) + 1) >> 1) * 514) >> 16) + adst[x];
00213 ydst[x] = (((((ysrc[x] - ydst[x]) << 2) * newalpha) + 65536) >> 17) + ydst[x];
00214 if (((y & 1) | (x & 1)) == 0 && dochroma)
00215 {
00216 udst[x>>1] = (((((usrc[x>>1] - udst[x>>1]) << 2) * newalpha) + 65536) >> 17) + udst[x>>1];
00217 vdst[x>>1] = (((((vsrc[x>>1] - vdst[x>>1]) << 2) * newalpha) + 65536) >> 17) + vdst[x>>1];
00218 }
00219 }
00220
      /* Step to the next row of the luma and alpha planes. */
00221 ysrc += srcstrd;
00222 asrc += srcstrd;
00223 ydst += dststrd;
00224 adst += dststrd;
00225
      /* Chroma planes advance once per two rows, by half the stride. */
00226 if ((y & 1) == 0 && dochroma)
00227 {
00228 usrc += srcstrd >> 1;
00229 vsrc += srcstrd >> 1;
00230 udst += dststrd >> 1;
00231 vdst += dststrd >> 1;
00232 }
00233 }
      /* Leave MMX state before returning. */
00234 emms();
00235 }
00236
/*
 * blendcolumn2_mmx - blend one of two source columns onto the destination,
 * chosen per pixel by mask[x].
 *
 * Each source contributes a single Y/U/V/alpha sample per row (read through
 * the source pointers without an x index).  Where mask[x] is non-zero source 1
 * is used, otherwise source 2.  mask is indexed by x only and is not advanced
 * between rows.  Rows are handled in groups of eight pixels by the MMX
 * sequence; remaining pixels go through the scalar loop, which spells out the
 * arithmetic the stage comments refer to.
 *
 *   ysrc1/usrc1/vsrc1/asrc1, srcstrd1   first source column and its stride
 *   ysrc2/usrc2/vsrc2/asrc2, srcstrd2   second source column and its stride
 *   mask                                per-x selector
 *   ydst/udst/vdst/adst, dststrd        destination planes, updated in place
 *   width, height                       size of the region in pixels
 *   alphamod                            scales the selected source alpha
 *   dochroma                            non-zero: U/V blended on even rows
 *   rec_lut                             lookup table indexed by the combined
 *                                       alpha (presumably reciprocals -- confirm)
 *   pow_lut                             unused in this variant
 *
 * NOTE(review): the U/V source samples are dereferenced at the top of every
 * row even when dochroma is 0, so the chroma source pointers must be readable
 * in that case too -- confirm with callers.
 *
 * Stage comments assume the ffmpeg-mmx.h macros wrap the MMX instructions of
 * the same name with (source, destination) operand order -- TODO confirm.
 */
00237 void blendcolumn2_mmx (uint8_t * ysrc1, uint8_t * usrc1, uint8_t * vsrc1,
00238 uint8_t * asrc1, int srcstrd1,
00239 uint8_t * ysrc2, uint8_t * usrc2, uint8_t * vsrc2,
00240 uint8_t * asrc2, int srcstrd2,
00241 uint8_t * mask,
00242 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
00243 uint8_t * adst, int dststrd,
00244 int width, int height, int alphamod, int dochroma,
00245 int16_t rec_lut[256], uint8_t pow_lut[256][256])
00246 {
00247 int x, y, i, alpha, newalpha;
00248 mmx_t amod = { uw: {alphamod, alphamod, alphamod, alphamod} };
      /* maskm: per-byte "mask[x] == 0" flags saved by the alpha stage.
         The *srcNm values hold this row's source samples replicated over
         8 byte lanes; uvsrcNm holds U in bytes 0-3 and V in bytes 4-7. */
00249 mmx_t maskm;
00250 mmx_t ysrc1m;
00251 mmx_t uvsrc1m;
00252 mmx_t asrc1m;
00253 mmx_t ysrc2m;
00254 mmx_t uvsrc2m;
00255 mmx_t asrc2m;
      /* per-pixel source selection used by the scalar loop */
00256 uint8_t * ysrc, * asrc, * usrc, * vsrc;
00257 int16_t wbuf[8];
00258
00259 (void) pow_lut;
00260 for (y = 0; y < height; y++)
00261 {
      /* Replicate this row's samples of both sources into byte lanes. */
00262 ysrc1m.ub[0] = *ysrc1;
00263 ysrc1m.ub[1] = *ysrc1;
00264 ysrc1m.ub[2] = *ysrc1;
00265 ysrc1m.ub[3] = *ysrc1;
00266 ysrc1m.ub[4] = *ysrc1;
00267 ysrc1m.ub[5] = *ysrc1;
00268 ysrc1m.ub[6] = *ysrc1;
00269 ysrc1m.ub[7] = *ysrc1;
00270 ysrc2m.ub[0] = *ysrc2;
00271 ysrc2m.ub[1] = *ysrc2;
00272 ysrc2m.ub[2] = *ysrc2;
00273 ysrc2m.ub[3] = *ysrc2;
00274 ysrc2m.ub[4] = *ysrc2;
00275 ysrc2m.ub[5] = *ysrc2;
00276 ysrc2m.ub[6] = *ysrc2;
00277 ysrc2m.ub[7] = *ysrc2;
00278 asrc1m.ub[0] = *asrc1;
00279 asrc1m.ub[1] = *asrc1;
00280 asrc1m.ub[2] = *asrc1;
00281 asrc1m.ub[3] = *asrc1;
00282 asrc1m.ub[4] = *asrc1;
00283 asrc1m.ub[5] = *asrc1;
00284 asrc1m.ub[6] = *asrc1;
00285 asrc1m.ub[7] = *asrc1;
00286 asrc2m.ub[0] = *asrc2;
00287 asrc2m.ub[1] = *asrc2;
00288 asrc2m.ub[2] = *asrc2;
00289 asrc2m.ub[3] = *asrc2;
00290 asrc2m.ub[4] = *asrc2;
00291 asrc2m.ub[5] = *asrc2;
00292 asrc2m.ub[6] = *asrc2;
00293 asrc2m.ub[7] = *asrc2;
00294 uvsrc1m.ub[0] = *usrc1;
00295 uvsrc1m.ub[1] = *usrc1;
00296 uvsrc1m.ub[2] = *usrc1;
00297 uvsrc1m.ub[3] = *usrc1;
00298 uvsrc1m.ub[4] = *vsrc1;
00299 uvsrc1m.ub[5] = *vsrc1;
00300 uvsrc1m.ub[6] = *vsrc1;
00301 uvsrc1m.ub[7] = *vsrc1;
00302 uvsrc2m.ub[0] = *usrc2;
00303 uvsrc2m.ub[1] = *usrc2;
00304 uvsrc2m.ub[2] = *usrc2;
00305 uvsrc2m.ub[3] = *usrc2;
00306 uvsrc2m.ub[4] = *vsrc2;
00307 uvsrc2m.ub[5] = *vsrc2;
00308 uvsrc2m.ub[6] = *vsrc2;
00309 uvsrc2m.ub[7] = *vsrc2;
00310 for (x = 0; x + 7 < width; x += 8)
00311 {
      /* Build the selector: comparing the mask bytes with zero flags the
         lanes where mask[x] == 0.  Source 2 alpha is kept in flagged lanes,
         source 1 alpha in the others; the flags are saved in maskm for the
         luma and chroma selections further down. */
00312 movq_m2r (mask[x], mm3);
00313 movq_m2r (asrc1m, mm0);
00314 pxor_r2r (mm7, mm7);
00315 movq_m2r (asrc2m, mm1);
00316 movq_m2r (adst[x], mm2);
00317 pcmpeqb_r2r (mm7, mm3);
00318 movq_m2r (mm_cpool[0], mm6);
00319 pand_r2r (mm3, mm1);
00320 movq_r2m (mm3, maskm);
00321 movq_m2r (amod, mm5);
00322 pandn_r2r (mm0, mm3);
00323 movq_m2r (mm_cpool[1], mm4);
00324 por_r2r (mm3, mm1);
00325 movq_r2r (mm1, mm0);
00326 movq_r2r (mm2, mm3);
00327
      /* Widen to words: mm0/mm1 = selected source alpha, mm2/mm3 = adst. */
00328 punpcklbw_r2r (mm7, mm0);
00329 punpckhbw_r2r (mm7, mm1);
00330 punpcklbw_r2r (mm7, mm2);
00331 punpckhbw_r2r (mm7, mm3);
00332
      /* alpha = (asrc * alphamod + 128) >> 8 in mm0/mm1;
         interleaved with it, mm4/mm5 = 255 - adst. */
00333 pmullw_r2r (mm5, mm0);
00334 pmullw_r2r (mm5, mm1);
00335
00336 movq_r2r (mm4, mm5);
00337
00338 paddw_r2r (mm6, mm0);
00339 paddw_r2r (mm6, mm1);
00340
00341 psubw_r2r (mm2, mm4);
00342 psubw_r2r (mm3, mm5);
00343
00344 psrlw_i2r (8, mm0);
00345 psrlw_i2r (8, mm1);
00346
      /* New destination alpha in mm4/mm5:
         ((((255 - adst) * alpha + 1) >> 1) * 514 >> 16) + adst. */
00347 movq_m2r (mm_cpool[3], mm6);
00348 movq_m2r (mm_cpool[2], mm7);
00349
00350 pmullw_r2r (mm0, mm4);
00351 pmullw_r2r (mm1, mm5);
00352
00353 paddw_r2r (mm6, mm4);
00354 paddw_r2r (mm6, mm5);
00355
00356 psrlw_i2r (1, mm4);
00357 psrlw_i2r (1, mm5);
00358
00359 pmulhw_r2r (mm7, mm4);
00360 pmulhw_r2r (mm7, mm5);
00361
00362 movq_m2r (mm_cpool[1], mm6);
00363
00364 paddw_r2r (mm2, mm4);
00365 paddw_r2r (mm3, mm5);
00366
00367 movq_r2r (mm6, mm7);
00368
      /* Pack the new alpha to bytes (stored at 00374); mm6/mm7 = 255 - alpha. */
00369 packuswb_r2r (mm5, mm4);
00370
00371 psubw_r2r (mm0, mm6);
00372 psubw_r2r (mm1, mm7);
00373
00374 movq_r2m (mm4, adst[x]);
00375 movq_m2r (mm_cpool[3], mm5);
00376 movq_m2r (mm_cpool[2], mm4);
00377
      /* rec_lut index in mm6/mm7, from the OLD adst still held in mm2/mm3:
         ((((255 - alpha) * adst + 1) >> 1) * 514 >> 16) + alpha. */
00378 pmullw_r2r (mm2, mm6);
00379 pmullw_r2r (mm3, mm7);
00380
00381 paddw_r2r (mm5, mm6);
00382 paddw_r2r (mm5, mm7);
00383
00384 psrlw_i2r (1, mm6);
00385 psrlw_i2r (1, mm7);
00386
00387 pmulhw_r2r (mm4, mm6);
00388 pmulhw_r2r (mm4, mm7);
00389
00390 paddw_r2r (mm0, mm6);
00391 paddw_r2r (mm1, mm7);
00392
      /* Reload the selector and both luma samples; the indices are spilled to
         wbuf while the luma selection (source 2 where mask == 0, else
         source 1) is carried out around the table lookup. */
00393 movq_m2r (maskm, mm5);
00394 movq_m2r (ysrc1m, mm2);
00395 movq_m2r (ysrc2m, mm3);
00396 movq_m2r (ydst[x], mm4);
00397
00398 movq_r2m (mm6, wbuf[0]);
00399 pxor_r2r (mm6, mm6);
00400 movq_r2m (mm7, wbuf[4]);
00401 movq_m2r (mm_cpool[4], mm7);
00402 pand_r2r (mm5, mm3);
00403 pandn_r2r (mm2, mm5);
00404
      /* Replace each index by its rec_lut entry, in place. */
00405 for (i = 0; i < 8; i++)
00406 wbuf[i] = rec_lut[wbuf[i]];
00407
00408 por_r2r (mm5, mm3);
00409
00410
      /* newalpha = (((alpha * rec_lut[idx]) >> 7) * 257) >> 1 in mm0/mm1,
         interleaved with widening the luma and forming (ysrc - ydst) << 2
         in mm2/mm3. */
00411 pmullw_m2r (wbuf[0], mm0);
00412 movq_r2r (mm3, mm2);
00413 movq_r2r (mm4, mm5);
00414 pmullw_m2r (wbuf[4], mm1);
00415
00416 punpcklbw_r2r (mm6, mm2);
00417 punpckhbw_r2r (mm6, mm3);
00418 punpcklbw_r2r (mm6, mm4);
00419 punpckhbw_r2r (mm6, mm5);
00420
00421 psrlw_i2r (7, mm0);
00422 psrlw_i2r (7, mm1);
00423
00424 psubw_r2r (mm4, mm2);
00425 psubw_r2r (mm5, mm3);
00426
00427 pmullw_r2r (mm7, mm0);
00428 pmullw_r2r (mm7, mm1);
00429
00430 movq_m2r (mm_cpool[3], mm7);
00431
00432 psllw_i2r (2, mm2);
00433 psllw_i2r (2, mm3);
00434
00435 psrlw_i2r (1, mm0);
00436 psrlw_i2r (1, mm1);
00437
      /* ydst += (high word of (diff * newalpha) + 1) >> 1; this is the vector
         form of "(... + 65536) >> 17" in the scalar loop. */
00438 pmulhw_r2r (mm0, mm2);
00439 pmulhw_r2r (mm1, mm3);
00440
00441 paddw_r2r (mm7, mm2);
00442 paddw_r2r (mm7, mm3);
00443
00444 psraw_i2r (1, mm2);
00445 psraw_i2r (1, mm3);
00446
00447 paddw_r2r (mm4, mm2);
00448 paddw_r2r (mm5, mm3);
00449
00450 packuswb_r2r (mm3, mm2);
00451
00452 movq_r2m (mm2, ydst[x]);
      /* Chroma: even rows only, one U/V sample per two luma pixels. */
00453 if ((y & 1) == 0 && dochroma)
00454 {
      /* mm0/mm1: keep the low word of every dword (newalpha of the even
         pixels).  mm6: keep the selector byte of the even pixels and pack it
         to four bytes, duplicated in both halves so it lines up with the
         U (low) and V (high) halves of uvsrcNm. */
00455 pslld_i2r (16, mm0);
00456 movq_m2r (maskm, mm6);
00457 pslld_i2r (16, mm1);
00458 psllw_i2r (8, mm6);
00459 movq_m2r (uvsrc1m, mm2);
00460 psrlw_i2r (8, mm6);
00461 movq_m2r (uvsrc2m, mm3);
00462 packuswb_r2r (mm6, mm6);
00463 psrld_i2r (16, mm0);
00464 psrld_i2r (16, mm1);
00465
      /* Select source 2 chroma where mask == 0, source 1 elsewhere. */
00466 pand_r2r (mm6, mm3);
00467 pandn_r2r (mm2, mm6);
00468 movd_m2r (udst[x>>1], mm4);
00469 por_r2r (mm6, mm3);
00470 movd_m2r (vdst[x>>1], mm5);
00471
      /* Split the selection: mm2 takes the U half, mm3 is shifted down to the
         V half; mm0 gathers the four even-pixel newalpha values. */
00472 movq_r2r (mm3, mm2);
00473 pxor_r2r (mm6, mm6);
00474 packssdw_r2r (mm1, mm0);
00475 psrlq_i2r (32, mm3);
00476
00477 punpcklbw_r2r (mm6, mm2);
00478 punpcklbw_r2r (mm6, mm3);
00479 punpcklbw_r2r (mm6, mm4);
00480 punpcklbw_r2r (mm6, mm5);
00481
00482
      /* Same blend step as for luma, applied to U (mm2) and V (mm3). */
00483 psubw_r2r (mm4, mm2);
00484 psubw_r2r (mm5, mm3);
00485
00486 psllw_i2r (2, mm2);
00487 psllw_i2r (2, mm3);
00488
00489 pmulhw_r2r (mm0, mm2);
00490 pmulhw_r2r (mm0, mm3);
00491
00492 paddw_r2r (mm7, mm2);
00493 paddw_r2r (mm7, mm3);
00494
00495 psraw_i2r (1, mm2);
00496 psraw_i2r (1, mm3);
00497
00498 paddw_r2r (mm4, mm2);
00499 paddw_r2r (mm5, mm3);
00500
      /* Only the low four bytes of each packed result are stored. */
00501 packuswb_r2r (mm7, mm2);
00502 packuswb_r2r (mm7, mm3);
00503
00504 movd_r2m (mm2, udst[x>>1]);
00505 movd_r2m (mm3, vdst[x>>1]);
00506 }
00507 }
      /* Scalar loop for the pixels left over after the 8-wide groups. */
00508 for (; x < width; x++)
00509 {
00510
00511
00512
      /* Non-zero mask selects source 1, zero selects source 2. */
00513 if (mask[x])
00514 {
00515 ysrc = ysrc1;
00516 usrc = usrc1;
00517 vsrc = vsrc1;
00518 asrc = asrc1;
00519 }
00520 else
00521 {
00522 ysrc = ysrc2;
00523 usrc = usrc2;
00524 vsrc = vsrc2;
00525 asrc = asrc2;
00526 }
00527 alpha = (*asrc * alphamod + 0x80) >> 8;
00528 newalpha = ((((((255 - alpha) * adst[x]) + 1) >> 1) * 514) >> 16) + alpha;
00529 newalpha = (((alpha * rec_lut[newalpha]) >> 7) * 257) >> 1;
00530 adst[x] = ((((((255 - adst[x]) * alpha) + 1) >> 1) * 514) >> 16) + adst[x];
00531 ydst[x] = (((((*ysrc - ydst[x]) << 2) * newalpha) + 65536) >> 17) + ydst[x];
00532 if (((y & 1) | (x & 1)) == 0 && dochroma)
00533 {
00534 udst[x>>1] = (((((*usrc - udst[x>>1]) << 2) * newalpha) + 65536) >> 17) + udst[x>>1];
00535 vdst[x>>1] = (((((*vsrc - vdst[x>>1]) << 2) * newalpha) + 65536) >> 17) + vdst[x>>1];
00536 }
00537 }
00538
      /* Step to the next row of the luma and alpha planes. */
00539 ysrc1 += srcstrd1;
00540 asrc1 += srcstrd1;
00541 ysrc2 += srcstrd2;
00542 asrc2 += srcstrd2;
00543 ydst += dststrd;
00544 adst += dststrd;
00545
      /* Chroma planes advance once per two rows, by half the stride. */
00546 if ((y & 1) == 0 && dochroma)
00547 {
00548 usrc1 += srcstrd1 >> 1;
00549 vsrc1 += srcstrd1 >> 1;
00550 usrc2 += srcstrd2 >> 1;
00551 vsrc2 += srcstrd2 >> 1;
00552 udst += dststrd >> 1;
00553 vdst += dststrd >> 1;
00554 }
00555 }
      /* Leave MMX state before returning. */
00556 emms();
00557 }
00558
/*
 * blendcolor_mmx - blend a constant colour onto the destination planes using
 * a per-pixel alpha plane.
 *
 * Rows are handled in groups of eight pixels by the MMX sequence; remaining
 * pixels go through the scalar loop, which spells out the arithmetic the
 * stage comments refer to.
 *
 *   ysrc, usrc, vsrc     the colour to blend in (passed by value)
 *   asrc, srcstrd        source alpha plane and its row stride
 *   ydst/udst/vdst/adst  destination planes, updated in place
 *   dststrd              added to ydst/adst after every row; udst/vdst advance
 *                        by dststrd >> 1 after even rows when dochroma is set
 *   width, height        size of the region in pixels
 *   alphamod             scales the source alpha: (asrc * alphamod + 0x80) >> 8
 *   dochroma             non-zero: U/V are blended too, on even rows only
 *   rec_lut              lookup table indexed by the combined alpha value
 *                        (presumably reciprocals -- confirm)
 *   pow_lut              unused in this variant
 *
 * Stage comments assume the ffmpeg-mmx.h macros wrap the MMX instructions of
 * the same name with (source, destination) operand order -- TODO confirm.
 */
00559 void blendcolor_mmx (uint8_t ysrc, uint8_t usrc, uint8_t vsrc,
00560 uint8_t * asrc, int srcstrd,
00561 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
00562 uint8_t * adst, int dststrd,
00563 int width, int height, int alphamod, int dochroma,
00564 int16_t rec_lut[256], uint8_t pow_lut[256][256])
00565 {
00566 int x, y, i, alpha, newalpha;
      /* alphamod and the colour, each replicated into four 16-bit lanes, so
         the colour needs no byte-to-word widening later */
00567 mmx_t amod = { uw: {alphamod, alphamod, alphamod, alphamod} };
00568 mmx_t ysrcm = { uw: {ysrc, ysrc, ysrc, ysrc} };
00569 mmx_t usrcm = { uw: {usrc, usrc, usrc, usrc} };
00570 mmx_t vsrcm = { uw: {vsrc, vsrc, vsrc, vsrc} };
00571 int16_t wbuf[8];
00572
00573 (void) pow_lut;
00574 for (y = 0; y < height; y++)
00575 {
00576 for (x = 0; x + 7 < width; x += 8)
00577 {
      /* Load 8 source and 8 destination alpha bytes and the constants. */
00578 movq_m2r (asrc[x], mm0);
00579 movq_m2r (adst[x], mm2);
00580 movq_m2r (mm_cpool[0], mm6);
00581 pxor_r2r (mm7, mm7);
00582 movq_m2r (amod, mm5);
00583 movq_m2r (mm_cpool[1], mm4);
00584 movq_r2r (mm0, mm1);
00585 movq_r2r (mm2, mm3);
00586
      /* Widen to words: mm0/mm1 = asrc (low/high four), mm2/mm3 = adst. */
00587 punpcklbw_r2r (mm7, mm0);
00588 punpckhbw_r2r (mm7, mm1);
00589 punpcklbw_r2r (mm7, mm2);
00590 punpckhbw_r2r (mm7, mm3);
00591
      /* alpha = (asrc * alphamod + 128) >> 8 in mm0/mm1;
         interleaved with it, mm4/mm5 = 255 - adst. */
00592 pmullw_r2r (mm5, mm0);
00593 pmullw_r2r (mm5, mm1);
00594
00595 movq_r2r (mm4, mm5);
00596
00597 paddw_r2r (mm6, mm0);
00598 paddw_r2r (mm6, mm1);
00599
00600 psubw_r2r (mm2, mm4);
00601 psubw_r2r (mm3, mm5);
00602
00603 psrlw_i2r (8, mm0);
00604 psrlw_i2r (8, mm1);
00605
      /* New destination alpha in mm4/mm5:
         ((((255 - adst) * alpha + 1) >> 1) * 514 >> 16) + adst. */
00606 movq_m2r (mm_cpool[3], mm6);
00607 movq_m2r (mm_cpool[2], mm7);
00608
00609 pmullw_r2r (mm0, mm4);
00610 pmullw_r2r (mm1, mm5);
00611
00612 paddw_r2r (mm6, mm4);
00613 paddw_r2r (mm6, mm5);
00614
00615 psrlw_i2r (1, mm4);
00616 psrlw_i2r (1, mm5);
00617
00618 pmulhw_r2r (mm7, mm4);
00619 pmulhw_r2r (mm7, mm5);
00620
00621 movq_m2r (mm_cpool[1], mm6);
00622
00623 paddw_r2r (mm2, mm4);
00624 paddw_r2r (mm3, mm5);
00625
00626 movq_r2r (mm6, mm7);
00627
      /* Pack the new alpha to bytes (stored at 00633); mm6/mm7 = 255 - alpha. */
00628 packuswb_r2r (mm5, mm4);
00629
00630 psubw_r2r (mm0, mm6);
00631 psubw_r2r (mm1, mm7);
00632
00633 movq_r2m (mm4, adst[x]);
00634 movq_m2r (mm_cpool[3], mm5);
00635 movq_m2r (mm_cpool[2], mm4);
00636
      /* rec_lut index in mm6/mm7, from the OLD adst still held in mm2/mm3:
         ((((255 - alpha) * adst + 1) >> 1) * 514 >> 16) + alpha. */
00637 pmullw_r2r (mm2, mm6);
00638 pmullw_r2r (mm3, mm7);
00639
00640 paddw_r2r (mm5, mm6);
00641 paddw_r2r (mm5, mm7);
00642
00643 psrlw_i2r (1, mm6);
00644 psrlw_i2r (1, mm7);
00645
00646 pmulhw_r2r (mm4, mm6);
00647 pmulhw_r2r (mm4, mm7);
00648
00649 paddw_r2r (mm0, mm6);
00650 paddw_r2r (mm1, mm7);
00651
      /* Luma colour (already four words) and 8 destination luma bytes. */
00652 movq_m2r (ysrcm, mm2);
00653 movq_m2r (ydst[x], mm4);
00654
      /* Spill the eight indices so the table lookup can be done in C. */
00655 movq_r2m (mm6, wbuf[0]);
00656 movq_r2m (mm7, wbuf[4]);
00657 movq_m2r (mm_cpool[4], mm7);
00658 pxor_r2r (mm6, mm6);
00659
00660 movq_r2r (mm2, mm3);
00661 movq_r2r (mm4, mm5);
00662
      /* Replace each index by its rec_lut entry, in place. */
00663 for (i = 0; i < 8; i++)
00664 wbuf[i] = rec_lut[wbuf[i]];
00665
      /* newalpha = (((alpha * rec_lut[idx]) >> 7) * 257) >> 1 in mm0/mm1,
         interleaved with widening ydst and forming (ysrc - ydst) << 2
         in mm2/mm3. */
00666 pmullw_m2r (wbuf[0], mm0);
00667 pmullw_m2r (wbuf[4], mm1);
00668
00669 punpcklbw_r2r (mm6, mm4);
00670 punpckhbw_r2r (mm6, mm5);
00671
00672 psrlw_i2r (7, mm0);
00673 psrlw_i2r (7, mm1);
00674
00675 psubw_r2r (mm4, mm2);
00676 psubw_r2r (mm5, mm3);
00677
00678 pmullw_r2r (mm7, mm0);
00679 pmullw_r2r (mm7, mm1);
00680
00681 movq_m2r (mm_cpool[3], mm7);
00682
00683 psllw_i2r (2, mm2);
00684 psllw_i2r (2, mm3);
00685
00686 psrlw_i2r (1, mm0);
00687 psrlw_i2r (1, mm1);
00688
      /* ydst += (high word of (diff * newalpha) + 1) >> 1; this is the vector
         form of "(... + 65536) >> 17" in the scalar loop. */
00689 pmulhw_r2r (mm0, mm2);
00690 pmulhw_r2r (mm1, mm3);
00691
00692 paddw_r2r (mm7, mm2);
00693 paddw_r2r (mm7, mm3);
00694
00695 psraw_i2r (1, mm2);
00696 psraw_i2r (1, mm3);
00697
00698 paddw_r2r (mm4, mm2);
00699 paddw_r2r (mm5, mm3);
00700
00701 packuswb_r2r (mm3, mm2);
00702
00703 movq_r2m (mm2, ydst[x]);
      /* Chroma: even rows only, one U/V sample per two luma pixels. */
00704 if ((y & 1) == 0 && dochroma)
00705 {
      /* The shift pair keeps the low word of every dword of mm0/mm1, i.e. the
         newalpha of the even pixels; packssdw then gathers those four values
         into mm0.  The U/V colour words and four destination U/V bytes are
         loaded alongside. */
00706 pslld_i2r (16, mm0);
00707 movq_m2r (usrcm, mm2);
00708 pslld_i2r (16, mm1);
00709 movq_m2r (vsrcm, mm3);
00710 psrld_i2r (16, mm0);
00711 movd_m2r (udst[x>>1], mm4);
00712 psrld_i2r (16, mm1);
00713 movd_m2r (vdst[x>>1], mm5);
00714
00715 punpcklbw_r2r (mm6, mm4);
00716 punpcklbw_r2r (mm6, mm5);
00717
00718 packssdw_r2r (mm1, mm0);
00719
      /* Same blend step as for luma, applied to U (mm2) and V (mm3). */
00720 psubw_r2r (mm4, mm2);
00721 psubw_r2r (mm5, mm3);
00722
00723 psllw_i2r (2, mm2);
00724 psllw_i2r (2, mm3);
00725
00726 pmulhw_r2r (mm0, mm2);
00727 pmulhw_r2r (mm0, mm3);
00728
00729 paddw_r2r (mm7, mm2);
00730 paddw_r2r (mm7, mm3);
00731
00732 psraw_i2r (1, mm2);
00733 psraw_i2r (1, mm3);
00734
00735 paddw_r2r (mm4, mm2);
00736 paddw_r2r (mm5, mm3);
00737
      /* Only the low four bytes of each packed result are stored. */
00738 packuswb_r2r (mm7, mm2);
00739 packuswb_r2r (mm7, mm3);
00740
00741 movd_r2m (mm2, udst[x>>1]);
00742 movd_r2m (mm3, vdst[x>>1]);
00743 }
00744 }
      /* Scalar loop for the pixels left over after the 8-wide groups. */
00745 for (; x < width; x++)
00746 {
00747
00748
00749
00750 alpha = (asrc[x] * alphamod + 0x80) >> 8;
00751 newalpha = ((((((255 - alpha) * adst[x]) + 1) >> 1) * 514) >> 16) + alpha;
00752 newalpha = (((alpha * rec_lut[newalpha]) >> 7) * 257) >> 1;
00753 adst[x] = ((((((255 - adst[x]) * alpha) + 1) >> 1) * 514) >> 16) + adst[x];
00754 ydst[x] = (((((ysrc - ydst[x]) << 2) * newalpha) + 65536) >> 17) + ydst[x];
00755 if (((y & 1) | (x & 1)) == 0 && dochroma)
00756 {
00757 udst[x>>1] = (((((usrc - udst[x>>1]) << 2) * newalpha) + 65536) >> 17) + udst[x>>1];
00758 vdst[x>>1] = (((((vsrc - vdst[x>>1]) << 2) * newalpha) + 65536) >> 17) + vdst[x>>1];
00759 }
00760 }
00761
      /* Step to the next row of the alpha and luma planes. */
00762 asrc += srcstrd;
00763 ydst += dststrd;
00764 adst += dststrd;
00765
      /* Chroma planes advance once per two rows, by half the stride. */
00766 if ((y & 1) == 0 && dochroma)
00767 {
00768 udst += dststrd >> 1;
00769 vdst += dststrd >> 1;
00770 }
00771 }
      /* Leave MMX state before returning. */
00772 emms();
00773 }
00774
/*
 * blendconst_mmx - blend a constant colour with a constant alpha onto the
 * destination planes.
 *
 * Unlike the variants that take an alpha plane, asrc is used as the alpha
 * directly (there is no alphamod parameter or scaling).  Rows are handled in
 * groups of eight pixels by the MMX sequence; remaining pixels go through the
 * scalar loop, which spells out the arithmetic the stage comments refer to.
 *
 *   ysrc, usrc, vsrc     the colour to blend in (passed by value)
 *   asrc                 the alpha applied to every pixel
 *   ydst/udst/vdst/adst  destination planes, updated in place
 *   dststrd              added to ydst/adst after every row; udst/vdst advance
 *                        by dststrd >> 1 after even rows when dochroma is set
 *   width, height        size of the region in pixels
 *   dochroma             non-zero: U/V are blended too, on even rows only
 *   rec_lut              lookup table indexed by the combined alpha value
 *                        (presumably reciprocals -- confirm)
 *   pow_lut              unused in this variant
 *
 * Stage comments assume the ffmpeg-mmx.h macros wrap the MMX instructions of
 * the same name with (source, destination) operand order -- TODO confirm.
 */
00775 void blendconst_mmx (uint8_t ysrc, uint8_t usrc, uint8_t vsrc,
00776 uint8_t asrc,
00777 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
00778 uint8_t * adst, int dststrd,
00779 int width, int height, int dochroma,
00780 int16_t rec_lut[256], uint8_t pow_lut[256][256])
00781 {
00782 int x, y, i, alpha, newalpha;
      /* colour and alpha, each replicated into four 16-bit lanes, so they
         need no byte-to-word widening later */
00783 mmx_t ysrcm = { uw: {ysrc, ysrc, ysrc, ysrc} };
00784 mmx_t usrcm = { uw: {usrc, usrc, usrc, usrc} };
00785 mmx_t vsrcm = { uw: {vsrc, vsrc, vsrc, vsrc} };
00786 mmx_t asrcm = { uw: {asrc, asrc, asrc, asrc} };
00787 int16_t wbuf[8];
00788
00789 (void) pow_lut;
00790 for (y = 0; y < height; y++)
00791 {
00792 for (x = 0; x + 7 < width; x += 8)
00793 {
      /* mm0/mm1 = alpha words, mm2/mm3 = 8 destination alpha bytes.
         NOTE(review): the mm_cpool[0] load into mm6 looks unused here --
         mm6 is reloaded at 00810 before anything reads it. */
00794 movq_m2r (asrcm, mm0);
00795 movq_m2r (adst[x], mm2);
00796 movq_m2r (mm_cpool[0], mm6);
00797 pxor_r2r (mm7, mm7);
00798 movq_m2r (mm_cpool[1], mm4);
00799 movq_r2r (mm0, mm1);
00800 movq_r2r (mm2, mm3);
00801
      /* Widen adst to words. */
00802 punpcklbw_r2r (mm7, mm2);
00803 punpckhbw_r2r (mm7, mm3);
00804
00805 movq_r2r (mm4, mm5);
00806
      /* mm4/mm5 = 255 - adst. */
00807 psubw_r2r (mm2, mm4);
00808 psubw_r2r (mm3, mm5);
00809
      /* New destination alpha in mm4/mm5:
         ((((255 - adst) * alpha + 1) >> 1) * 514 >> 16) + adst. */
00810 movq_m2r (mm_cpool[3], mm6);
00811 movq_m2r (mm_cpool[2], mm7);
00812
00813 pmullw_r2r (mm0, mm4);
00814 pmullw_r2r (mm1, mm5);
00815
00816 paddw_r2r (mm6, mm4);
00817 paddw_r2r (mm6, mm5);
00818
00819 psrlw_i2r (1, mm4);
00820 psrlw_i2r (1, mm5);
00821
00822 pmulhw_r2r (mm7, mm4);
00823 pmulhw_r2r (mm7, mm5);
00824
00825 movq_m2r (mm_cpool[1], mm6);
00826
00827 paddw_r2r (mm2, mm4);
00828 paddw_r2r (mm3, mm5);
00829
00830 movq_r2r (mm6, mm7);
00831
      /* Pack the new alpha to bytes (stored at 00837); mm6/mm7 = 255 - alpha. */
00832 packuswb_r2r (mm5, mm4);
00833
00834 psubw_r2r (mm0, mm6);
00835 psubw_r2r (mm1, mm7);
00836
00837 movq_r2m (mm4, adst[x]);
00838 movq_m2r (mm_cpool[3], mm5);
00839 movq_m2r (mm_cpool[2], mm4);
00840
      /* rec_lut index in mm6/mm7, from the OLD adst still held in mm2/mm3:
         ((((255 - alpha) * adst + 1) >> 1) * 514 >> 16) + alpha. */
00841 pmullw_r2r (mm2, mm6);
00842 pmullw_r2r (mm3, mm7);
00843
00844 paddw_r2r (mm5, mm6);
00845 paddw_r2r (mm5, mm7);
00846
00847 psrlw_i2r (1, mm6);
00848 psrlw_i2r (1, mm7);
00849
00850 pmulhw_r2r (mm4, mm6);
00851 pmulhw_r2r (mm4, mm7);
00852
00853 paddw_r2r (mm0, mm6);
00854 paddw_r2r (mm1, mm7);
00855
      /* Luma colour (already four words) and 8 destination luma bytes. */
00856 movq_m2r (ysrcm, mm2);
00857 movq_m2r (ydst[x], mm4);
00858
      /* Spill the eight indices so the table lookup can be done in C. */
00859 movq_r2m (mm6, wbuf[0]);
00860 movq_r2m (mm7, wbuf[4]);
00861 movq_m2r (mm_cpool[4], mm7);
00862 pxor_r2r (mm6, mm6);
00863
00864 movq_r2r (mm2, mm3);
00865 movq_r2r (mm4, mm5);
00866
      /* Replace each index by its rec_lut entry, in place. */
00867 for (i = 0; i < 8; i++)
00868 wbuf[i] = rec_lut[wbuf[i]];
00869
      /* newalpha = (((alpha * rec_lut[idx]) >> 7) * 257) >> 1 in mm0/mm1,
         interleaved with widening ydst and forming (ysrc - ydst) << 2
         in mm2/mm3. */
00870 pmullw_m2r (wbuf[0], mm0);
00871 pmullw_m2r (wbuf[4], mm1);
00872
00873 punpcklbw_r2r (mm6, mm4);
00874 punpckhbw_r2r (mm6, mm5);
00875
00876 psrlw_i2r (7, mm0);
00877 psrlw_i2r (7, mm1);
00878
00879 psubw_r2r (mm4, mm2);
00880 psubw_r2r (mm5, mm3);
00881
00882 pmullw_r2r (mm7, mm0);
00883 pmullw_r2r (mm7, mm1);
00884
00885 movq_m2r (mm_cpool[3], mm7);
00886
00887 psllw_i2r (2, mm2);
00888 psllw_i2r (2, mm3);
00889
00890 psrlw_i2r (1, mm0);
00891 psrlw_i2r (1, mm1);
00892
      /* ydst += (high word of (diff * newalpha) + 1) >> 1; this is the vector
         form of "(... + 65536) >> 17" in the scalar loop. */
00893 pmulhw_r2r (mm0, mm2);
00894 pmulhw_r2r (mm1, mm3);
00895
00896 paddw_r2r (mm7, mm2);
00897 paddw_r2r (mm7, mm3);
00898
00899 psraw_i2r (1, mm2);
00900 psraw_i2r (1, mm3);
00901
00902 paddw_r2r (mm4, mm2);
00903 paddw_r2r (mm5, mm3);
00904
00905 packuswb_r2r (mm3, mm2);
00906
00907 movq_r2m (mm2, ydst[x]);
      /* Chroma: even rows only, one U/V sample per two luma pixels. */
00908 if ((y & 1) == 0 && dochroma)
00909 {
      /* The shift pair keeps the low word of every dword of mm0/mm1, i.e. the
         newalpha of the even pixels; packssdw then gathers those four values
         into mm0.  The U/V colour words and four destination U/V bytes are
         loaded alongside. */
00910 pslld_i2r (16, mm0);
00911 movq_m2r (usrcm, mm2);
00912 pslld_i2r (16, mm1);
00913 movq_m2r (vsrcm, mm3);
00914 psrld_i2r (16, mm0);
00915 movd_m2r (udst[x>>1], mm4);
00916 psrld_i2r (16, mm1);
00917 movd_m2r (vdst[x>>1], mm5);
00918
00919 punpcklbw_r2r (mm6, mm4);
00920 punpcklbw_r2r (mm6, mm5);
00921
00922 packssdw_r2r (mm1, mm0);
00923
      /* Same blend step as for luma, applied to U (mm2) and V (mm3). */
00924 psubw_r2r (mm4, mm2);
00925 psubw_r2r (mm5, mm3);
00926
00927 psllw_i2r (2, mm2);
00928 psllw_i2r (2, mm3);
00929
00930 pmulhw_r2r (mm0, mm2);
00931 pmulhw_r2r (mm0, mm3);
00932
00933 paddw_r2r (mm7, mm2);
00934 paddw_r2r (mm7, mm3);
00935
00936 psraw_i2r (1, mm2);
00937 psraw_i2r (1, mm3);
00938
00939 paddw_r2r (mm4, mm2);
00940 paddw_r2r (mm5, mm3);
00941
      /* Only the low four bytes of each packed result are stored. */
00942 packuswb_r2r (mm7, mm2);
00943 packuswb_r2r (mm7, mm3);
00944
00945 movd_r2m (mm2, udst[x>>1]);
00946 movd_r2m (mm3, vdst[x>>1]);
00947 }
00948 }
      /* Scalar loop for the pixels left over after the 8-wide groups. */
00949 for (; x < width; x++)
00950 {
00951
00952
00953
00954 alpha = asrc;
00955 newalpha = ((((((255 - alpha) * adst[x]) + 1) >> 1) * 514) >> 16) + alpha;
00956 newalpha = (((alpha * rec_lut[newalpha]) >> 7) * 257) >> 1;
00957 adst[x] = ((((((255 - adst[x]) * alpha) + 1) >> 1) * 514) >> 16) + adst[x];
00958 ydst[x] = (((((ysrc - ydst[x]) << 2) * newalpha) + 65536) >> 17) + ydst[x];
00959 if (((y & 1) | (x & 1)) == 0 && dochroma)
00960 {
00961 udst[x>>1] = (((((usrc - udst[x>>1]) << 2) * newalpha) + 65536) >> 17) + udst[x>>1];
00962 vdst[x>>1] = (((((vsrc - vdst[x>>1]) << 2) * newalpha) + 65536) >> 17) + vdst[x>>1];
00963 }
00964 }
00965
      /* Step to the next row of the luma and alpha planes. */
00966 ydst += dststrd;
00967 adst += dststrd;
00968
      /* Chroma planes advance once per two rows, by half the stride. */
00969 if ((y & 1) == 0 && dochroma)
00970 {
00971 udst += dststrd >> 1;
00972 vdst += dststrd >> 1;
00973 }
00974 }
      /* Leave MMX state before returning. */
00975 emms();
00976 }
00977
/*
 * blendcolumn_mmx - blend a source column onto the destination planes.
 *
 * The source contributes a single Y/U/V/alpha sample per row (read through
 * the source pointers without an x index) which is applied to every pixel of
 * that row.  Rows are handled in groups of eight pixels by the MMX sequence;
 * remaining pixels go through the scalar loop, which spells out the
 * arithmetic the stage comments refer to.
 *
 *   ysrc/usrc/vsrc/asrc  source column
 *   srcstrd              added to ysrc/asrc after every row; usrc/vsrc advance
 *                        by srcstrd >> 1 after even rows when dochroma is set
 *   ydst/udst/vdst/adst  destination planes, updated in place
 *   dststrd              destination counterpart of srcstrd
 *   width, height        size of the region in pixels
 *   alphamod             scales the source alpha: (*asrc * alphamod + 0x80) >> 8
 *   dochroma             non-zero: U/V are blended too, on even rows only
 *   rec_lut              lookup table indexed by the combined alpha value
 *                        (presumably reciprocals -- confirm)
 *   pow_lut              unused in this variant
 *
 * NOTE(review): *usrc and *vsrc are dereferenced at the top of every row even
 * when dochroma is 0, so they must be readable in that case too -- confirm
 * with callers.
 *
 * Stage comments assume the ffmpeg-mmx.h macros wrap the MMX instructions of
 * the same name with (source, destination) operand order -- TODO confirm.
 */
00978 void blendcolumn_mmx (uint8_t * ysrc, uint8_t * usrc, uint8_t * vsrc,
00979 uint8_t * asrc, int srcstrd,
00980 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
00981 uint8_t * adst, int dststrd,
00982 int width, int height, int alphamod, int dochroma,
00983 int16_t rec_lut[256], uint8_t pow_lut[256][256])
00984 {
00985 int x, y, i, alpha, newalpha;
00986 mmx_t amod = { uw: {alphamod, alphamod, alphamod, alphamod} };
      /* this row's source samples, each replicated into four 16-bit lanes */
00987 mmx_t ysrcm;
00988 mmx_t usrcm;
00989 mmx_t vsrcm;
00990 mmx_t asrcm;
00991 int16_t wbuf[8];
00992
00993 (void) pow_lut;
00994 for (y = 0; y < height; y++)
00995 {
00996
      /* Replicate this row's samples into word lanes, so they need no
         byte-to-word widening later. */
00997 ysrcm.uw[0] = *ysrc;
00998 ysrcm.uw[1] = *ysrc;
00999 ysrcm.uw[2] = *ysrc;
01000 ysrcm.uw[3] = *ysrc;
01001 asrcm.uw[0] = *asrc;
01002 asrcm.uw[1] = *asrc;
01003 asrcm.uw[2] = *asrc;
01004 asrcm.uw[3] = *asrc;
01005
01006
01007
01008 usrcm.uw[0] = *usrc;
01009 usrcm.uw[1] = *usrc;
01010 usrcm.uw[2] = *usrc;
01011 usrcm.uw[3] = *usrc;
01012 vsrcm.uw[0] = *vsrc;
01013 vsrcm.uw[1] = *vsrc;
01014 vsrcm.uw[2] = *vsrc;
01015 vsrcm.uw[3] = *vsrc;
01016 for (x = 0; x + 7 < width; x += 8)
01017 {
      /* mm0/mm1 = source alpha words, mm2/mm3 = 8 destination alpha bytes. */
01018 movq_m2r (asrcm, mm0);
01019 movq_m2r (adst[x], mm2);
01020 movq_m2r (mm_cpool[0], mm6);
01021 pxor_r2r (mm7, mm7);
01022 movq_m2r (amod, mm5);
01023 movq_m2r (mm_cpool[1], mm4);
01024 movq_r2r (mm0, mm1);
01025 movq_r2r (mm2, mm3);
01026
      /* Widen adst to words. */
01027 punpcklbw_r2r (mm7, mm2);
01028 punpckhbw_r2r (mm7, mm3);
01029
      /* alpha = (asrc * alphamod + 128) >> 8 in mm0/mm1;
         interleaved with it, mm4/mm5 = 255 - adst. */
01030 pmullw_r2r (mm5, mm0);
01031 pmullw_r2r (mm5, mm1);
01032
01033 movq_r2r (mm4, mm5);
01034
01035 paddw_r2r (mm6, mm0);
01036 paddw_r2r (mm6, mm1);
01037
01038 psubw_r2r (mm2, mm4);
01039 psubw_r2r (mm3, mm5);
01040
01041 psrlw_i2r (8, mm0);
01042 psrlw_i2r (8, mm1);
01043
      /* New destination alpha in mm4/mm5:
         ((((255 - adst) * alpha + 1) >> 1) * 514 >> 16) + adst. */
01044 movq_m2r (mm_cpool[3], mm6);
01045 movq_m2r (mm_cpool[2], mm7);
01046
01047 pmullw_r2r (mm0, mm4);
01048 pmullw_r2r (mm1, mm5);
01049
01050 paddw_r2r (mm6, mm4);
01051 paddw_r2r (mm6, mm5);
01052
01053 psrlw_i2r (1, mm4);
01054 psrlw_i2r (1, mm5);
01055
01056 pmulhw_r2r (mm7, mm4);
01057 pmulhw_r2r (mm7, mm5);
01058
01059 movq_m2r (mm_cpool[1], mm6);
01060
01061 paddw_r2r (mm2, mm4);
01062 paddw_r2r (mm3, mm5);
01063
01064 movq_r2r (mm6, mm7);
01065
      /* Pack the new alpha to bytes (stored at 01071); mm6/mm7 = 255 - alpha. */
01066 packuswb_r2r (mm5, mm4);
01067
01068 psubw_r2r (mm0, mm6);
01069 psubw_r2r (mm1, mm7);
01070
01071 movq_r2m (mm4, adst[x]);
01072 movq_m2r (mm_cpool[3], mm5);
01073 movq_m2r (mm_cpool[2], mm4);
01074
      /* rec_lut index in mm6/mm7, from the OLD adst still held in mm2/mm3:
         ((((255 - alpha) * adst + 1) >> 1) * 514 >> 16) + alpha. */
01075 pmullw_r2r (mm2, mm6);
01076 pmullw_r2r (mm3, mm7);
01077
01078 paddw_r2r (mm5, mm6);
01079 paddw_r2r (mm5, mm7);
01080
01081 psrlw_i2r (1, mm6);
01082 psrlw_i2r (1, mm7);
01083
01084 pmulhw_r2r (mm4, mm6);
01085 pmulhw_r2r (mm4, mm7);
01086
01087 paddw_r2r (mm0, mm6);
01088 paddw_r2r (mm1, mm7);
01089
      /* Source luma (already four words) and 8 destination luma bytes. */
01090 movq_m2r (ysrcm, mm2);
01091 movq_m2r (ydst[x], mm4);
01092
      /* Spill the eight indices so the table lookup can be done in C. */
01093 movq_r2m (mm6, wbuf[0]);
01094 movq_r2m (mm7, wbuf[4]);
01095 movq_m2r (mm_cpool[4], mm7);
01096 pxor_r2r (mm6, mm6);
01097
01098 movq_r2r (mm2, mm3);
01099 movq_r2r (mm4, mm5);
01100
      /* Replace each index by its rec_lut entry, in place. */
01101 for (i = 0; i < 8; i++)
01102 wbuf[i] = rec_lut[wbuf[i]];
01103
      /* newalpha = (((alpha * rec_lut[idx]) >> 7) * 257) >> 1 in mm0/mm1,
         interleaved with widening ydst and forming (ysrc - ydst) << 2
         in mm2/mm3. */
01104 pmullw_m2r (wbuf[0], mm0);
01105 pmullw_m2r (wbuf[4], mm1);
01106
01107 punpcklbw_r2r (mm6, mm4);
01108 punpckhbw_r2r (mm6, mm5);
01109
01110 psrlw_i2r (7, mm0);
01111 psrlw_i2r (7, mm1);
01112
01113 psubw_r2r (mm4, mm2);
01114 psubw_r2r (mm5, mm3);
01115
01116 pmullw_r2r (mm7, mm0);
01117 pmullw_r2r (mm7, mm1);
01118
01119 movq_m2r (mm_cpool[3], mm7);
01120
01121 psllw_i2r (2, mm2);
01122 psllw_i2r (2, mm3);
01123
01124 psrlw_i2r (1, mm0);
01125 psrlw_i2r (1, mm1);
01126
      /* ydst += (high word of (diff * newalpha) + 1) >> 1; this is the vector
         form of "(... + 65536) >> 17" in the scalar loop. */
01127 pmulhw_r2r (mm0, mm2);
01128 pmulhw_r2r (mm1, mm3);
01129
01130 paddw_r2r (mm7, mm2);
01131 paddw_r2r (mm7, mm3);
01132
01133 psraw_i2r (1, mm2);
01134 psraw_i2r (1, mm3);
01135
01136 paddw_r2r (mm4, mm2);
01137 paddw_r2r (mm5, mm3);
01138
01139 packuswb_r2r (mm3, mm2);
01140
01141 movq_r2m (mm2, ydst[x]);
      /* Chroma: even rows only, one U/V sample per two luma pixels. */
01142 if ((y & 1) == 0 && dochroma)
01143 {
      /* The shift pair keeps the low word of every dword of mm0/mm1, i.e. the
         newalpha of the even pixels; packssdw then gathers those four values
         into mm0.  The source U/V words and four destination U/V bytes are
         loaded alongside. */
01144 pslld_i2r (16, mm0);
01145 movq_m2r (usrcm, mm2);
01146 pslld_i2r (16, mm1);
01147 movq_m2r (vsrcm, mm3);
01148 psrld_i2r (16, mm0);
01149 movd_m2r (udst[x>>1], mm4);
01150 psrld_i2r (16, mm1);
01151 movd_m2r (vdst[x>>1], mm5);
01152
01153 punpcklbw_r2r (mm6, mm4);
01154 punpcklbw_r2r (mm6, mm5);
01155
01156 packssdw_r2r (mm1, mm0);
01157
      /* Same blend step as for luma, applied to U (mm2) and V (mm3). */
01158 psubw_r2r (mm4, mm2);
01159 psubw_r2r (mm5, mm3);
01160
01161 psllw_i2r (2, mm2);
01162 psllw_i2r (2, mm3);
01163
01164 pmulhw_r2r (mm0, mm2);
01165 pmulhw_r2r (mm0, mm3);
01166
01167 paddw_r2r (mm7, mm2);
01168 paddw_r2r (mm7, mm3);
01169
01170 psraw_i2r (1, mm2);
01171 psraw_i2r (1, mm3);
01172
01173 paddw_r2r (mm4, mm2);
01174 paddw_r2r (mm5, mm3);
01175
      /* Only the low four bytes of each packed result are stored. */
01176 packuswb_r2r (mm7, mm2);
01177 packuswb_r2r (mm7, mm3);
01178
01179 movd_r2m (mm2, udst[x>>1]);
01180 movd_r2m (mm3, vdst[x>>1]);
01181 }
01182 }
      /* Scalar loop for the pixels left over after the 8-wide groups. */
01183 for (; x < width; x++)
01184 {
01185
01186
01187
01188 alpha = (*asrc * alphamod + 0x80) >> 8;
01189 newalpha = ((((((255 - alpha) * adst[x]) + 1) >> 1) * 514) >> 16) + alpha;
01190 newalpha = (((alpha * rec_lut[newalpha]) >> 7) * 257) >> 1;
01191 adst[x] = ((((((255 - adst[x]) * alpha) + 1) >> 1) * 514) >> 16) + adst[x];
01192 ydst[x] = (((((*ysrc - ydst[x]) << 2) * newalpha) + 65536) >> 17) + ydst[x];
01193 if (((y & 1) | (x & 1)) == 0 && dochroma)
01194 {
01195 udst[x>>1] = (((((*usrc - udst[x>>1]) << 2) * newalpha) + 65536) >> 17) + udst[x>>1];
01196 vdst[x>>1] = (((((*vsrc - vdst[x>>1]) << 2) * newalpha) + 65536) >> 17) + vdst[x>>1];
01197 }
01198 }
01199
      /* Step to the next row of the luma and alpha planes. */
01200 ysrc += srcstrd;
01201 asrc += srcstrd;
01202 ydst += dststrd;
01203 adst += dststrd;
01204
      /* Chroma planes advance once per two rows, by half the stride. */
01205 if ((y & 1) == 0 && dochroma)
01206 {
01207 usrc += srcstrd >> 1;
01208 vsrc += srcstrd >> 1;
01209 udst += dststrd >> 1;
01210 vdst += dststrd >> 1;
01211 }
01212 }
      /* Leave MMX state before returning. */
01213 emms();
01214 }
01215 #endif
01216
/*
 * blendregion - portable blend of a source region onto the destination planes.
 *
 * For every pixel the source alpha is scaled by alphamod, the blend weight is
 * taken from pow_lut[alpha][adst] (times 257), the destination alpha is
 * updated, and the luma is moved towards the source by that weight.  When
 * dochroma is non-zero, U and V are blended on even rows at even x using the
 * same weight.
 *
 *   ysrc/usrc/vsrc/asrc  source planes; ysrc/asrc advance by srcstrd per row,
 *                        usrc/vsrc by srcstrd >> 1 after each chroma row
 *   ydst/udst/vdst/adst  destination planes (updated in place), advanced the
 *                        same way using dststrd
 *   width, height        size of the region in pixels
 *   alphamod             alpha scale: (asrc * alphamod + 0x80) >> 8
 *   dochroma             non-zero enables the chroma update
 *   rec_lut              unused in this variant
 *   pow_lut              weight table indexed [alpha][destination alpha]
 */
void blendregion (uint8_t * ysrc, uint8_t * usrc, uint8_t * vsrc,
                  uint8_t * asrc, int srcstrd,
                  uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
                  uint8_t * adst, int dststrd,
                  int width, int height, int alphamod, int dochroma,
                  int16_t rec_lut[256], uint8_t pow_lut[256][256])
{
    int row, col;

    (void) rec_lut;
    for (row = 0; row < height; row++)
    {
        /* Chroma is touched on even rows only, and only when requested. */
        const int chroma_row = (row & 1) == 0 && dochroma != 0;

        for (col = 0; col < width; col++)
        {
            const int a = ((asrc[col] * alphamod) + 0x80) >> 8;
            /* The weight must be looked up before adst is overwritten. */
            const int weight = pow_lut[a][adst[col]] * 257;

            adst[col] = ((255 - adst[col]) * a) / 255 + adst[col];
            ydst[col] += (((ysrc[col] - ydst[col]) * weight) + 32768) >> 16;

            if (chroma_row && (col & 1) == 0)
            {
                const int c = col >> 1;

                udst[c] += (((usrc[c] - udst[c]) * weight) + 32768) >> 16;
                vdst[c] += (((vsrc[c] - vdst[c]) * weight) + 32768) >> 16;
            }
        }

        ysrc += srcstrd;
        asrc += srcstrd;
        ydst += dststrd;
        adst += dststrd;

        if (chroma_row)
        {
            usrc += srcstrd >> 1;
            vsrc += srcstrd >> 1;
            udst += dststrd >> 1;
            vdst += dststrd >> 1;
        }
    }
}
01271
/*
 * blendcolumn2 - portable blend of one of two source columns onto the
 * destination planes, chosen per pixel by mask[x].
 *
 * Each source contributes a single Y/U/V/alpha sample per row (read through
 * the source pointers without an x index).  Where mask[x] is non-zero source 1
 * is used, otherwise source 2.  mask is indexed by x only and is not advanced
 * between rows.  For every pixel the selected alpha is scaled by alphamod, the
 * blend weight is taken from pow_lut[alpha][adst] (times 257), the destination
 * alpha is updated and the luma is moved towards the selected source.  When
 * dochroma is non-zero, U and V are blended on even rows at even x.
 *
 *   ysrc1/usrc1/vsrc1/asrc1, srcstrd1   first source column and its stride
 *   ysrc2/usrc2/vsrc2/asrc2, srcstrd2   second source column and its stride
 *   mask                                per-x selector
 *   ydst/udst/vdst/adst, dststrd        destination planes, updated in place
 *   width, height                       size of the region in pixels
 *   alphamod                            alpha scale: (*asrc * alphamod + 0x80) >> 8
 *   dochroma                            non-zero enables the chroma update
 *   rec_lut                             unused in this variant
 *   pow_lut                             weight table indexed [alpha][destination alpha]
 *
 * The luma-only rows previously took the alpha from source 2 even when the
 * mask selected source 1; both kinds of row now share one selection so the
 * alpha always comes from the same source as the colour.
 */
void blendcolumn2 (uint8_t * ysrc1, uint8_t * usrc1, uint8_t * vsrc1,
                   uint8_t * asrc1, int srcstrd1,
                   uint8_t * ysrc2, uint8_t * usrc2, uint8_t * vsrc2,
                   uint8_t * asrc2, int srcstrd2,
                   uint8_t * mask,
                   uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
                   uint8_t * adst, int dststrd,
                   int width, int height, int alphamod, int dochroma,
                   int16_t rec_lut[256], uint8_t pow_lut[256][256])
{
    int newalpha, alpha;
    int x, y;
    uint8_t * ysrc, * usrc, * vsrc, * asrc;

    (void) rec_lut;
    for (y = 0; y < height; y++)
    {
        /* Chroma is touched on even rows only, and only when requested. */
        const int chroma_row = (y & 1) == 0 && dochroma != 0;

        for (x = 0; x < width; x++)
        {
            /* Non-zero mask selects source 1, zero selects source 2. */
            if (mask[x])
            {
                ysrc = ysrc1;
                usrc = usrc1;
                vsrc = vsrc1;
                asrc = asrc1;
            }
            else
            {
                ysrc = ysrc2;
                usrc = usrc2;
                vsrc = vsrc2;
                asrc = asrc2;
            }

            alpha = ((*asrc * alphamod) + 0x80) >> 8;

            /* The weight must be looked up before adst is overwritten. */
            newalpha = pow_lut [alpha][adst[x]] * 257;
            adst[x] = ((255 - adst[x]) * alpha) / 255 + adst[x];
            ydst[x] += (((*ysrc - ydst[x]) * newalpha) + 32768) >> 16;

            /* The chroma sources are dereferenced only on chroma rows. */
            if (chroma_row && (x & 1) == 0)
            {
                udst[x>>1] += (((*usrc - udst[x>>1]) * newalpha) + 32768) >> 16;
                vdst[x>>1] += (((*vsrc - vdst[x>>1]) * newalpha) + 32768) >> 16;
            }
        }

        ysrc1 += srcstrd1;
        asrc1 += srcstrd1;
        ysrc2 += srcstrd2;
        asrc2 += srcstrd2;
        ydst += dststrd;
        adst += dststrd;

        if (chroma_row)
        {
            usrc1 += srcstrd1 >> 1;
            vsrc1 += srcstrd1 >> 1;
            usrc2 += srcstrd2 >> 1;
            vsrc2 += srcstrd2 >> 1;
            udst += dststrd >> 1;
            vdst += dststrd >> 1;
        }
    }
}
01360
/*
 * blendcolor - blend a single constant colour onto a destination region,
 * using a per-pixel source alpha plane.
 *
 * Per pixel:
 *   alpha    = (asrc[x] * alphamod + 0x80) >> 8
 *   newalpha = pow_lut[alpha][adst[x]] * 257     (16-bit blend weight)
 *   adst[x]  = (255 - adst[x]) * alpha / 255 + adst[x]
 *   ydst[x] += ((ysrc - ydst[x]) * newalpha + 32768) >> 16
 *
 * U and V are blended only on even rows when dochroma is non-zero, at even
 * x, into udst[x >> 1] / vdst[x >> 1]; the chroma pointers then advance by
 * dststrd >> 1.
 *
 * ysrc/usrc/vsrc: the constant colour.
 * asrc, srcstrd:  source alpha plane and its row stride.
 * ydst/udst/vdst/adst, dststrd: destination planes and row stride.
 * width, height:  size of the region in luma samples.
 * alphamod:       multiplier applied to the source alpha.
 * dochroma:       non-zero to blend the U and V planes as well.
 * rec_lut:        unused.
 * pow_lut:        table indexed [source alpha][destination alpha].
 */
void blendcolor (uint8_t ysrc, uint8_t usrc, uint8_t vsrc,
                 uint8_t * asrc, int srcstrd,
                 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
                 uint8_t * adst, int dststrd,
                 int width, int height, int alphamod, int dochroma,
                 int16_t rec_lut[256], uint8_t pow_lut[256][256])
{
  int row, col;

  (void) rec_lut;
  for (row = 0; row < height; row++)
    {
      /* Odd rows, or dochroma == 0, leave the chroma planes alone. */
      const int with_chroma = !((row & 1) || dochroma == 0);

      for (col = 0; col < width; col++)
        {
          const int src_a = ((asrc[col] * alphamod) + 0x80) >> 8;
          /* Weight is taken with the destination alpha as it was before
             this pixel is written. */
          const int weight = pow_lut [src_a][adst[col]] * 257;

          adst[col] = ((255 - adst[col]) * src_a) / 255 + adst[col];
          ydst[col] += (((ysrc - ydst[col]) * weight) + 32768) >> 16;

          if (with_chroma && (col & 1) == 0)
            {
              const int c = col >> 1;

              udst[c] += (((usrc - udst[c]) * weight) + 32768) >> 16;
              vdst[c] += (((vsrc - vdst[c]) * weight) + 32768) >> 16;
            }
        }

      asrc += srcstrd;
      ydst += dststrd;
      adst += dststrd;
      if (with_chroma)
        {
          udst += dststrd >> 1;
          vdst += dststrd >> 1;
        }
    }
}
01411
/*
 * blendconst - blend a single constant colour with a single constant alpha
 * onto a destination region.
 *
 * Per pixel (alpha is asrc, used unmodified):
 *   newalpha = pow_lut[asrc][adst[x]] * 257      (16-bit blend weight)
 *   adst[x]  = (255 - adst[x]) * asrc / 255 + adst[x]
 *   ydst[x] += ((ysrc - ydst[x]) * newalpha + 32768) >> 16
 *
 * U and V are blended only on even rows when dochroma is non-zero, at even
 * x, into udst[x >> 1] / vdst[x >> 1]; the chroma pointers then advance by
 * dststrd >> 1.
 *
 * ysrc/usrc/vsrc: the constant colour.
 * asrc:           the constant source alpha.
 * ydst/udst/vdst/adst, dststrd: destination planes and row stride.
 * width, height:  size of the region in luma samples.
 * dochroma:       non-zero to blend the U and V planes as well.
 * rec_lut:        unused.
 * pow_lut:        table indexed [source alpha][destination alpha].
 */
void blendconst (uint8_t ysrc, uint8_t usrc, uint8_t vsrc,
                 uint8_t asrc,
                 uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
                 uint8_t * adst, int dststrd,
                 int width, int height, int dochroma,
                 int16_t rec_lut[256], uint8_t pow_lut[256][256])
{
  /* The source alpha is passed by value, so it is the same for every pixel. */
  const int src_a = asrc;
  int row, col;

  (void) rec_lut;
  for (row = 0; row < height; row++)
    {
      /* Odd rows, or dochroma == 0, leave the chroma planes alone. */
      const int with_chroma = !((row & 1) || dochroma == 0);

      for (col = 0; col < width; col++)
        {
          /* Look the weight up before the destination alpha is replaced. */
          const int weight = pow_lut [src_a][adst[col]] * 257;

          adst[col] = ((255 - adst[col]) * src_a) / 255 + adst[col];
          ydst[col] += (((ysrc - ydst[col]) * weight) + 32768) >> 16;

          if (with_chroma && (col & 1) == 0)
            {
              const int c = col >> 1;

              udst[c] += (((usrc - udst[c]) * weight) + 32768) >> 16;
              vdst[c] += (((vsrc - vdst[c]) * weight) + 32768) >> 16;
            }
        }

      ydst += dststrd;
      adst += dststrd;
      if (with_chroma)
        {
          udst += dststrd >> 1;
          vdst += dststrd >> 1;
        }
    }
}
01460
/*
 * blendcolumn - blend a per-row source sample across a destination region.
 *
 * The source supplies one Y/U/V/alpha sample per row: the samples are read
 * through the base pointers (not indexed by x) and applied to every x of
 * that row.  The source pointers step by srcstrd after each row.
 *
 * Per pixel:
 *   alpha    = (*asrc * alphamod + 0x80) >> 8
 *   newalpha = pow_lut[alpha][adst[x]] * 257     (16-bit blend weight)
 *   adst[x]  = (255 - adst[x]) * alpha / 255 + adst[x]
 *   ydst[x] += ((*ysrc - ydst[x]) * newalpha + 32768) >> 16
 *
 * U and V are blended only on even rows when dochroma is non-zero, at even
 * x, into udst[x >> 1] / vdst[x >> 1]; after such rows the chroma pointers
 * advance by (stride >> 1).
 *
 * ysrc/usrc/vsrc/asrc, srcstrd: source planes and row stride.
 * ydst/udst/vdst/adst, dststrd: destination planes and row stride.
 * width, height: size of the region in luma samples.
 * alphamod:      multiplier applied to the source alpha.
 * dochroma:      non-zero to blend the U and V planes as well.
 * rec_lut:       unused.
 * pow_lut:       table indexed [source alpha][destination alpha].
 */
void blendcolumn (uint8_t * ysrc, uint8_t * usrc, uint8_t * vsrc,
                  uint8_t * asrc, int srcstrd,
                  uint8_t * ydst, uint8_t * udst, uint8_t * vdst,
                  uint8_t * adst, int dststrd,
                  int width, int height, int alphamod, int dochroma,
                  int16_t rec_lut[256], uint8_t pow_lut[256][256])
{
  int row, col;

  (void) rec_lut;
  for (row = 0; row < height; row++)
    {
      const int luma_only = (row & 1) || dochroma == 0;

      for (col = 0; col < width; col++)
        {
          /* The source samples are re-read for every pixel, exactly as the
             two-branch form of this loop does. */
          const int src_a = ((*asrc * alphamod) + 0x80) >> 8;
          const int weight = pow_lut [src_a][adst[col]] * 257;

          adst[col] = ((255 - adst[col]) * src_a) / 255 + adst[col];
          ydst[col] += (((*ysrc - ydst[col]) * weight) + 32768) >> 16;

          /* Chroma is half-width: only even columns carry a sample. */
          if (luma_only || (col & 1) != 0)
            continue;

          udst[col>>1] += (((*usrc - udst[col>>1]) * weight) + 32768) >> 16;
          vdst[col>>1] += (((*vsrc - vdst[col>>1]) * weight) + 32768) >> 16;
        }

      ysrc += srcstrd;
      asrc += srcstrd;
      ydst += dststrd;
      adst += dststrd;

      if (luma_only)
        continue;

      usrc += srcstrd >> 1;
      vsrc += srcstrd >> 1;
      udst += dststrd >> 1;
      vdst += dststrd >> 1;
    }
}