00001 #include "util-osd.h"
00002 #include "dithertable.h"
00003
/*
 * Byte offsets of the four channels inside one 4-byte OSD pixel, as the
 * pixel sits in memory.  The routines in this file add these offsets to a
 * scan-line pointer; the R/G/B slots carry the Y/U/V samples of the
 * pre-converted OSD image and the A slot carries alpha.
 */
#if !HAVE_BIGENDIAN
/* little-endian host: B, G, R, A in ascending address order */
#define B_OI 0
#define G_OI 1
#define R_OI 2
#define A_OI 3
#else
/* big-endian host: A, R, G, B in ascending address order */
#define A_OI 0
#define R_OI 1
#define G_OI 2
#define B_OI 3
#endif
00015
00016 void yuv888_to_yv12(VideoFrame *frame, MythImage *osd_image,
00017 int left, int top, int right, int bottom)
00018 {
00019 bool c_aligned = !(left % ALIGN_C || right % ALIGN_C);
00020 bool misaligned = (top % ALIGN_C || bottom % ALIGN_C) || !c_aligned;
00021 bool mmx_aligned = false;
00022 #ifdef MMX
00023 mmx_aligned = !(left % ALIGN_X_MMX || right % ALIGN_X_MMX);
00024 #endif
00025
00026 if (misaligned)
00027 {
00028 LOG(VB_GENERAL, LOG_ERR,
00029 QString("OSD image size is odd. This shouldn't happen."));
00030 }
00031 else if (mmx_aligned)
00032 {
00033 mmx_yuv888_to_yv12(frame, osd_image, left, top, right, bottom);
00034 }
00035 else if (c_aligned)
00036 {
00037 #ifdef MMX
00038 LOG(VB_GENERAL, LOG_WARNING, "MMX available but image not MMX aligned. "
00039 "This shouldn't happen.");
00040 #endif
00041 c_yuv888_to_yv12(frame, osd_image, left, top, right, bottom);
00042 }
00043 }
00044
// Emits one volatile inline-assembly statement; the trailing ';' is part of
// the macro, so uses below are written without one.
00045 #define ASM(code) __asm__ __volatile__(code);
/**
 * MMX implementation of the OSD-onto-frame blend.
 *
 * Each inner iteration handles a block 8 pixels wide and 2 rows tall.
 * Source pixels are 4 bytes each.  After de-interleaving, byte 2 of every
 * pixel is blended into plane 0 (y1/y2), byte 1 into plane 1 (u), byte 0
 * into plane 2 (v), and byte 3 is the alpha.  Existing frame samples are
 * weighted by (255 - alpha) >> 8 before the OSD value is added.  Planes 1
 * and 2 receive one sample per 2x2 pixel block, built from the averaged
 * OSD values and the averaged inverse alpha of that block.
 *
 * The whole body is compiled only when MMX is defined; otherwise the
 * function is empty.
 *
 * @param frame      destination frame (buf, offsets[] and pitches[] are used)
 * @param osd_image  source image (scanLine() and bytesPerLine() are used)
 * @param left       left edge of the rectangle (inclusive)
 * @param top        top edge of the rectangle (inclusive)
 * @param right      right edge of the rectangle (exclusive)
 * @param bottom     bottom edge of the rectangle (exclusive)
 */
00046 void inline mmx_yuv888_to_yv12(VideoFrame *frame, MythImage *osd_image,
00047 int left, int top, int right, int bottom)
00048 {
00049 #ifdef MMX
00050 unsigned char *src1, *src2, *y1, *y2, *u, *v;
00051 int y_wrap, src_wrap, u_wrap, v_wrap, width, height;
00052
00053 width = right - left;
00054 height = bottom - top;
// src1/src2 point at the first pixel of two consecutive source rows.
00055 src1 = osd_image->scanLine(top) + (left << 2);
00056 src2 = src1 + osd_image->bytesPerLine();
// After a row of pixels has been consumed, skip ahead by two source rows.
00057 src_wrap = (osd_image->bytesPerLine() << 1)- (width << 2);
00058
// y1/y2 walk two consecutive rows of plane 0; u and v walk planes 1 and 2
// at half the horizontal and vertical position.
00059 y1 = frame->buf + frame->offsets[0] + (frame->pitches[0] * top) + left;
00060 y2 = y1 + frame->pitches[0];
00061 u = frame->buf + frame->offsets[1] +
00062 (frame->pitches[1] * (top >> 1)) + (left >> 1);
00063 v = frame->buf + frame->offsets[2] +
00064 (frame->pitches[2] * (top >> 1)) + (left >> 1);
// Plane 0 advances two rows per outer iteration, planes 1 and 2 one row.
00065 y_wrap = (frame->pitches[0] << 1) - width;
00066 u_wrap = frame->pitches[1] - (width >> 1);
00067 v_wrap = frame->pitches[2] - (width >> 1);
00068
// 64-bit constants used as memory operands: all ones, all zeros, and a mask
// keeping the low byte of each 16-bit word.
00069 static long long MMX_MAX = 0xFFFFFFFFFFFFFFFFLL;
00070 static long long MMX_MIN = 0x0000000000000000LL;
00071 static long long MMX_255 = 0x00FF00FF00FF00FFLL;
// Scratch slots that hold row-1 results while row 2 is processed.
// NOTE(review): these are static, so every call shares the same storage;
// concurrent callers would overwrite each other - confirm callers are
// serialized.
00072 static long long tmp_u, tmp_v, tmp_a;
00073
00074 for (int row = 0; row < height; row += 2)
00075 {
// Eight pixels per iteration.
00076 for (int col = 0; col < (width >> 3); col++)
00077 {
00078
00079
// Row 1, pixels 0-3: de-interleave by byte position.
// Result: mm0 = four byte-0 values then four byte-1 values,
//         mm2 = four byte-2 values then four byte-3 values.
00080 ASM("movq %0, %%mm1"::"m"(src1[0]))
00081 ASM("movq %mm1, %mm2")
00082 ASM("punpckhbw %0, %%mm1"::"m"(src1[8]))
00083 ASM("punpcklbw %0, %%mm2"::"m"(src1[8]))
00084 ASM("movq %mm2, %mm0")
00085 ASM("punpckhbw %mm1, %mm2")
00086 ASM("punpcklbw %mm1, %mm0")
// Row 1, pixels 4-7: same shuffle.
// Result: mm4 = byte-0 | byte-1 values, mm1 = byte-2 | byte-3 values.
00087 ASM("movq %0, %%mm3"::"m"(src1[16]))
00088 ASM("movq %mm3, %mm4")
00089 ASM("punpckhbw %0, %%mm3"::"m"(src1[24]))
00090 ASM("punpcklbw %0, %%mm4"::"m"(src1[24]))
00091 ASM("movq %mm4, %mm1")
00092 ASM("punpckhbw %mm3, %mm1")
00093 ASM("punpcklbw %mm3, %mm4")
// Combine both halves: mm3 = eight byte-2 values (OSD luma),
// mm2 = eight byte-3 values (alpha).
00094 ASM("movq %mm2, %mm3")
00095 ASM("punpckldq %mm1, %mm3")
00096 ASM("punpckhdq %mm1, %mm2")
// mm7 = 255 - alpha for all eight pixels; a copy is kept in mm2, then the
// bytes are widened to words in mm6 (pixels 0-3) and mm7 (pixels 4-7).
00097 ASM("movq %0, %%mm7"::"m"(MMX_MAX))
00098 ASM("psubusb %mm2, %mm7")
00099 ASM("movq %mm7, %mm6")
00100 ASM("movq %mm7, %mm2")
00101 ASM("punpckhbw %0, %%mm7"::"m"(MMX_MIN))
00102 ASM("punpcklbw %0, %%mm6"::"m"(MMX_MIN))
// Load eight existing luma bytes, scale each by (255 - alpha) >> 8, add the
// OSD luma with unsigned byte saturation and store the result back.
00103 ASM("movq %0, %%mm5"::"m"(*y1))
00104 ASM("movq %mm5, %mm1")
00105 ASM("punpckhbw %0, %%mm5"::"m"(MMX_MIN))
00106 ASM("punpcklbw %0, %%mm1"::"m"(MMX_MIN))
00107 ASM("pmullw %mm7, %mm5")
00108 ASM("pmullw %mm6, %mm1")
00109 ASM("psrlw $8, %mm5")
00110 ASM("psrlw $8, %mm1")
00111 ASM("packuswb %mm5, %mm1")
00112 ASM("paddusb %mm1, %mm3")
00113 ASM("movq %%mm3, %0":"=m"(*y1):)
// mm0 = eight byte-1 values (OSD U), mm1 = eight byte-0 values (OSD V).
00114 ASM("movq %mm0, %mm1")
00115 ASM("punpckhdq %mm4, %mm0")
00116 ASM("punpckldq %mm4, %mm1")
00117
// Park row-1 U, V and inverse alpha while row 2 reuses the registers.
00118 ASM("movq %%mm0, %0":"=m"(tmp_u):)
00119 ASM("movq %%mm1, %0":"=m"(tmp_v):)
00120 ASM("movq %%mm2, %0":"=m"(tmp_a):)
00121
// Row 2: the identical sequence, reading src2 and updating y2.
00122 ASM("movq %0, %%mm1"::"m"(src2[0]))
00123 ASM("movq %mm1, %mm2")
00124 ASM("punpckhbw %0, %%mm1"::"m"(src2[8]))
00125 ASM("punpcklbw %0, %%mm2"::"m"(src2[8]))
00126 ASM("movq %mm2, %mm0")
00127 ASM("punpckhbw %mm1, %mm2")
00128 ASM("punpcklbw %mm1, %mm0")
00129 ASM("movq %0, %%mm3"::"m"(src2[16]))
00130 ASM("movq %mm3, %mm4")
00131 ASM("punpckhbw %0, %%mm3"::"m"(src2[24]))
00132 ASM("punpcklbw %0, %%mm4"::"m"(src2[24]))
00133 ASM("movq %mm4, %mm1")
00134 ASM("punpckhbw %mm3, %mm1")
00135 ASM("punpcklbw %mm3, %mm4")
00136 ASM("movq %mm2, %mm3")
00137 ASM("punpckldq %mm1, %mm3")
00138 ASM("punpckhdq %mm1, %mm2")
00139 ASM("movq %0, %%mm7"::"m"(MMX_MAX))
00140 ASM("psubusb %mm2, %mm7")
00141 ASM("movq %mm7, %mm6")
00142 ASM("movq %mm7, %mm2")
00143 ASM("punpckhbw %0, %%mm7"::"m"(MMX_MIN))
00144 ASM("punpcklbw %0, %%mm6"::"m"(MMX_MIN))
00145 ASM("movq %0, %%mm5"::"m"(*y2))
00146 ASM("movq %mm5, %mm1")
00147 ASM("punpckhbw %0, %%mm5"::"m"(MMX_MIN))
00148 ASM("punpcklbw %0, %%mm1"::"m"(MMX_MIN))
00149 ASM("pmullw %mm7, %mm5")
00150 ASM("pmullw %mm6, %mm1")
00151 ASM("psrlw $8, %mm5")
00152 ASM("psrlw $8, %mm1")
00153 ASM("packuswb %mm5, %mm1")
00154 ASM("paddusb %mm1, %mm3")
00155 ASM("movq %%mm3, %0":"=m"(*y2):)
00156 ASM("movq %mm0, %mm1")
00157 ASM("punpckhdq %mm4, %mm0")
00158 ASM("punpckldq %mm4, %mm1")
00159
// Average the inverse alpha over each 2x2 block: split the odd and even
// bytes of both rows into words, add the four values and divide by four.
// Result: four words in mm2.
00160 ASM("movq %mm2, %mm3")
00161 ASM("movq %0, %%mm4"::"m"(tmp_a))
00162 ASM("movq %mm4, %mm5")
00163 ASM("psrlw $8, %mm2")
00164 ASM("pand %0, %%mm3"::"m"(MMX_255))
00165 ASM("psrlw $8, %mm4")
00166 ASM("pand %0, %%mm5"::"m"(MMX_255))
00167 ASM("paddusw %mm5, %mm4")
00168 ASM("paddusw %mm4, %mm3")
00169 ASM("paddusw %mm3, %mm2")
00170 ASM("psrlw $2, %mm2")
00171 ASM("pand %0, %%mm2"::"m"(MMX_255))
00172
// Same 2x2 average for the OSD U values.  Result: four words in mm0.
00173 ASM("movq %mm0, %mm3")
00174 ASM("movq %0, %%mm4"::"m"(tmp_u))
00175 ASM("movq %mm4, %mm5")
00176 ASM("psrlw $8, %mm0")
00177 ASM("pand %0, %%mm3"::"m"(MMX_255))
00178 ASM("psrlw $8, %mm4")
00179 ASM("pand %0, %%mm5"::"m"(MMX_255))
00180 ASM("paddusw %mm5, %mm4")
00181 ASM("paddusw %mm4, %mm3")
00182 ASM("paddusw %mm3, %mm0")
00183 ASM("psrlw $2, %mm0")
00184 ASM("pand %0, %%mm0"::"m"(MMX_255))
00185
// Blend four existing U samples: scale by the averaged inverse alpha >> 8,
// add the averaged OSD U, pack to bytes and store the low four bytes.
00186 ASM("movd %0, %%mm3"::"m"(*u))
00187 ASM("punpcklbw %0, %%mm3"::"m"(MMX_MIN))
00188 ASM("pmullw %mm2, %mm3")
00189 ASM("psrlw $8, %mm3")
00190 ASM("paddusb %mm3, %mm0")
00191 ASM("packuswb %mm1, %mm0")
00192 ASM("movd %%mm0, %0":"=m"(*u):)
00193
// 2x2 average of the OSD V values.  Result: four words in mm1.
00194 ASM("movq %mm1, %mm3")
00195 ASM("movq %0, %%mm4"::"m"(tmp_v))
00196 ASM("movq %mm4, %mm5")
00197 ASM("psrlw $8, %mm1")
00198 ASM("pand %0, %%mm3"::"m"(MMX_255))
00199 ASM("psrlw $8, %mm4")
00200 ASM("pand %0, %%mm5"::"m"(MMX_255))
00201 ASM("paddusw %mm5, %mm4")
00202 ASM("paddusw %mm4, %mm3")
00203 ASM("paddusw %mm3, %mm1")
00204 ASM("psrlw $2, %mm1")
00205 ASM("pand %0, %%mm1"::"m"(MMX_255))
00206
// Blend and store four V samples the same way as U above.
00207 ASM("movd %0, %%mm3"::"m"(*v))
00208 ASM("punpcklbw %0, %%mm3"::"m"(MMX_MIN))
00209 ASM("pmullw %mm2, %mm3")
00210 ASM("psrlw $8, %mm3")
00211 ASM("paddusb %mm3, %mm1")
00212 ASM("packuswb %mm2, %mm1")
00213 ASM("movd %%mm1, %0":"=m"(*v):)
00214
// Next block: 8 source pixels (32 bytes), 8 luma bytes, 4 chroma bytes.
00215 src1 += 32; src2 += 32; y1 += 8; y2 += 8; u += 4; v += 4;
00216 }
// Move every cursor to the start of the next row pair.
00217 y1 += y_wrap; y2 += y_wrap; u+= u_wrap; v += v_wrap;
00218 src1 += src_wrap; src2 += src_wrap;
00219 }
// Clear the MMX state once all blocks are done.
00220 ASM("emms")
00221 #endif
00222 }
00223
00224 void inline c_yuv888_to_yv12(VideoFrame *frame, MythImage *osd_image,
00225 int left, int top, int right, int bottom)
00226 {
00227 unsigned char *udest, *vdest, *src1, *src2;
00228 int alpha1, alpha2, alpha3, alpha4, src_wrap, y_wrap, width, height;
00229 unsigned char *y1, *y2, *y3, *y4, *a1, *a2, *a3, *a4, *r1, *r2, *r3, *r4;
00230 unsigned char *g1, *g2, *g3, *g4, *b1, *b2, *b3, *b4;
00231
00232 width = right - left;
00233 height = bottom - top;
00234
00235 udest = frame->buf + frame->offsets[1];
00236 vdest = frame->buf + frame->offsets[2];
00237 udest += (frame->pitches[1] * (top >> 1)) + (left >> 1);
00238 vdest += (frame->pitches[2] * (top >> 1)) + (left >> 1);
00239
00240 y1 = frame->buf + frame->offsets[0] + (frame->pitches[0] * top) + left;
00241 y3 = frame->buf + frame->offsets[0] + (frame->pitches[0] * (top + 1)) + left;
00242 y2 = y1 + 1; y4 = y3 + 1;
00243
00244 src1 = osd_image->scanLine(top) + (left << 2);
00245 src2 = osd_image->scanLine(top + 1) + (left << 2);
00246 b1 = src1 + B_OI; b2 = b1 + 4; b3 = src2 + B_OI; b4 = b3 + 4;
00247 g1 = src1 + G_OI; g2 = g1 + 4; g3 = src2 + G_OI; g4 = g3 + 4;
00248 r1 = src1 + R_OI; r2 = r1 + 4; r3 = src2 + R_OI; r4 = r3 + 4;
00249 a1 = src1 + A_OI; a2 = a1 + 4; a3 = src2 + A_OI; a4 = a3 + 4;
00250 src_wrap = (osd_image->bytesPerLine() << 1) - (width << 2);
00251 y_wrap = (frame->pitches[0] << 1) - width;
00252
00253 for (int row = 0; row < height; row += 2)
00254 {
00255 for (int col = 0; col < (width >> 1); col++)
00256 {
00257 alpha1 = 255 - *a1; alpha2 = 255 - *a2;
00258 alpha3 = 255 - *a3; alpha4 = 255 - *a4;
00259
00260 *y1 = ((*y1 * alpha1) >> 8) + *r1;
00261 *y2 = ((*y2 * alpha2) >> 8) + *r2;
00262 *y3 = ((*y3 * alpha3) >> 8) + *r3;
00263 *y4 = ((*y4 * alpha4) >> 8) + *r4;
00264
00265 alpha1 = (alpha1 + alpha2 + alpha3 + alpha4) >> 2;
00266 udest[col] = ((udest[col] * alpha1) >> 8) +
00267 ((*g1 + *g2 + *g3 + *g4) >> 2);
00268 vdest[col] = ((vdest[col] * alpha1) >> 8) +
00269 ((*b1 + *b2 + *b3 + *b4) >> 2);
00270
00271 y1 += 2; y2 += 2; y3 += 2; y4 += 2;
00272 r1 += 8; r2 += 8; r3 += 8; r4 += 8;
00273 g1 += 8; g2 += 8; g3 += 8; g4 += 8;
00274 b1 += 8; b2 += 8; b3 += 8; b4 += 8;
00275 a1 += 8; a2 += 8; a3 += 8; a4 += 8;
00276
00277 }
00278 r1 += src_wrap; r2 += src_wrap; r3 += src_wrap; r4 += src_wrap;
00279 g1 += src_wrap; g2 += src_wrap; g3 += src_wrap; g4 += src_wrap;
00280 b1 += src_wrap; b2 += src_wrap; b3 += src_wrap; b4 += src_wrap;
00281 a1 += src_wrap; a2 += src_wrap; a3 += src_wrap; a4 += src_wrap;
00282 y1 += y_wrap; y2 += y_wrap; y3 += y_wrap; y4 += y_wrap;
00283 udest += frame->pitches[1];
00284 vdest += frame->pitches[2];
00285 }
00286 }
00287
00288 void yuv888_to_i44(unsigned char *dest, MythImage *osd_image, QSize dst_size,
00289 int left, int top, int right, int bottom, bool ifirst)
00290 {
00291 int width, ashift, amask, ishift, imask, src_wrap, dst_wrap;
00292 unsigned char *src, *alpha, *dst;
00293 const unsigned char *dmp;
00294
00295 width = right - left;
00296 ashift = ifirst ? 0 : 4;
00297 amask = ifirst ? 0x0f : 0xf0;
00298 ishift = ifirst ? 4 : 0;
00299 imask = ifirst ? 0xf0 : 0x0f;
00300
00301 src = osd_image->scanLine(top) + (left << 2) + R_OI;
00302 alpha = osd_image->scanLine(top) + (left << 2) + A_OI;
00303 dst = dest + dst_size.width() * top + left;
00304 dst_wrap = dst_size.width() - width;
00305 src_wrap = osd_image->bytesPerLine() - (width << 2);
00306
00307 for (int row = top; row < bottom; row++)
00308 {
00309 dmp = DM[row & (DM_HEIGHT - 1)];
00310 for (int col = left; col < right; col++)
00311 {
00312 int grey;
00313
00314 grey = *src + ((dmp[col & (DM_WIDTH - 1)] << 2) >> 4);
00315 grey = (grey - (grey >> 4)) >> 4;
00316
00317 *dst = (((*alpha >> 4) << ashift) & amask) |
00318 (((grey) << ishift) & imask);
00319
00320 alpha += 4;
00321 src += 4;
00322 dst++;
00323 }
00324 alpha += src_wrap;
00325 src += src_wrap;
00326 dst += dst_wrap;
00327 }
00328 }
00329