00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "config.h"
00025
00026 #include <stdlib.h>
00027 #include <inttypes.h>
00028
00029 #include "mpeg2.h"
00030 #include "attributes.h"
00031 #include "mpeg2_internal.h"
00032
/*
 * Fixed-point IDCT coefficients.  Each Wk is 2048 * sqrt (2) * cos (k * pi / 16)
 * rounded to the nearest integer.
 */
enum {
    W1 = 2841,  /* 2048 * sqrt (2) * cos (1 * pi / 16) */
    W2 = 2676,  /* 2048 * sqrt (2) * cos (2 * pi / 16) */
    W3 = 2408,  /* 2048 * sqrt (2) * cos (3 * pi / 16) */
    W5 = 1609,  /* 2048 * sqrt (2) * cos (5 * pi / 16) */
    W6 = 1108,  /* 2048 * sqrt (2) * cos (6 * pi / 16) */
    W7 = 565    /* 2048 * sqrt (2) * cos (7 * pi / 16) */
};
00039
00040
/*
 * IDCT entry points.  mpeg2_idct_init () points these at the implementation
 * chosen for the requested acceleration flags.
 */

/* Inverse-transform `block` and store the clamped result to `dest`. */
void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);

/* Inverse-transform `block` and add the result to the pixels at `dest`. */
void (* mpeg2_idct_add) (int last, int16_t * block, uint8_t * dest, int stride);
00044
00045
00046
00047
00048
00049
00050
/*
 * Saturation lookup table.  CLIP (i) addresses element 3840 + i, so indices
 * from -3840 up to 3840 + 255 fall inside the array.  The table contents
 * are filled in by mpeg2_idct_init ().
 */
uint8_t mpeg2_clip[3840 * 2 + 256];
#define CLIP(i) (*((mpeg2_clip + 3840) + (i)))
00053
/*
 * BUTTERFLY (t0, t1, c0, c1, d0, d1) stores
 *
 *     t0 = c0 * d0 + c1 * d1
 *     t1 = c0 * d1 - c1 * d0
 *
 * The shared product c0 * (d0 + d1) is computed once:
 *
 *     t0 = c0 * (d0 + d1) + (c1 - c0) * d1
 *     t1 = c0 * (d0 + d1) - (c1 + c0) * d0
 *
 * so that, when c0 and c1 are compile-time constants, only three run-time
 * multiplications remain.
 *
 * Every argument is parenthesized so that expression arguments expand
 * correctly.  d0 and d1 are still evaluated more than once: do not pass
 * expressions with side effects.  The temporary has a macro-specific name
 * so it cannot capture a caller variable.
 */
#define BUTTERFLY(t0,t1,c0,c1,d0,d1)                                  \
do {                                                                    \
    int butterfly_sum_ = (c0) * ((d0) + (d1));                          \
    (t0) = butterfly_sum_ + ((c1) - (c0)) * (d1);                       \
    (t1) = butterfly_sum_ - ((c1) + (c0)) * (d0);                       \
} while (0)
00068
00069 static inline void idct_row (int16_t * const block)
00070 {
00071 int d0, d1, d2, d3;
00072 int a0, a1, a2, a3, b0, b1, b2, b3;
00073 int t0, t1, t2, t3;
00074
00075
00076 if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
00077 ((int32_t *)block)[3]))) {
00078 uint32_t tmp = (uint16_t) (block[0] >> 1);
00079 tmp |= tmp << 16;
00080 ((int32_t *)block)[0] = tmp;
00081 ((int32_t *)block)[1] = tmp;
00082 ((int32_t *)block)[2] = tmp;
00083 ((int32_t *)block)[3] = tmp;
00084 return;
00085 }
00086
00087 d0 = (block[0] << 11) + 2048;
00088 d1 = block[1];
00089 d2 = block[2] << 11;
00090 d3 = block[3];
00091 t0 = d0 + d2;
00092 t1 = d0 - d2;
00093 BUTTERFLY (t2, t3, W6, W2, d3, d1);
00094 a0 = t0 + t2;
00095 a1 = t1 + t3;
00096 a2 = t1 - t3;
00097 a3 = t0 - t2;
00098
00099 d0 = block[4];
00100 d1 = block[5];
00101 d2 = block[6];
00102 d3 = block[7];
00103 BUTTERFLY (t0, t1, W7, W1, d3, d0);
00104 BUTTERFLY (t2, t3, W3, W5, d1, d2);
00105 b0 = t0 + t2;
00106 b3 = t1 + t3;
00107 t0 -= t2;
00108 t1 -= t3;
00109 b1 = ((t0 + t1) >> 8) * 181;
00110 b2 = ((t0 - t1) >> 8) * 181;
00111
00112 block[0] = (a0 + b0) >> 12;
00113 block[1] = (a1 + b1) >> 12;
00114 block[2] = (a2 + b2) >> 12;
00115 block[3] = (a3 + b3) >> 12;
00116 block[4] = (a3 - b3) >> 12;
00117 block[5] = (a2 - b2) >> 12;
00118 block[6] = (a1 - b1) >> 12;
00119 block[7] = (a0 - b0) >> 12;
00120 }
00121
00122 static inline void idct_col (int16_t * const block)
00123 {
00124 int d0, d1, d2, d3;
00125 int a0, a1, a2, a3, b0, b1, b2, b3;
00126 int t0, t1, t2, t3;
00127
00128 d0 = (block[8*0] << 11) + 65536;
00129 d1 = block[8*1];
00130 d2 = block[8*2] << 11;
00131 d3 = block[8*3];
00132 t0 = d0 + d2;
00133 t1 = d0 - d2;
00134 BUTTERFLY (t2, t3, W6, W2, d3, d1);
00135 a0 = t0 + t2;
00136 a1 = t1 + t3;
00137 a2 = t1 - t3;
00138 a3 = t0 - t2;
00139
00140 d0 = block[8*4];
00141 d1 = block[8*5];
00142 d2 = block[8*6];
00143 d3 = block[8*7];
00144 BUTTERFLY (t0, t1, W7, W1, d3, d0);
00145 BUTTERFLY (t2, t3, W3, W5, d1, d2);
00146 b0 = t0 + t2;
00147 b3 = t1 + t3;
00148 t0 -= t2;
00149 t1 -= t3;
00150 b1 = ((t0 + t1) >> 8) * 181;
00151 b2 = ((t0 - t1) >> 8) * 181;
00152
00153 block[8*0] = (a0 + b0) >> 17;
00154 block[8*1] = (a1 + b1) >> 17;
00155 block[8*2] = (a2 + b2) >> 17;
00156 block[8*3] = (a3 + b3) >> 17;
00157 block[8*4] = (a3 - b3) >> 17;
00158 block[8*5] = (a2 - b2) >> 17;
00159 block[8*6] = (a1 - b1) >> 17;
00160 block[8*7] = (a0 - b0) >> 17;
00161 }
00162
/*
 * Portable C implementation behind mpeg2_idct_copy.
 *
 * Runs the row pass over all eight rows of `block`, then the column pass
 * over all eight columns, and writes the result through the CLIP table to
 * an 8x8 pixel area at `dest`.  `stride` is the distance in bytes between
 * consecutive destination rows.  The 64 coefficients in `block` are reset
 * to zero before returning.
 *
 * Coefficients are cleared through int16_t accesses (the block's own type)
 * rather than through int32_t pointers, and the output loop has its own
 * counter instead of relying on a value left over from an earlier loop.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* store the whole pixel row first, then clear its coefficients */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col]);
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
00189
/*
 * Portable C implementation behind mpeg2_idct_add.
 *
 * Adds the inverse transform of `block` to the 8x8 pixel area at `dest`
 * (rows `stride` bytes apart), saturating each sum through the CLIP table.
 *
 * Two paths:
 *  - General: full row + column transform, used unless last == 129 and
 *    bits 4..6 of block[0] differ from binary 100.  All 64 coefficients
 *    are cleared afterwards.
 *  - DC only: a single offset (block[0] + 64) >> 7 is added to every pixel;
 *    only block[0] and block[63] are cleared.
 *    NOTE(review): this presumes the caller passes last == 129 only when no
 *    other coefficient is set - confirm against the caller.
 *
 * Coefficients are cleared through int16_t accesses (the block's own type)
 * rather than through int32_t pointers, and each output loop has its own
 * counter.
 */
static void mpeg2_idct_add_c (const int last, int16_t * block,
                              uint8_t * dest, const int stride)
{
    int row, col;

    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
        for (row = 0; row < 8; row++)
            idct_row (block + 8 * row);
        for (col = 0; col < 8; col++)
            idct_col (block + col);

        for (row = 0; row < 8; row++) {
            /* update the whole pixel row first, then clear its coefficients */
            for (col = 0; col < 8; col++)
                dest[col] = CLIP (block[col] + dest[col]);
            for (col = 0; col < 8; col++)
                block[col] = 0;

            dest += stride;
            block += 8;
        }
    } else {
        const int DC = (block[0] + 64) >> 7;

        block[0] = block[63] = 0;
        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++)
                dest[col] = CLIP (DC + dest[col]);
            dest += stride;
        }
    }
}
00235
00236 void mpeg2_idct_init (uint32_t accel)
00237 {
00238 #ifdef ARCH_X86
00239 #ifdef MMX
00240 if (accel & MPEG2_ACCEL_X86_MMXEXT) {
00241 mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
00242 mpeg2_idct_add = mpeg2_idct_add_mmxext;
00243 mpeg2_idct_mmx_init ();
00244 } else if (accel & MPEG2_ACCEL_X86_MMX) {
00245 mpeg2_idct_copy = mpeg2_idct_copy_mmx;
00246 mpeg2_idct_add = mpeg2_idct_add_mmx;
00247 mpeg2_idct_mmx_init ();
00248 } else
00249 #endif
00250 #endif
00251 #ifdef HAVE_ALTIVEC
00252 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
00253 mpeg2_idct_copy = mpeg2_idct_copy_altivec;
00254 mpeg2_idct_add = mpeg2_idct_add_altivec;
00255 mpeg2_idct_altivec_init ();
00256 } else
00257 #endif
00258 #ifdef ARCH_ALPHA
00259 if (accel & MPEG2_ACCEL_ALPHA_MVI) {
00260 mpeg2_idct_copy = mpeg2_idct_copy_mvi;
00261 mpeg2_idct_add = mpeg2_idct_add_mvi;
00262 mpeg2_idct_alpha_init ();
00263 } else if (accel & MPEG2_ACCEL_ALPHA) {
00264 int i;
00265
00266 mpeg2_idct_copy = mpeg2_idct_copy_alpha;
00267 mpeg2_idct_add = mpeg2_idct_add_alpha;
00268 mpeg2_idct_alpha_init ();
00269 for (i = -3840; i < 3840 + 256; i++)
00270 CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
00271 } else
00272 #endif
00273 {
00274 extern uint8_t mpeg2_scan_norm[64];
00275 extern uint8_t mpeg2_scan_alt[64];
00276 int i, j;
00277
00278 mpeg2_idct_copy = mpeg2_idct_copy_c;
00279 mpeg2_idct_add = mpeg2_idct_add_c;
00280 for (i = -3840; i < 3840 + 256; i++)
00281 CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
00282 for (i = 0; i < 64; i++) {
00283 j = mpeg2_scan_norm[i];
00284 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
00285 j = mpeg2_scan_alt[i];
00286 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
00287 }
00288 }
00289 }