00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "mpeg2config.h"
00024
00025 #if ARCH_ALPHA
00026
00027 #include <inttypes.h>
00028
00029 #include "mpeg2.h"
00030 #include "attributes.h"
00031 #include "mpeg2_internal.h"
00032 #include "alpha_asm.h"
00033
/*
 * Average two packed byte vectors lane by lane, rounding halves up, so
 * each result byte is (a_i + b_i + 1) >> 1.
 *
 * Relies on the identity  (a + b + 1) >> 1 == (a | b) - ((a ^ b) >> 1).
 * The XOR term is masked with BYTE_VEC (0xfe) before the shift so that the
 * low bit of one byte cannot slide into the top of the byte below it.
 * NOTE(review): BYTE_VEC is provided by alpha_asm.h; presumably it
 * replicates its argument into all eight bytes -- confirm there.
 */
static inline uint64_t avg2 (uint64_t a, uint64_t b)
{
    const uint64_t either = a | b;
    const uint64_t differ = (a ^ b) & BYTE_VEC (0xfe);

    return either - (differ >> 1);
}
00038
00039
00040
/*
 * Fetch 16 bytes starting at addr into ret_l (first 8) and ret_r (next 8)
 * for the case where addr is not 8-byte aligned.
 *
 * Three ldq_u loads are issued at addr, addr + 8 and addr + 16, and each
 * output is assembled from two neighbouring loads with extql/extqh, both
 * of which receive addr itself as their second operand.
 * NOTE(review): ldq_u/extql/extqh come from alpha_asm.h; presumably they
 * wrap the Alpha instructions of the same names -- confirm there.
 */
#define ULOAD16(ret_l,ret_r,addr)                               \
    do {                                                        \
        const uint64_t uq_first  = ldq_u ((addr) + 0);          \
        const uint64_t uq_second = ldq_u ((addr) + 8);          \
        const uint64_t uq_third  = ldq_u ((addr) + 16);         \
        ret_l = extql (uq_first, addr)                          \
              | extqh (uq_second, addr);                        \
        ret_r = extql (uq_second, addr)                         \
              | extqh (uq_third, addr);                         \
    } while (0)
00049
00050
/*
 * Fetch 16 bytes starting at addr with two ldq loads: ret_l receives the
 * quadword at addr, ret_r the quadword at addr + 8.  MAKE_OP selects this
 * variant only when the source pointer has its low three bits clear.
 */
#define ALOAD16(ret_l,ret_r,addr)               \
    do {                                        \
        ret_l = ldq ((addr));                   \
        ret_r = ldq ((addr) + 8);               \
    } while (0)
00056
/*
 * 8-byte-wide row copy.  For each of h rows: LOAD one quadword from
 * pixels, hand it to STORE together with block, then advance both
 * pointers by line_size.  LOAD16 is accepted only so that every OPKIND
 * macro shares one parameter list; it is not used here.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once (h is tested after the row).
 */
#define OP8(LOAD,LOAD16,STORE)                  \
    do {                                        \
        const uint64_t row = LOAD (pixels);     \
        STORE (row, block);                     \
        block += line_size;                     \
        pixels += line_size;                    \
        h--;                                    \
    } while (h != 0)
00063
/*
 * 16-byte-wide row copy.  For each of h rows: LOAD16 fetches two
 * quadwords from pixels, which are passed to STORE for block and
 * block + 8 respectively; both pointers then advance by line_size.
 * LOAD is unused here and exists only for a uniform parameter list.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once (h is tested after the row).
 */
#define OP16(LOAD,LOAD16,STORE)                 \
    do {                                        \
        uint64_t left, right;                   \
        LOAD16 (left, right, pixels);           \
        STORE (left, block);                    \
        STORE (right, block + 8);               \
        block += line_size;                     \
        pixels += line_size;                    \
        h--;                                    \
    } while (h != 0)
00073
/*
 * 8-byte-wide horizontal half-pel row operation.  Each row is averaged
 * (avg2) with a copy of itself moved down by one byte lane; the vacated
 * top lane is filled with pixels[8], so nine source bytes are read per
 * row.  The result goes to STORE with block, then both pointers advance
 * by line_size.  LOAD16 is unused.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP8_X2(LOAD,LOAD16,STORE)                                       \
    do {                                                                \
        const uint64_t here = LOAD (pixels);                            \
        const uint64_t next = (here >> 8)                               \
                            | ((uint64_t) pixels[8] << 56);             \
        STORE (avg2 (here, next), block);                               \
        block += line_size;                                             \
        pixels += line_size;                                            \
        h--;                                                            \
    } while (h != 0)
00084
/*
 * 16-byte-wide horizontal half-pel row operation.  LOAD16 fetches the two
 * quadwords of a row; each is averaged (avg2) with the row moved down by
 * one byte lane.  The left half borrows its top lane from the right
 * quadword, the right half borrows it from pixels[16], so seventeen
 * source bytes are read per row.  Results go to STORE at block and
 * block + 8.  LOAD is unused.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP16_X2(LOAD,LOAD16,STORE)                                      \
    do {                                                                \
        uint64_t left, right, left_next, right_next;                    \
                                                                        \
        LOAD16 (left, right, pixels);                                   \
        left_next = (left >> 8) | (right << 56);                        \
        STORE (avg2 (left, left_next), block);                          \
        right_next = (right >> 8) | ((uint64_t) pixels[16] << 56);      \
        STORE (avg2 (right, right_next), block + 8);                    \
        block += line_size;                                             \
        pixels += line_size;                                            \
        h--;                                                            \
    } while (h != 0)
00096
/*
 * 8-byte-wide vertical half-pel row operation.  Each output row is the
 * avg2 of two consecutive source rows.  The first two rows are loaded up
 * front; every iteration then computes the average, fetches the following
 * row, and only afterwards stores.
 *
 * On the final iteration line_size is forced to 0, so the trailing LOAD
 * re-reads the row already fetched instead of stepping one row further;
 * source rows 0..h are the only ones touched.  Note that this overwrites
 * the line_size variable of the expansion site.  LOAD16 is unused.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP8_Y2(LOAD,LOAD16,STORE)                       \
    do {                                                \
        uint64_t upper, lower;                          \
                                                        \
        upper = LOAD (pixels);                          \
        pixels += line_size;                            \
        lower = LOAD (pixels);                          \
        do {                                            \
            const uint64_t blend = avg2 (upper, lower); \
            h--;                                        \
            if (h == 0) {                               \
                line_size = 0;                          \
            }                                           \
            pixels += line_size;                        \
            upper = lower;                              \
            lower = LOAD (pixels);                      \
            STORE (blend, block);                       \
            block += line_size;                         \
        } while (h != 0);                               \
    } while (0)
00113
/*
 * 16-byte-wide vertical half-pel row operation.  Each output row is the
 * avg2 of two consecutive source rows, handled as a left and a right
 * quadword.  The first two rows are loaded up front; every iteration then
 * computes both averages, fetches the following row with LOAD16, and only
 * afterwards stores to block and block + 8.
 *
 * On the final iteration line_size is forced to 0, so the trailing LOAD16
 * re-reads the row already fetched instead of stepping one row further;
 * source rows 0..h are the only ones touched.  Note that this overwrites
 * the line_size variable of the expansion site.  LOAD is unused.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP16_Y2(LOAD,LOAD16,STORE)                              \
    do {                                                        \
        uint64_t upper_l, upper_r, lower_l, lower_r;            \
                                                                \
        LOAD16 (upper_l, upper_r, pixels);                      \
        pixels += line_size;                                    \
        LOAD16 (lower_l, lower_r, pixels);                      \
        do {                                                    \
            uint64_t blend_l, blend_r;                          \
                                                                \
            h--;                                                \
            if (h == 0) {                                       \
                line_size = 0;                                  \
            }                                                   \
            blend_l = avg2 (upper_l, lower_l);                  \
            blend_r = avg2 (upper_r, lower_r);                  \
            upper_l = lower_l;                                  \
            upper_r = lower_r;                                  \
            pixels += line_size;                                \
            LOAD16 (lower_l, lower_r, pixels);                  \
            STORE (blend_l, block);                             \
            STORE (blend_r, block + 8);                         \
            block += line_size;                                 \
        } while (h != 0);                                       \
    } while (0)
00134
/*
 * 8-byte-wide diagonal half-pel row operation: every output byte is
 * (a + b + c + d + 2) >> 2 over a 2x2 neighbourhood of source bytes.
 *
 * To stay inside 8-bit lanes each source byte is split into its upper six
 * bits (pre-shifted right by 2) and its lower two bits.  For one row the
 * horizontal pair sums are kept as sum_hi / sum_lo; the next row yields
 * row_hi / row_lo.  The stored value is
 *     sum_hi + row_hi + (((sum_lo + row_lo + 2 per lane) >> 2) & 0x03 per lane)
 * and the new row's sums then become the running ones.
 *
 * The horizontally shifted row takes its top lane from pixels[8], and the
 * loop reads one row beyond the h rows it writes (rows 0..h).  Unlike the
 * Y2 variants, pixels really does advance to row h.  LOAD16 is unused.
 * NOTE(review): BYTE_VEC comes from alpha_asm.h and is assumed to yield a
 * uint64_t with its argument replicated in every byte -- confirm there.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP8_XY2(LOAD,LOAD16,STORE)                                      \
    do {                                                                \
        const uint64_t low2 = BYTE_VEC (0x03);                          \
        const uint64_t high6 = ~BYTE_VEC (0x03);                        \
        const uint64_t bias = BYTE_VEC (0x02);                          \
        uint64_t sum_lo, sum_hi;                                        \
        uint64_t here = LOAD (pixels);                                  \
        uint64_t next = (here >> 8) | ((uint64_t) pixels[8] << 56);     \
                                                                        \
        sum_hi = ((here & high6) >> 2) + ((next & high6) >> 2);         \
        sum_lo = (here & low2) + (next & low2);                         \
                                                                        \
        do {                                                            \
            uint64_t row_lo, row_hi;                                    \
                                                                        \
            pixels += line_size;                                        \
            here = LOAD (pixels);                                       \
            next = (here >> 8) | ((uint64_t) pixels[8] << 56);          \
            row_hi = ((here & high6) >> 2) + ((next & high6) >> 2);     \
            row_lo = (here & low2) + (next & low2);                     \
                                                                        \
            STORE (sum_hi + row_hi +                                    \
                   (((sum_lo + row_lo + bias) >> 2) & low2), block);    \
                                                                        \
            block += line_size;                                         \
            sum_lo = row_lo;                                            \
            sum_hi = row_hi;                                            \
            h--;                                                        \
        } while (h != 0);                                               \
    } while (0)
00166
/*
 * 16-byte-wide diagonal half-pel row operation: every output byte is
 * (a + b + c + d + 2) >> 2 over a 2x2 neighbourhood of source bytes,
 * processed as a left and a right quadword per row.
 *
 * Each source byte is split into its upper six bits (pre-shifted right by
 * 2) and its lower two bits so that sums stay inside 8-bit lanes.  The
 * horizontal pair sums of the previous row are carried in
 * sum_hi_l/sum_lo_l (left) and sum_hi_r/sum_lo_r (right); the value
 * stored for each half is
 *     sum_hi + row_hi + (((sum_lo + row_lo + 2 per lane) >> 2) & 0x03 per lane)
 *
 * The shifted left quadword takes its top lane from the right quadword,
 * the shifted right quadword takes it from pixels[16].  The loop reads
 * one row beyond the h rows it writes (rows 0..h).  LOAD is unused.
 * NOTE(review): BYTE_VEC comes from alpha_asm.h and is assumed to yield a
 * uint64_t with its argument replicated in every byte -- confirm there.
 *
 * Expects pixels, block, line_size and h to be in scope at the expansion
 * site.  The body always runs at least once.
 */
#define OP16_XY2(LOAD,LOAD16,STORE)                                     \
    do {                                                                \
        const uint64_t low2 = BYTE_VEC (0x03);                          \
        const uint64_t high6 = ~BYTE_VEC (0x03);                        \
        const uint64_t bias = BYTE_VEC (0x02);                          \
        uint64_t lft, lft_next, rgt, rgt_next;                          \
        uint64_t sum_lo_l, sum_hi_l, sum_lo_r, sum_hi_r;                \
                                                                        \
        LOAD16 (lft, rgt, pixels);                                      \
        lft_next = (lft >> 8) | (rgt << 56);                            \
        rgt_next = (rgt >> 8) | ((uint64_t) pixels[16] << 56);          \
                                                                        \
        sum_hi_l = ((lft & high6) >> 2) + ((lft_next & high6) >> 2);    \
        sum_lo_l = (lft & low2) + (lft_next & low2);                    \
        sum_hi_r = ((rgt & high6) >> 2) + ((rgt_next & high6) >> 2);    \
        sum_lo_r = (rgt & low2) + (rgt_next & low2);                    \
                                                                        \
        do {                                                            \
            uint64_t row_lo_l, row_hi_l, row_lo_r, row_hi_r;            \
                                                                        \
            pixels += line_size;                                        \
            LOAD16 (lft, rgt, pixels);                                  \
            lft_next = (lft >> 8) | (rgt << 56);                        \
            rgt_next = (rgt >> 8) | ((uint64_t) pixels[16] << 56);      \
            row_hi_l = ((lft & high6) >> 2)                             \
                     + ((lft_next & high6) >> 2);                       \
            row_lo_l = (lft & low2) + (lft_next & low2);                \
            row_hi_r = ((rgt & high6) >> 2)                             \
                     + ((rgt_next & high6) >> 2);                       \
            row_lo_r = (rgt & low2) + (rgt_next & low2);                \
                                                                        \
            STORE (sum_hi_l + row_hi_l +                                \
                   (((sum_lo_l + row_lo_l + bias) >> 2) & low2),        \
                   block);                                              \
            STORE (sum_hi_r + row_hi_r +                                \
                   (((sum_lo_r + row_lo_r + bias) >> 2) & low2),        \
                   block + 8);                                          \
                                                                        \
            block += line_size;                                         \
            sum_lo_l = row_lo_l;                                        \
            sum_hi_l = row_hi_l;                                        \
            sum_lo_r = row_lo_r;                                        \
            sum_hi_r = row_hi_r;                                        \
            h--;                                                        \
        } while (h != 0);                                               \
    } while (0)
00213
/*
 * Define one static kernel named MC_<OPNAME>_<SUFF>_<SIZE>_alpha with the
 * signature (block, pixels, line_size, h).
 *
 * The body is OPKIND expanded twice, once per source alignment: when the
 * low three bits of pixels are clear the ldq / ALOAD16 loaders are used,
 * otherwise uldq / ULOAD16.  STORE is forwarded unchanged to OPKIND.
 */
#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)                          \
static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha           \
    (uint8_t *restrict block, const uint8_t *restrict pixels,           \
     int line_size, int h)                                              \
{                                                                       \
    if (((uint64_t) pixels & 0x7) == 0) {                               \
        OPKIND (ldq, ALOAD16, STORE);                                   \
    } else {                                                            \
        OPKIND (uldq, ULOAD16, STORE);                                  \
    }                                                                   \
}
00225
/*
 * Instantiate the full family of eight kernels for one store policy:
 * widths 8 and 16, each in the o (copy), x, y and xy variants.
 *
 * Every MAKE_OP expands to a complete function definition, so no ';'
 * follows the individual entries: a semicolon after a function body is an
 * empty file-scope declaration, which ISO C does not permit (compilers
 * accept it only as an extension and warn under -pedantic).
 */
#define PIXOP(OPNAME,STORE)                             \
    MAKE_OP (OPNAME, 8, o, OP8, STORE)                  \
    MAKE_OP (OPNAME, 8, x, OP8_X2, STORE)               \
    MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE)               \
    MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE)             \
    MAKE_OP (OPNAME, 16, o, OP16, STORE)                \
    MAKE_OP (OPNAME, 16, x, OP16_X2, STORE)             \
    MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE)             \
    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE)
00235
00236 #define STORE(l,b) stq (l, b)
00237 PIXOP (put, STORE);
00238 #undef STORE
00239 #define STORE(l,b) stq (avg2 (l, ldq (b)), b);
00240 PIXOP (avg, STORE);
00241
00242 mpeg2_mc_t mpeg2_mc_alpha = {
00243 { MC_put_o_16_alpha, MC_put_x_16_alpha,
00244 MC_put_y_16_alpha, MC_put_xy_16_alpha,
00245 MC_put_o_8_alpha, MC_put_x_8_alpha,
00246 MC_put_y_8_alpha, MC_put_xy_8_alpha },
00247 { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
00248 MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
00249 MC_avg_o_8_alpha, MC_avg_x_8_alpha,
00250 MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
00251 };
00252
00253 #endif