• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files

h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "dsputil.h"
00029 #include "avcodec.h"
00030 #include "mpegvideo.h"
00031 #include "h264.h"
00032 #include "h264data.h"
00033 #include "h264_parser.h"
00034 #include "golomb.h"
00035 #include "rectangle.h"
00036 
00037 #include "cabac.h"
00038 #ifdef ARCH_X86
00039 #include "i386/h264_i386.h"
00040 #endif
00041 
00042 //#undef NDEBUG
00043 #include <assert.h>
00044 
/* NOTE(review): DELAYED_PIC_REF is not referenced in the visible part of this
 * file; confirm its meaning at the use sites before changing the value. */
00049 #define DELAYED_PIC_REF 4
00050 
/*
 * Static storage for the VLC decoders.
 * Each VLC object (or array of VLC objects) below is followed by a VLC_TYPE
 * backing array and a constant holding the number of entries per table.
 * Presumably they are tied together by the VLC initialisation code, which is
 * outside this view - TODO confirm.
 */
/* Four coeff_token tables; the backing array holds 520+332+280+256 entries,
 * which is the sum of the four sizes listed in coeff_token_vlc_tables_size. */
00051 static VLC coeff_token_vlc[4];
00052 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
00053 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
00054 
00055 static VLC chroma_dc_coeff_token_vlc;
00056 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
00057 static const int chroma_dc_coeff_token_vlc_table_size = 256;
00058 
/* 15 total_zeros tables of 512 entries each. */
00059 static VLC total_zeros_vlc[15];
00060 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
00061 static const int total_zeros_vlc_tables_size = 512;
00062 
/* 3 chroma DC total_zeros tables of 8 entries each. */
00063 static VLC chroma_dc_total_zeros_vlc[3];
00064 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
00065 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
00066 
/* 6 run tables of 8 entries each, plus one separate 96-entry table (run7). */
00067 static VLC run_vlc[6];
00068 static VLC_TYPE run_vlc_tables[6][8][2];
00069 static const int run_vlc_tables_size = 8;
00070 
00071 static VLC run7_vlc;
00072 static VLC_TYPE run7_vlc_table[96][2];
00073 static const int run7_vlc_table_size = 96;
00074 
/* Forward declarations of file-local functions; their definitions are not
 * part of the visible section (presumably further down in this file or in a
 * source file included into it - TODO confirm). */
00075 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
00076 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
00077 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00078 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00079 static Picture * remove_long(H264Context *h, int i, int ref_mask);
00080 
00081 static av_always_inline uint32_t pack16to32(int a, int b){
00082 #ifdef WORDS_BIGENDIAN
00083    return (b&0xFFFF) + (a<<16);
00084 #else
00085    return (a&0xFFFF) + (b<<16);
00086 #endif
00087 }
00088 
/* Lookup table: ff_rem6[i] == i % 6 for i in 0..51 (one row per multiple of 6). */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
00092 
/* Lookup table: ff_div6[i] == i / 6 for i in 0..51 (one row per quotient). */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
00096 
/*
 * Block index tables used by fill_caches() to address data of the left
 * neighbour macroblock(s).
 *
 * Row selection (see fill_caches()):
 *   [0] default (left neighbour coded the same way as the current MB)
 *   [1] current MB is frame coded, left pair is field coded, bottom MB of pair
 *   [2] current MB is frame coded, left pair is field coded, top MB of pair
 *   [3] current MB is field coded, left pair is frame coded
 *
 * Columns 0..3 index the four rows next to the left edge (intra4x4 pred mode,
 * non-zero count, motion vector row); columns 4..7 are used as additional
 * non_zero_count indices of the left neighbour.
 */
static const int left_block_options[4][8]={
    [0] = { 0, 1, 2, 3,   7, 10, 8, 11 },
    [1] = { 2, 2, 3, 3,   8, 11, 8, 11 },
    [2] = { 0, 0, 1, 1,   7, 10, 7, 10 },
    [3] = { 0, 2, 0, 2,   7, 10, 7, 10 },
};
00103 
/**
 * Fills the per-macroblock caches in h with data from the neighbouring
 * macroblocks (top, left, and for decoding also top-left / top-right) of the
 * macroblock at h->mb_xy.
 *
 * Written by this function: h->top_mb_xy, h->left_mb_xy[], the
 * *_samples_available masks and intra4x4_pred_mode_cache (intra MBs, decode
 * path only), non_zero_count_cache, top_cbp / left_cbp (CABAC only),
 * mv_cache / ref_cache / mvd_cache / direct_cache (inter or direct MBs) and
 * neighbor_transform_size.
 *
 * @param h           decoder context
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock non-zero when called on behalf of the loop filter; the
 *                    function may then return early and skips the intra,
 *                    top-left and top-right handling
 */
00104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
00105     MpegEncContext * const s = &h->s;
00106     const int mb_xy= h->mb_xy;
00107     int topleft_xy, top_xy, topright_xy, left_xy[2];
00108     int topleft_type, top_type, topright_type, left_type[2];
00109     int * left_block;
00110     int topleft_partition= -1;
00111     int i;
00112 
00113     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
00114 
00115     //FIXME deblocking could skip the intra and nnz parts.
      // Deblocking shortcut: nothing is (re)loaded when not in MBAFF and either
      // slice_num is 1 or the top neighbour belongs to the same slice.
00116     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
00117         return;
00118 
00119     //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
00120 
00121     topleft_xy = top_xy - 1;
00122     topright_xy= top_xy + 1;
00123     left_xy[1] = left_xy[0] = mb_xy-1;
00124     left_block = left_block_options[0];
      // MBAFF: neighbours are located relative to macroblock pairs; which MB of
      // a neighbouring pair is used depends on the frame/field coding of the
      // current MB and of that pair.
00125     if(FRAME_MBAFF){
00126         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
00127         const int top_pair_xy      = pair_xy     - s->mb_stride;
00128         const int topleft_pair_xy  = top_pair_xy - 1;
00129         const int topright_pair_xy = top_pair_xy + 1;
00130         const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
00131         const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
00132         const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
00133         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
00134         const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
00135         const int bottom = (s->mb_y & 1);
00136         tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
00137         if (bottom
00138                 ? !curr_mb_frame_flag // bottom macroblock
00139                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
00140                 ) {
00141             top_xy -= s->mb_stride;
00142         }
00143         if (bottom
00144                 ? !curr_mb_frame_flag // bottom macroblock
00145                 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
00146                 ) {
00147             topleft_xy -= s->mb_stride;
00148         } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
00149             topleft_xy += s->mb_stride;
00150             // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
00151             topleft_partition = 0;
00152         }
00153         if (bottom
00154                 ? !curr_mb_frame_flag // bottom macroblock
00155                 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
00156                 ) {
00157             topright_xy -= s->mb_stride;
00158         }
          // Left pair coded differently from the current MB: address the pair
          // and switch to the matching left_block_options row.
00159         if (left_mb_frame_flag != curr_mb_frame_flag) {
00160             left_xy[1] = left_xy[0] = pair_xy - 1;
00161             if (curr_mb_frame_flag) {
00162                 if (bottom) {
00163                     left_block = left_block_options[1];
00164                 } else {
00165                     left_block= left_block_options[2];
00166                 }
00167             } else {
00168                 left_xy[1] += s->mb_stride;
00169                 left_block = left_block_options[3];
00170             }
00171         }
00172     }
00173 
      // Publish the resolved top/left neighbour addresses in the context.
00174     h->top_mb_xy = top_xy;
00175     h->left_mb_xy[0] = left_xy[0];
00176     h->left_mb_xy[1] = left_xy[1];
00177     if(for_deblock){
          // Deblocking: a neighbour counts as present when its slice_table
          // entry is < 255, regardless of which slice it belongs to.
00178         topleft_type = 0;
00179         topright_type = 0;
00180         top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
00181         left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
00182         left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
00183 
00184         if(MB_MBAFF && !IS_INTRA(mb_type)){
00185             int list;
00186             for(list=0; list<h->list_count; list++){
00187                 //These values were changed for ease of performing MC, we need to change them back
00188                 //FIXME maybe we can make MC and loop filter use the same values or prevent
00189                 //the MC code from changing ref_cache and rather use a temporary array.
00190                 if(USES_LIST(mb_type,list)){
00191                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
00192                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
00193                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00194                     ref += h->b8_stride;
00195                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
00196                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
00197                 }
00198             }
00199         }
00200     }else{
          // Decoding: a neighbour is only usable when it belongs to the
          // current slice (slice_table entry equals h->slice_num).
00201         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
00202         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
00203         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
00204         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
00205         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
00206 
      // Intra MB: derive the sample availability masks. With
      // constrained_intra_pred set, type_mask keeps only the intra type bits,
      // so non-intra neighbours are treated as unavailable.
00207     if(IS_INTRA(mb_type)){
00208         int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
00209         h->topleft_samples_available=
00210         h->top_samples_available=
00211         h->left_samples_available= 0xFFFF;
00212         h->topright_samples_available= 0xEEEA;
00213 
00214         if(!(top_type & type_mask)){
00215             h->topleft_samples_available= 0xB3FF;
00216             h->top_samples_available= 0x33FF;
00217             h->topright_samples_available= 0x26EA;
00218         }
00219         if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
00220             if(IS_INTERLACED(mb_type)){
00221                 if(!(left_type[0] & type_mask)){
00222                     h->topleft_samples_available&= 0xDFFF;
00223                     h->left_samples_available&= 0x5FFF;
00224                 }
00225                 if(!(left_type[1] & type_mask)){
00226                     h->topleft_samples_available&= 0xFF5F;
00227                     h->left_samples_available&= 0xFF5F;
00228                 }
00229             }else{
00230                 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
00231                                 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
00232                 assert(left_xy[0] == left_xy[1]);
00233                 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
00234                     h->topleft_samples_available&= 0xDF5F;
00235                     h->left_samples_available&= 0x5F5F;
00236                 }
00237             }
00238         }else{
00239             if(!(left_type[0] & type_mask)){
00240                 h->topleft_samples_available&= 0xDF5F;
00241                 h->left_samples_available&= 0x5F5F;
00242             }
00243         }
00244 
00245         if(!(topleft_type & type_mask))
00246             h->topleft_samples_available&= 0x7FFF;
00247 
00248         if(!(topright_type & type_mask))
00249             h->topright_samples_available&= 0xFBFF;
00250 
          // Intra4x4: load the neighbours' prediction modes; a neighbour that
          // is not intra4x4 contributes -1 when unusable and 2 otherwise.
00251         if(IS_INTRA4x4(mb_type)){
00252             if(IS_INTRA4x4(top_type)){
00253                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
00254                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
00255                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
00256                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
00257             }else{
00258                 int pred;
00259                 if(!(top_type & type_mask))
00260                     pred= -1;
00261                 else{
00262                     pred= 2;
00263                 }
00264                 h->intra4x4_pred_mode_cache[4+8*0]=
00265                 h->intra4x4_pred_mode_cache[5+8*0]=
00266                 h->intra4x4_pred_mode_cache[6+8*0]=
00267                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
00268             }
00269             for(i=0; i<2; i++){
00270                 if(IS_INTRA4x4(left_type[i])){
00271                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
00272                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
00273                 }else{
00274                     int pred;
00275                     if(!(left_type[i] & type_mask))
00276                         pred= -1;
00277                     else{
00278                         pred= 2;
00279                     }
00280                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
00281                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
00282                 }
00283             }
00284         }
00285     }
00286     }
00287 
00288 
00289 /*
00290 0 . T T. T T T T
00291 1 L . .L . . . .
00292 2 L . .L . . . .
00293 3 . T TL . . . .
00294 4 L . .L . . . .
00295 5 L . .. . . . .
00296 */
00297 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
      // Non-zero coefficient counts of the top neighbour; when it is missing
      // the entries are set to 0 (CABAC and non-intra MB) or 64.
00298     if(top_type){
00299         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
00300         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
00301         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
00302         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
00303 
00304         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
00305         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
00306 
00307         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
00308         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
00309 
00310     }else{
00311         h->non_zero_count_cache[4+8*0]=
00312         h->non_zero_count_cache[5+8*0]=
00313         h->non_zero_count_cache[6+8*0]=
00314         h->non_zero_count_cache[7+8*0]=
00315 
00316         h->non_zero_count_cache[1+8*0]=
00317         h->non_zero_count_cache[2+8*0]=
00318 
00319         h->non_zero_count_cache[1+8*3]=
00320         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00321 
00322     }
00323 
      // Same for the two left neighbours, addressed through left_block.
00324     for (i=0; i<2; i++) {
00325         if(left_type[i]){
00326             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
00327             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
00328             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
00329             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
00330         }else{
00331             h->non_zero_count_cache[3+8*1 + 2*8*i]=
00332             h->non_zero_count_cache[3+8*2 + 2*8*i]=
00333             h->non_zero_count_cache[0+8*1 +   8*i]=
00334             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00335         }
00336     }
00337 
00338     if( h->pps.cabac ) {
00339         // top_cbp
00340         if(top_type) {
00341             h->top_cbp = h->cbp_table[top_xy];
00342         } else if(IS_INTRA(mb_type)) {
00343             h->top_cbp = 0x1C0;
00344         } else {
00345             h->top_cbp = 0;
00346         }
00347         // left_cbp
00348         if (left_type[0]) {
00349             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
00350         } else if(IS_INTRA(mb_type)) {
00351             h->left_cbp = 0x1C0;
00352         } else {
00353             h->left_cbp = 0;
00354         }
00355         if (left_type[0]) {
00356             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
00357         }
00358         if (left_type[1]) {
00359             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
00360         }
00361     }
00362 
00363 #if 1
      // Inter / direct MB: load the neighbours' motion vectors and reference
      // indices into mv_cache / ref_cache, once per reference list.
00364     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
00365         int list;
00366         for(list=0; list<h->list_count; list++){
00367             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
00368                 /*if(!h->mv_cache_clean[list]){
00369                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
00370                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
00371                     h->mv_cache_clean[list]= 1;
00372                 }*/
00373                 continue;
00374             }
00375             h->mv_cache_clean[list]= 0;
00376 
              // Top neighbour: its last row of 4 motion vectors / 2 ref indices.
00377             if(USES_LIST(top_type, list)){
00378                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00379                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
00380                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
00381                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
00382                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
00383                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
00384                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
00385                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
00386                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
00387                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
00388             }else{
00389                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
00390                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
00391                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
00392                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
00393                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
00394             }
00395 
              // Left neighbours: their last column, rows chosen via left_block.
00396             for(i=0; i<2; i++){
00397                 int cache_idx = scan8[0] - 1 + i*2*8;
00398                 if(USES_LIST(left_type[i], list)){
00399                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
00400                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
00401                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
00402                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
00403                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
00404                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
00405                 }else{
00406                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
00407                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
00408                     h->ref_cache[list][cache_idx  ]=
00409                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00410                 }
00411             }
00412 
              // The remaining neighbour data is not loaded for deblocking, nor
              // for non-spatial direct MBs outside MBAFF.
00413             if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
00414                 continue;
00415 
00416             if(USES_LIST(topleft_type, list)){
00417                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
00418                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
00419                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00420                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00421             }else{
00422                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
00423                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00424             }
00425 
00426             if(USES_LIST(topright_type, list)){
00427                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
00428                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
00429                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00430                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00431             }else{
00432                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
00433                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00434             }
00435 
00436             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
00437                 continue;
00438 
00439             h->ref_cache[list][scan8[5 ]+1] =
00440             h->ref_cache[list][scan8[7 ]+1] =
00441             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
00442             h->ref_cache[list][scan8[4 ]] =
00443             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
00444             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
00445             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
00446             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00447             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
00448             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
00449 
00450             if( h->pps.cabac ) {
00451                 /* XXX beurk, Load mvd */
00452                 if(USES_LIST(top_type, list)){
00453                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00454                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
00455                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
00456                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
00457                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
00458                 }else{
00459                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
00460                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
00461                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
00462                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
00463                 }
00464                 if(USES_LIST(left_type[0], list)){
00465                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
00466                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
00467                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
00468                 }else{
00469                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
00470                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
00471                 }
00472                 if(USES_LIST(left_type[1], list)){
00473                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
00474                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
00475                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
00476                 }else{
00477                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
00478                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
00479                 }
00480                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
00481                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
00482                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00483                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
00484                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
00485 
                  // B slices: record for each top/left neighbour position
                  // whether it is direct (1), not direct (0), or, for 8x8
                  // partitioned neighbours, the value from direct_table.
00486                 if(h->slice_type_nos == B_TYPE){
00487                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
00488 
00489                     if(IS_DIRECT(top_type)){
00490                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
00491                     }else if(IS_8X8(top_type)){
00492                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
00493                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
00494                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
00495                     }else{
00496                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
00497                     }
00498 
00499                     if(IS_DIRECT(left_type[0]))
00500                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
00501                     else if(IS_8X8(left_type[0]))
00502                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
00503                     else
00504                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
00505 
00506                     if(IS_DIRECT(left_type[1]))
00507                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
00508                     else if(IS_8X8(left_type[1]))
00509                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
00510                     else
00511                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
00512                 }
00513             }
00514 
              // MBAFF: convert cached neighbour data coded in the other mode.
              // Field MB with a frame neighbour: ref index doubled, second mv/mvd
              // component halved. Frame MB with a field neighbour: ref index
              // halved, second mv/mvd component doubled.
00515             if(FRAME_MBAFF){
00516 #define MAP_MVS\
00517                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
00518                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
00519                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
00520                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
00521                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
00522                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
00523                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
00524                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
00525                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
00526                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
00527                 if(MB_FIELD){
00528 #define MAP_F2F(idx, mb_type)\
00529                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00530                         h->ref_cache[list][idx] <<= 1;\
00531                         h->mv_cache[list][idx][1] /= 2;\
00532                         h->mvd_cache[list][idx][1] /= 2;\
00533                     }
00534                     MAP_MVS
00535 #undef MAP_F2F
00536                 }else{
00537 #define MAP_F2F(idx, mb_type)\
00538                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00539                         h->ref_cache[list][idx] >>= 1;\
00540                         h->mv_cache[list][idx][1] <<= 1;\
00541                         h->mvd_cache[list][idx][1] <<= 1;\
00542                     }
00543                     MAP_MVS
00544 #undef MAP_F2F
00545                 }
00546             }
00547         }
00548     }
00549 #endif
00550 
      // Number (0..2) of top / first-left neighbours that use the 8x8 transform.
00551     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
00552 }
00553 
00554 static inline void write_back_intra_pred_mode(H264Context *h){
00555     const int mb_xy= h->mb_xy;
00556 
00557     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
00558     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
00559     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
00560     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
00561     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
00562     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
00563     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
00564 }
00565 
00569 static inline int check_intra4x4_pred_mode(H264Context *h){
00570     MpegEncContext * const s = &h->s;
00571     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00572     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00573     int i;
00574 
00575     if(!(h->top_samples_available&0x8000)){
00576         for(i=0; i<4; i++){
00577             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00578             if(status<0){
00579                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00580                 return -1;
00581             } else if(status){
00582                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00583             }
00584         }
00585     }
00586 
00587     if((h->left_samples_available&0x8888)!=0x8888){
00588         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00589         for(i=0; i<4; i++){
00590             if(!(h->left_samples_available&mask[i])){
00591             int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00592             if(status<0){
00593                 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00594                 return -1;
00595             } else if(status){
00596                 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00597             }
00598             }
00599         }
00600     }
00601 
00602     return 0;
00603 } //FIXME cleanup like next
00604 
00608 static inline int check_intra_pred_mode(H264Context *h, int mode){
00609     MpegEncContext * const s = &h->s;
00610     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
00611     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
00612 
00613     if(mode > 6U) {
00614         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
00615         return -1;
00616     }
00617 
00618     if(!(h->top_samples_available&0x8000)){
00619         mode= top[ mode ];
00620         if(mode<0){
00621             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00622             return -1;
00623         }
00624     }
00625 
00626     if((h->left_samples_available&0x8080) != 0x8080){
00627         mode= left[ mode ];
00628         if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
00629             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
00630         }
00631         if(mode<0){
00632             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00633             return -1;
00634         }
00635     }
00636 
00637     return mode;
00638 }
00639 
00643 static inline int pred_intra_mode(H264Context *h, int n){
00644     const int index8= scan8[n];
00645     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
00646     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
00647     const int min= FFMIN(left, top);
00648 
00649     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
00650 
00651     if(min<0) return DC_PRED;
00652     else      return min;
00653 }
00654 
00655 static inline void write_back_non_zero_count(H264Context *h){
00656     const int mb_xy= h->mb_xy;
00657 
00658     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
00659     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
00660     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
00661     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
00662     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
00663     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
00664     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
00665 
00666     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
00667     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
00668     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
00669 
00670     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
00671     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
00672     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
00673 }
00674 
00679 static inline int pred_non_zero_count(H264Context *h, int n){
00680     const int index8= scan8[n];
00681     const int left= h->non_zero_count_cache[index8 - 1];
00682     const int top = h->non_zero_count_cache[index8 - 8];
00683     int i= left + top;
00684 
00685     if(i<64) i= (i+1)>>1;
00686 
00687     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
00688 
00689     return i&31;
00690 }
00691 
/**
 * Fetches the "diagonal" (top-right, falling back to top-left) motion vector
 * and reference index used as neighbour C by the motion vector prediction.
 *
 * @param C          receives a pointer to the selected motion vector; it
 *                   points into h->mv_cache
 * @param i          position of the current block inside the caches
 *                   (a scan8[] value)
 * @param list       reference list
 * @param part_width width of the partition in 4x4 block units
 * @return the reference index belonging to *C (may be PART_NOT_AVAILABLE or
 *         LIST_NOT_USED)
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        /* cache slot scan8[0]-2 is used as scratch storage for a rescaled mv;
         * it is cleared and *C is pointed at it before any early return */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads the mv and ref index of the 4x4 block at picture position
 * (X4,Y4) straight from the current picture, stores the mv in the scratch
 * slot with MV_OP applied to its vertical component and RETURNS from the
 * enclosing function with the ref index modified by REF_OP (or with
 * LIST_NOT_USED if that macroblock does not use the list). */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                /* current MB is not a field MB, the top-right one is interlaced */
                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        /* top-right unavailable: the left macroblock may supply the top-left
         * candidate, but only for blocks in cache column 4 ((i&7)==4) */
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    /* regular case: top-right neighbour from the cache if available,
     * otherwise the top-left one */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
00748 
00756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
00757     const int index8= scan8[n];
00758     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
00759     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
00760     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
00761     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
00762     const int16_t * C;
00763     int diagonal_ref, match_count;
00764 
00765     assert(part_width==1 || part_width==2 || part_width==4);
00766 
00767 /* mv_cache
00768   B . . A T T T T
00769   U . . L . . , .
00770   U . . L . . . .
00771   U . . L . . , .
00772   . . . L . . . .
00773 */
00774 
00775     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
00776     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
00777     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
00778     if(match_count > 1){ //most common
00779         *mx= mid_pred(A[0], B[0], C[0]);
00780         *my= mid_pred(A[1], B[1], C[1]);
00781     }else if(match_count==1){
00782         if(left_ref==ref){
00783             *mx= A[0];
00784             *my= A[1];
00785         }else if(top_ref==ref){
00786             *mx= B[0];
00787             *my= B[1];
00788         }else{
00789             *mx= C[0];
00790             *my= C[1];
00791         }
00792     }else{
00793         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
00794             *mx= A[0];
00795             *my= A[1];
00796         }else{
00797             *mx= mid_pred(A[0], B[0], C[0]);
00798             *my= mid_pred(A[1], B[1], C[1]);
00799         }
00800     }
00801 
00802     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
00803 }
00804 
00811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00812     if(n==0){
00813         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
00814         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
00815 
00816         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
00817 
00818         if(top_ref == ref){
00819             *mx= B[0];
00820             *my= B[1];
00821             return;
00822         }
00823     }else{
00824         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
00825         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
00826 
00827         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00828 
00829         if(left_ref == ref){
00830             *mx= A[0];
00831             *my= A[1];
00832             return;
00833         }
00834     }
00835 
00836     //RARE
00837     pred_motion(h, n, 4, list, ref, mx, my);
00838 }
00839 
00846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00847     if(n==0){
00848         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
00849         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
00850 
00851         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00852 
00853         if(left_ref == ref){
00854             *mx= A[0];
00855             *my= A[1];
00856             return;
00857         }
00858     }else{
00859         const int16_t * C;
00860         int diagonal_ref;
00861 
00862         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
00863 
00864         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
00865 
00866         if(diagonal_ref == ref){
00867             *mx= C[0];
00868             *my= C[1];
00869             return;
00870         }
00871     }
00872 
00873     //RARE
00874     pred_motion(h, n, 2, list, ref, mx, my);
00875 }
00876 
00877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
00878     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
00879     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
00880 
00881     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
00882 
00883     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
00884        || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
00885        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
00886 
00887         *mx = *my = 0;
00888         return;
00889     }
00890 
00891     pred_motion(h, 0, 4, 0, 0, mx, my);
00892 
00893     return;
00894 }
00895 
00896 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
00897     int poc0 = h->ref_list[0][i].poc;
00898     int td = av_clip(poc1 - poc0, -128, 127);
00899     if(td == 0 || h->ref_list[0][i].long_ref){
00900         return 256;
00901     }else{
00902         int tb = av_clip(poc - poc0, -128, 127);
00903         int tx = (16384 + (FFABS(td) >> 1)) / td;
00904         return av_clip((tb*tx + 32) >> 6, -1024, 1023);
00905     }
00906 }
00907 
00908 static inline void direct_dist_scale_factor(H264Context * const h){
00909     MpegEncContext * const s = &h->s;
00910     const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
00911     const int poc1 = h->ref_list[1][0].poc;
00912     int i, field;
00913     for(field=0; field<2; field++){
00914         const int poc  = h->s.current_picture_ptr->field_poc[field];
00915         const int poc1 = h->ref_list[1][0].field_poc[field];
00916         for(i=0; i < 2*h->ref_count[0]; i++)
00917             h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
00918     }
00919 
00920     for(i=0; i<h->ref_count[0]; i++){
00921         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
00922     }
00923 }
00924 
00925 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
00926     MpegEncContext * const s = &h->s;
00927     Picture * const ref1 = &h->ref_list[1][0];
00928     int j, old_ref, rfield;
00929     int start= mbafi ? 16                      : 0;
00930     int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
00931     int interl= mbafi || s->picture_structure != PICT_FRAME;
00932 
00933     /* bogus; fills in for missing frames */
00934     memset(map[list], 0, sizeof(map[list]));
00935 
00936     for(rfield=0; rfield<2; rfield++){
00937         for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
00938             int poc = ref1->ref_poc[colfield][list][old_ref];
00939 
00940             if     (!interl)
00941                 poc |= 3;
00942             else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
00943                 poc= (poc&~3) + rfield + 1;
00944 
00945             for(j=start; j<end; j++){
00946                 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
00947                     int cur_ref= mbafi ? (j-16)^field : j;
00948                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
00949                     if(rfield == field)
00950                         map[list][old_ref] = cur_ref;
00951                     break;
00952                 }
00953             }
00954         }
00955     }
00956 }
00957 
00958 static inline void direct_ref_list_init(H264Context * const h){
00959     MpegEncContext * const s = &h->s;
00960     Picture * const ref1 = &h->ref_list[1][0];
00961     Picture * const cur = s->current_picture_ptr;
00962     int list, j, field, rfield;
00963     int sidx= (s->picture_structure&1)^1;
00964     int ref1sidx= (ref1->reference&1)^1;
00965 
00966     for(list=0; list<2; list++){
00967         cur->ref_count[sidx][list] = h->ref_count[list];
00968         for(j=0; j<h->ref_count[list]; j++)
00969             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
00970     }
00971 
00972     if(s->picture_structure == PICT_FRAME){
00973         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
00974         memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
00975     }
00976 
00977     cur->mbaff= FRAME_MBAFF;
00978 
00979     if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
00980         return;
00981 
00982     for(list=0; list<2; list++){
00983         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
00984         for(field=0; field<2; field++)
00985             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
00986     }
00987 }
00988 
00989 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
00990     MpegEncContext * const s = &h->s;
00991     int b8_stride = h->b8_stride;
00992     int b4_stride = h->b_stride;
00993     int mb_xy = h->mb_xy;
00994     int mb_type_col[2];
00995     const int16_t (*l1mv0)[2], (*l1mv1)[2];
00996     const int8_t *l1ref0, *l1ref1;
00997     const int is_b8x8 = IS_8X8(*mb_type);
00998     unsigned int sub_mb_type;
00999     int i8, i4;
01000 
01001 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
01002 
01003     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
01004         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
01005             int cur_poc = s->current_picture_ptr->poc;
01006             int *col_poc = h->ref_list[1]->field_poc;
01007             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
01008             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
01009             b8_stride = 0;
01010         }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
01011             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
01012             mb_xy += s->mb_stride*fieldoff;
01013         }
01014         goto single_col;
01015     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
01016         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
01017             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
01018             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
01019             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
01020             b8_stride *= 3;
01021             b4_stride *= 6;
01022             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
01023             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
01024                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
01025                 && !is_b8x8){
01026                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01027                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
01028             }else{
01029                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01030                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01031             }
01032         }else{                                           //     AFR/FR    -> AFR/FR
01033 single_col:
01034             mb_type_col[0] =
01035             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
01036             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
01037                 /* FIXME save sub mb types from previous frames (or derive from MVs)
01038                 * so we know exactly what block size to use */
01039                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
01040                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01041             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
01042                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01043                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
01044             }else{
01045                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
01046                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
01047             }
01048         }
01049     }
01050 
01051     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
01052     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
01053     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
01054     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
01055     if(!b8_stride){
01056         if(s->mb_y&1){
01057             l1ref0 += h->b8_stride;
01058             l1ref1 += h->b8_stride;
01059             l1mv0  +=  2*b4_stride;
01060             l1mv1  +=  2*b4_stride;
01061         }
01062     }
01063 
01064     if(h->direct_spatial_mv_pred){
01065         int ref[2];
01066         int mv[2][2];
01067         int list;
01068 
01069         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
01070 
01071         /* ref = min(neighbors) */
01072         for(list=0; list<2; list++){
01073             int refa = h->ref_cache[list][scan8[0] - 1];
01074             int refb = h->ref_cache[list][scan8[0] - 8];
01075             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
01076             if(refc == PART_NOT_AVAILABLE)
01077                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
01078             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
01079             if(ref[list] < 0)
01080                 ref[list] = -1;
01081         }
01082 
01083         if(ref[0] < 0 && ref[1] < 0){
01084             ref[0] = ref[1] = 0;
01085             mv[0][0] = mv[0][1] =
01086             mv[1][0] = mv[1][1] = 0;
01087         }else{
01088             for(list=0; list<2; list++){
01089                 if(ref[list] >= 0)
01090                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
01091                 else
01092                     mv[list][0] = mv[list][1] = 0;
01093             }
01094         }
01095 
01096         if(ref[1] < 0){
01097             if(!is_b8x8)
01098                 *mb_type &= ~MB_TYPE_L1;
01099             sub_mb_type &= ~MB_TYPE_L1;
01100         }else if(ref[0] < 0){
01101             if(!is_b8x8)
01102                 *mb_type &= ~MB_TYPE_L0;
01103             sub_mb_type &= ~MB_TYPE_L0;
01104         }
01105 
01106         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01107             for(i8=0; i8<4; i8++){
01108                 int x8 = i8&1;
01109                 int y8 = i8>>1;
01110                 int xy8 = x8+y8*b8_stride;
01111                 int xy4 = 3*x8+y8*b4_stride;
01112                 int a=0, b=0;
01113 
01114                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01115                     continue;
01116                 h->sub_mb_type[i8] = sub_mb_type;
01117 
01118                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01119                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01120                 if(!IS_INTRA(mb_type_col[y8])
01121                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
01122                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
01123                     if(ref[0] > 0)
01124                         a= pack16to32(mv[0][0],mv[0][1]);
01125                     if(ref[1] > 0)
01126                         b= pack16to32(mv[1][0],mv[1][1]);
01127                 }else{
01128                     a= pack16to32(mv[0][0],mv[0][1]);
01129                     b= pack16to32(mv[1][0],mv[1][1]);
01130                 }
01131                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
01132                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
01133             }
01134         }else if(IS_16X16(*mb_type)){
01135             int a=0, b=0;
01136 
01137             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
01138             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
01139             if(!IS_INTRA(mb_type_col[0])
01140                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
01141                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
01142                        && (h->x264_build>33 || !h->x264_build)))){
01143                 if(ref[0] > 0)
01144                     a= pack16to32(mv[0][0],mv[0][1]);
01145                 if(ref[1] > 0)
01146                     b= pack16to32(mv[1][0],mv[1][1]);
01147             }else{
01148                 a= pack16to32(mv[0][0],mv[0][1]);
01149                 b= pack16to32(mv[1][0],mv[1][1]);
01150             }
01151             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
01152             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
01153         }else{
01154             for(i8=0; i8<4; i8++){
01155                 const int x8 = i8&1;
01156                 const int y8 = i8>>1;
01157 
01158                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01159                     continue;
01160                 h->sub_mb_type[i8] = sub_mb_type;
01161 
01162                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
01163                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
01164                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
01165                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
01166 
01167                 /* col_zero_flag */
01168                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
01169                                               || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
01170                                                   && (h->x264_build>33 || !h->x264_build)))){
01171                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
01172                     if(IS_SUB_8X8(sub_mb_type)){
01173                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01174                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01175                             if(ref[0] == 0)
01176                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01177                             if(ref[1] == 0)
01178                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01179                         }
01180                     }else
01181                     for(i4=0; i4<4; i4++){
01182                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01183                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
01184                             if(ref[0] == 0)
01185                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
01186                             if(ref[1] == 0)
01187                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
01188                         }
01189                     }
01190                 }
01191             }
01192         }
01193     }else{ /* direct temporal mv pred */
01194         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
01195         const int *dist_scale_factor = h->dist_scale_factor;
01196         int ref_offset= 0;
01197 
01198         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
01199             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
01200             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
01201             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
01202         }
01203         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
01204             ref_offset += 16;
01205 
01206         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
01207             /* FIXME assumes direct_8x8_inference == 1 */
01208             int y_shift  = 2*!IS_INTERLACED(*mb_type);
01209 
01210             for(i8=0; i8<4; i8++){
01211                 const int x8 = i8&1;
01212                 const int y8 = i8>>1;
01213                 int ref0, scale;
01214                 const int16_t (*l1mv)[2]= l1mv0;
01215 
01216                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01217                     continue;
01218                 h->sub_mb_type[i8] = sub_mb_type;
01219 
01220                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01221                 if(IS_INTRA(mb_type_col[y8])){
01222                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01223                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01224                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01225                     continue;
01226                 }
01227 
01228                 ref0 = l1ref0[x8 + y8*b8_stride];
01229                 if(ref0 >= 0)
01230                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
01231                 else{
01232                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01233                     l1mv= l1mv1;
01234                 }
01235                 scale = dist_scale_factor[ref0];
01236                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01237 
01238                 {
01239                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
01240                     int my_col = (mv_col[1]<<y_shift)/2;
01241                     int mx = (scale * mv_col[0] + 128) >> 8;
01242                     int my = (scale * my_col + 128) >> 8;
01243                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01244                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
01245                 }
01246             }
01247             return;
01248         }
01249 
01250         /* one-to-one mv scaling */
01251 
01252         if(IS_16X16(*mb_type)){
01253             int ref, mv0, mv1;
01254 
01255             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
01256             if(IS_INTRA(mb_type_col[0])){
01257                 ref=mv0=mv1=0;
01258             }else{
01259                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
01260                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
01261                 const int scale = dist_scale_factor[ref0];
01262                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
01263                 int mv_l0[2];
01264                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01265                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01266                 ref= ref0;
01267                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
01268                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01269             }
01270             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
01271             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
01272             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
01273         }else{
01274             for(i8=0; i8<4; i8++){
01275                 const int x8 = i8&1;
01276                 const int y8 = i8>>1;
01277                 int ref0, scale;
01278                 const int16_t (*l1mv)[2]= l1mv0;
01279 
01280                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
01281                     continue;
01282                 h->sub_mb_type[i8] = sub_mb_type;
01283                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
01284                 if(IS_INTRA(mb_type_col[0])){
01285                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
01286                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
01287                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
01288                     continue;
01289                 }
01290 
01291                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
01292                 if(ref0 >= 0)
01293                     ref0 = map_col_to_list0[0][ref0];
01294                 else{
01295                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
01296                     l1mv= l1mv1;
01297                 }
01298                 scale = dist_scale_factor[ref0];
01299 
01300                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
01301                 if(IS_SUB_8X8(sub_mb_type)){
01302                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
01303                     int mx = (scale * mv_col[0] + 128) >> 8;
01304                     int my = (scale * mv_col[1] + 128) >> 8;
01305                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
01306                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
01307                 }else
01308                 for(i4=0; i4<4; i4++){
01309                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
01310                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
01311                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
01312                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
01313                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
01314                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
01315                 }
01316             }
01317         }
01318     }
01319 }
01320 
01321 static inline void write_back_motion(H264Context *h, int mb_type){
01322     MpegEncContext * const s = &h->s;
01323     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
01324     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
01325     int list;
01326 
01327     if(!USES_LIST(mb_type, 0))
01328         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
01329 
01330     for(list=0; list<h->list_count; list++){
01331         int y;
01332         if(!USES_LIST(mb_type, list))
01333             continue;
01334 
01335         for(y=0; y<4; y++){
01336             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
01337             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
01338         }
01339         if( h->pps.cabac ) {
01340             if(IS_SKIP(mb_type))
01341                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
01342             else
01343             for(y=0; y<4; y++){
01344                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
01345                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
01346             }
01347         }
01348 
01349         {
01350             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
01351             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
01352             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
01353             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
01354             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
01355         }
01356     }
01357 
01358     if(h->slice_type_nos == B_TYPE && h->pps.cabac){
01359         if(IS_8X8(mb_type)){
01360             uint8_t *direct_table = &h->direct_table[b8_xy];
01361             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
01362             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
01363             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
01364         }
01365     }
01366 }
01367 
01375 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
01376     int i, si, di;
01377     uint8_t *dst;
01378     int bufidx;
01379 
01380 //    src[0]&0x80;                //forbidden bit
01381     h->nal_ref_idc= src[0]>>5;
01382     h->nal_unit_type= src[0]&0x1F;
01383 
01384     src++; length--;
01385 #if 0
01386     for(i=0; i<length; i++)
01387         printf("%2X ", src[i]);
01388 #endif
01389     for(i=0; i+1<length; i+=2){
01390         if(src[i]) continue;
01391         if(i>0 && src[i-1]==0) i--;
01392         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
01393             if(src[i+2]!=3){
01394                 /* startcode, so we must be past the end */
01395                 length=i;
01396             }
01397             break;
01398         }
01399     }
01400 
01401     if(i>=length-1){ //no escaped 0
01402         *dst_length= length;
01403         *consumed= length+1; //+1 for the header
01404         return src;
01405     }
01406 
01407     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
01408     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
01409     dst= h->rbsp_buffer[bufidx];
01410 
01411     if (dst == NULL){
01412         return NULL;
01413     }
01414 
01415 //printf("decoding esc\n");
01416     si=di=0;
01417     while(si<length){
01418         //remove escapes (very rare 1:2^22)
01419         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
01420             if(src[si+2]==3){ //escape
01421                 dst[di++]= 0;
01422                 dst[di++]= 0;
01423                 si+=3;
01424                 continue;
01425             }else //next start code
01426                 break;
01427         }
01428 
01429         dst[di++]= src[si++];
01430     }
01431 
01432     *dst_length= di;
01433     *consumed= si + 1;//+1 for the header
01434 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
01435     return dst;
01436 }
01437 
01442 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
01443     int v= *src;
01444     int r;
01445 
01446     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
01447 
01448     for(r=1; r<9; r++){
01449         if(v&1) return r;
01450         v>>=1;
01451     }
01452     return 0;
01453 }
01454 
01459 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01460 #define stride 16
01461     int i;
01462     int temp[16]; //FIXME check if this is a good idea
01463     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
01464     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
01465 
01466 //memset(block, 64, 2*256);
01467 //return;
01468     for(i=0; i<4; i++){
01469         const int offset= y_offset[i];
01470         const int z0= block[offset+stride*0] + block[offset+stride*4];
01471         const int z1= block[offset+stride*0] - block[offset+stride*4];
01472         const int z2= block[offset+stride*1] - block[offset+stride*5];
01473         const int z3= block[offset+stride*1] + block[offset+stride*5];
01474 
01475         temp[4*i+0]= z0+z3;
01476         temp[4*i+1]= z1+z2;
01477         temp[4*i+2]= z1-z2;
01478         temp[4*i+3]= z0-z3;
01479     }
01480 
01481     for(i=0; i<4; i++){
01482         const int offset= x_offset[i];
01483         const int z0= temp[4*0+i] + temp[4*2+i];
01484         const int z1= temp[4*0+i] - temp[4*2+i];
01485         const int z2= temp[4*1+i] - temp[4*3+i];
01486         const int z3= temp[4*1+i] + temp[4*3+i];
01487 
01488         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
01489         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
01490         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
01491         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
01492     }
01493 }
01494 
#if 0
/**
 * Forward transform of the 16 luma DC coefficients (disabled code).
 * Works in place on block, using the same element offsets as the inverse
 * transform; every result is halved with an arithmetic right shift.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: butterflies inside each group of four coefficients */
    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        const int c0= in[stride*0];
        const int c1= in[stride*1];
        const int c4= in[stride*4];
        const int c5= in[stride*5];
        int *out= temp + 4*i;

        out[0]= (c0 + c4) + (c1 + c5);
        out[1]= (c0 - c4) + (c1 - c5);
        out[2]= (c0 - c4) - (c1 - c5);
        out[3]= (c0 + c4) - (c1 + c5);
    }

    /* second pass: butterflies across the groups, then halve */
    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int sum02= temp[4*0+i] + temp[4*2+i];
        const int dif02= temp[4*0+i] - temp[4*2+i];
        const int dif13= temp[4*1+i] - temp[4*3+i];
        const int sum13= temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
01534 
01535 #undef xStride
01536 #undef stride
01537 
01538 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
01539     const int stride= 16*2;
01540     const int xStride= 16;
01541     int a,b,c,d,e;
01542 
01543     a= block[stride*0 + xStride*0];
01544     b= block[stride*0 + xStride*1];
01545     c= block[stride*1 + xStride*0];
01546     d= block[stride*1 + xStride*1];
01547 
01548     e= a-b;
01549     a= a+b;
01550     b= c-d;
01551     c= c+d;
01552 
01553     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
01554     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
01555     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
01556     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
01557 }
01558 
#if 0
/**
 * Forward 2x2 transform of the chroma DC coefficients (disabled code).
 * The four coefficients live at element offsets 0, 16, 32 and 48 of block
 * and are replaced in place.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    enum { ROW_STEP = 16*2, COL_STEP = 16 };
    DCTELEM * const top   = block;
    DCTELEM * const bottom= block + ROW_STEP;
    const int top_sum    = top   [0] + top   [COL_STEP];
    const int top_diff   = top   [0] - top   [COL_STEP];
    const int bottom_sum = bottom[0] + bottom[COL_STEP];
    const int bottom_diff= bottom[0] - bottom[COL_STEP];

    top   [0       ]= (top_sum  + bottom_sum );
    top   [COL_STEP]= (top_diff + bottom_diff);
    bottom[0       ]= (top_sum  - bottom_sum );
    bottom[COL_STEP]= (top_diff - bottom_diff);
}
#endif
01581 
01585 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
01586     return h->pps.chroma_qp_table[t][qscale];
01587 }
01588 
01589 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
01590 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
01591 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
01592     int i;
01593     const int * const quant_table= quant_coeff[qscale];
01594     const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
01595     const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
01596     const unsigned int threshold2= (threshold1<<1);
01597     int last_non_zero;
01598 
01599     if(separate_dc){
01600         if(qscale<=18){
01601             //avoid overflows
01602             const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
01603             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
01604             const unsigned int dc_threshold2= (dc_threshold1<<1);
01605 
01606             int level= block[0]*quant_coeff[qscale+18][0];
01607             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
01608                 if(level>0){
01609                     level= (dc_bias + level)>>(QUANT_SHIFT-2);
01610                     block[0]= level;
01611                 }else{
01612                     level= (dc_bias - level)>>(QUANT_SHIFT-2);
01613                     block[0]= -level;
01614                 }
01615 //                last_non_zero = i;
01616             }else{
01617                 block[0]=0;
01618             }
01619         }else{
01620             const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
01621             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
01622             const unsigned int dc_threshold2= (dc_threshold1<<1);
01623 
01624             int level= block[0]*quant_table[0];
01625             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
01626                 if(level>0){
01627                     level= (dc_bias + level)>>(QUANT_SHIFT+1);
01628                     block[0]= level;
01629                 }else{
01630                     level= (dc_bias - level)>>(QUANT_SHIFT+1);
01631                     block[0]= -level;
01632                 }
01633 //                last_non_zero = i;
01634             }else{
01635                 block[0]=0;
01636             }
01637         }
01638         last_non_zero= 0;
01639         i=1;
01640     }else{
01641         last_non_zero= -1;
01642         i=0;
01643     }
01644 
01645     for(; i<16; i++){
01646         const int j= scantable[i];
01647         int level= block[j]*quant_table[j];
01648 
01649 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
01650 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
01651         if(((unsigned)(level+threshold1))>threshold2){
01652             if(level>0){
01653                 level= (bias + level)>>QUANT_SHIFT;
01654                 block[j]= level;
01655             }else{
01656                 level= (bias - level)>>QUANT_SHIFT;
01657                 block[j]= -level;
01658             }
01659             last_non_zero = i;
01660         }else{
01661             block[j]=0;
01662         }
01663     }
01664 
01665     return last_non_zero;
01666 }
01667 
01668 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
01669                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01670                            int src_x_offset, int src_y_offset,
01671                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
01672     MpegEncContext * const s = &h->s;
01673     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
01674     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
01675     const int luma_xy= (mx&3) + ((my&3)<<2);
01676     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
01677     uint8_t * src_cb, * src_cr;
01678     int extra_width= h->emu_edge_width;
01679     int extra_height= h->emu_edge_height;
01680     int emu=0;
01681     const int full_mx= mx>>2;
01682     const int full_my= my>>2;
01683     const int pic_width  = 16*s->mb_width;
01684     const int pic_height = 16*s->mb_height >> MB_FIELD;
01685 
01686     if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
01687         return;
01688 
01689     if(mx&7) extra_width -= 3;
01690     if(my&7) extra_height -= 3;
01691 
01692     if(   full_mx < 0-extra_width
01693        || full_my < 0-extra_height
01694        || full_mx + 16/*FIXME*/ > pic_width + extra_width
01695        || full_my + 16/*FIXME*/ > pic_height + extra_height){
01696         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
01697             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
01698         emu=1;
01699     }
01700 
01701     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
01702     if(!square){
01703         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
01704     }
01705 
01706     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
01707 
01708     if(MB_FIELD){
01709         // chroma offset when predicting from a field of opposite parity
01710         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
01711         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
01712     }
01713     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01714     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
01715 
01716     if(emu){
01717         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01718             src_cb= s->edge_emu_buffer;
01719     }
01720     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01721 
01722     if(emu){
01723         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
01724             src_cr= s->edge_emu_buffer;
01725     }
01726     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
01727 }
01728 
01729 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
01730                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01731                            int x_offset, int y_offset,
01732                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01733                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01734                            int list0, int list1){
01735     MpegEncContext * const s = &h->s;
01736     qpel_mc_func *qpix_op=  qpix_put;
01737     h264_chroma_mc_func chroma_op= chroma_put;
01738 
01739     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01740     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01741     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01742     x_offset += 8*s->mb_x;
01743     y_offset += 8*(s->mb_y >> MB_FIELD);
01744 
01745     if(list0){
01746         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
01747         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
01748                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01749                            qpix_op, chroma_op);
01750 
01751         qpix_op=  qpix_avg;
01752         chroma_op= chroma_avg;
01753     }
01754 
01755     if(list1){
01756         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
01757         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
01758                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
01759                            qpix_op, chroma_op);
01760     }
01761 }
01762 
01763 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
01764                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01765                            int x_offset, int y_offset,
01766                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01767                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
01768                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
01769                            int list0, int list1){
01770     MpegEncContext * const s = &h->s;
01771 
01772     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
01773     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
01774     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
01775     x_offset += 8*s->mb_x;
01776     y_offset += 8*(s->mb_y >> MB_FIELD);
01777 
01778     if(list0 && list1){
01779         /* don't optimize for luma-only case, since B-frames usually
01780          * use implicit weights => chroma too. */
01781         uint8_t *tmp_cb = s->obmc_scratchpad;
01782         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
01783         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
01784         int refn0 = h->ref_cache[0][ scan8[n] ];
01785         int refn1 = h->ref_cache[1][ scan8[n] ];
01786 
01787         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
01788                     dest_y, dest_cb, dest_cr,
01789                     x_offset, y_offset, qpix_put, chroma_put);
01790         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
01791                     tmp_y, tmp_cb, tmp_cr,
01792                     x_offset, y_offset, qpix_put, chroma_put);
01793 
01794         if(h->use_weight == 2){
01795             int weight0 = h->implicit_weight[refn0][refn1];
01796             int weight1 = 64 - weight0;
01797             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
01798             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
01799             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
01800         }else{
01801             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
01802                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
01803                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
01804             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01805                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
01806                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
01807             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01808                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
01809                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
01810         }
01811     }else{
01812         int list = list1 ? 1 : 0;
01813         int refn = h->ref_cache[list][ scan8[n] ];
01814         Picture *ref= &h->ref_list[list][refn];
01815         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
01816                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
01817                     qpix_put, chroma_put);
01818 
01819         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
01820                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
01821         if(h->use_weight_chroma){
01822             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01823                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
01824             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
01825                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
01826         }
01827     }
01828 }
01829 
01830 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
01831                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01832                            int x_offset, int y_offset,
01833                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
01834                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
01835                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
01836                            int list0, int list1){
01837     if((h->use_weight==2 && list0 && list1
01838         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
01839        || h->use_weight==1)
01840         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01841                          x_offset, y_offset, qpix_put, chroma_put,
01842                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
01843     else
01844         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
01845                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
01846 }
01847 
01848 static inline void prefetch_motion(H264Context *h, int list){
01849     /* fetch pixels for estimated mv 4 macroblocks ahead
01850      * optimized for 64byte cache lines */
01851     MpegEncContext * const s = &h->s;
01852     const int refn = h->ref_cache[list][scan8[0]];
01853     if(refn >= 0){
01854         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
01855         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
01856         uint8_t **src= h->ref_list[list][refn].data;
01857         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
01858         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01859         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
01860         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01861     }
01862 }
01863 
01864 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
01865                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
01866                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
01867                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
01868     MpegEncContext * const s = &h->s;
01869     const int mb_xy= h->mb_xy;
01870     const int mb_type= s->current_picture.mb_type[mb_xy];
01871 
01872     assert(IS_INTER(mb_type));
01873 
01874     prefetch_motion(h, 0);
01875 
01876     if(IS_16X16(mb_type)){
01877         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
01878                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
01879                 &weight_op[0], &weight_avg[0],
01880                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01881     }else if(IS_16X8(mb_type)){
01882         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
01883                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01884                 &weight_op[1], &weight_avg[1],
01885                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01886         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
01887                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
01888                 &weight_op[1], &weight_avg[1],
01889                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01890     }else if(IS_8X16(mb_type)){
01891         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
01892                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01893                 &weight_op[2], &weight_avg[2],
01894                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
01895         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
01896                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01897                 &weight_op[2], &weight_avg[2],
01898                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
01899     }else{
01900         int i;
01901 
01902         assert(IS_8X8(mb_type));
01903 
01904         for(i=0; i<4; i++){
01905             const int sub_mb_type= h->sub_mb_type[i];
01906             const int n= 4*i;
01907             int x_offset= (i&1)<<2;
01908             int y_offset= (i&2)<<1;
01909 
01910             if(IS_SUB_8X8(sub_mb_type)){
01911                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01912                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
01913                     &weight_op[3], &weight_avg[3],
01914                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01915             }else if(IS_SUB_8X4(sub_mb_type)){
01916                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01917                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01918                     &weight_op[4], &weight_avg[4],
01919                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01920                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
01921                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
01922                     &weight_op[4], &weight_avg[4],
01923                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01924             }else if(IS_SUB_4X8(sub_mb_type)){
01925                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
01926                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01927                     &weight_op[5], &weight_avg[5],
01928                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01929                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
01930                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01931                     &weight_op[5], &weight_avg[5],
01932                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01933             }else{
01934                 int j;
01935                 assert(IS_SUB_4X4(sub_mb_type));
01936                 for(j=0; j<4; j++){
01937                     int sub_x_offset= x_offset + 2*(j&1);
01938                     int sub_y_offset= y_offset +   (j&2);
01939                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
01940                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
01941                         &weight_op[6], &weight_avg[6],
01942                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
01943                 }
01944             }
01945         }
01946     }
01947 
01948     prefetch_motion(h, 1);
01949 }
01950 
01951 static void decode_init_vlc(void){
01952     static int done = 0;
01953 
01954     if (!done) {
01955         int i;
01956         int offset;
01957         done = 1;
01958 
01959         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
01960         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
01961         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
01962                  &chroma_dc_coeff_token_len [0], 1, 1,
01963                  &chroma_dc_coeff_token_bits[0], 1, 1,
01964                  INIT_VLC_USE_NEW_STATIC);
01965 
01966         offset = 0;
01967         for(i=0; i<4; i++){
01968             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
01969             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
01970             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
01971                      &coeff_token_len [i][0], 1, 1,
01972                      &coeff_token_bits[i][0], 1, 1,
01973                      INIT_VLC_USE_NEW_STATIC);
01974             offset += coeff_token_vlc_tables_size[i];
01975         }
01976         /*
01977          * This is a one time safety check to make sure that
01978          * the packed static coeff_token_vlc table sizes
01979          * were initialized correctly.
01980          */
01981         assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
01982 
01983         for(i=0; i<3; i++){
01984             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
01985             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
01986             init_vlc(&chroma_dc_total_zeros_vlc[i],
01987                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
01988                      &chroma_dc_total_zeros_len [i][0], 1, 1,
01989                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
01990                      INIT_VLC_USE_NEW_STATIC);
01991         }
01992         for(i=0; i<15; i++){
01993             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
01994             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
01995             init_vlc(&total_zeros_vlc[i],
01996                      TOTAL_ZEROS_VLC_BITS, 16,
01997                      &total_zeros_len [i][0], 1, 1,
01998                      &total_zeros_bits[i][0], 1, 1,
01999                      INIT_VLC_USE_NEW_STATIC);
02000         }
02001 
02002         for(i=0; i<6; i++){
02003             run_vlc[i].table = run_vlc_tables[i];
02004             run_vlc[i].table_allocated = run_vlc_tables_size;
02005             init_vlc(&run_vlc[i],
02006                      RUN_VLC_BITS, 7,
02007                      &run_len [i][0], 1, 1,
02008                      &run_bits[i][0], 1, 1,
02009                      INIT_VLC_USE_NEW_STATIC);
02010         }
02011         run7_vlc.table = run7_vlc_table,
02012         run7_vlc.table_allocated = run7_vlc_table_size;
02013         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
02014                  &run_len [6][0], 1, 1,
02015                  &run_bits[6][0], 1, 1,
02016                  INIT_VLC_USE_NEW_STATIC);
02017     }
02018 }
02019 
02020 static void free_tables(H264Context *h){
02021     int i;
02022     H264Context *hx;
02023     av_freep(&h->intra4x4_pred_mode);
02024     av_freep(&h->chroma_pred_mode_table);
02025     av_freep(&h->cbp_table);
02026     av_freep(&h->mvd_table[0]);
02027     av_freep(&h->mvd_table[1]);
02028     av_freep(&h->direct_table);
02029     av_freep(&h->non_zero_count);
02030     av_freep(&h->slice_table_base);
02031     h->slice_table= NULL;
02032 
02033     av_freep(&h->mb2b_xy);
02034     av_freep(&h->mb2b8_xy);
02035 
02036     for(i = 0; i < MAX_SPS_COUNT; i++)
02037         av_freep(h->sps_buffers + i);
02038 
02039     for(i = 0; i < MAX_PPS_COUNT; i++)
02040         av_freep(h->pps_buffers + i);
02041 
02042     for(i = 0; i < h->s.avctx->thread_count; i++) {
02043         hx = h->thread_context[i];
02044         if(!hx) continue;
02045         av_freep(&hx->top_borders[1]);
02046         av_freep(&hx->top_borders[0]);
02047         av_freep(&hx->s.obmc_scratchpad);
02048     }
02049 }
02050 
02051 static void init_dequant8_coeff_table(H264Context *h){
02052     int i,q,x;
02053     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
02054     h->dequant8_coeff[0] = h->dequant8_buffer[0];
02055     h->dequant8_coeff[1] = h->dequant8_buffer[1];
02056 
02057     for(i=0; i<2; i++ ){
02058         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
02059             h->dequant8_coeff[1] = h->dequant8_buffer[0];
02060             break;
02061         }
02062 
02063         for(q=0; q<52; q++){
02064             int shift = ff_div6[q];
02065             int idx = ff_rem6[q];
02066             for(x=0; x<64; x++)
02067                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
02068                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
02069                     h->pps.scaling_matrix8[i][x]) << shift;
02070         }
02071     }
02072 }
02073 
02074 static void init_dequant4_coeff_table(H264Context *h){
02075     int i,j,q,x;
02076     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
02077     for(i=0; i<6; i++ ){
02078         h->dequant4_coeff[i] = h->dequant4_buffer[i];
02079         for(j=0; j<i; j++){
02080             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
02081                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
02082                 break;
02083             }
02084         }
02085         if(j<i)
02086             continue;
02087 
02088         for(q=0; q<52; q++){
02089             int shift = ff_div6[q] + 2;
02090             int idx = ff_rem6[q];
02091             for(x=0; x<16; x++)
02092                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
02093                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
02094                     h->pps.scaling_matrix4[i][x]) << shift;
02095         }
02096     }
02097 }
02098 
02099 static void init_dequant_tables(H264Context *h){
02100     int i,x;
02101     init_dequant4_coeff_table(h);
02102     if(h->pps.transform_8x8_mode)
02103         init_dequant8_coeff_table(h);
02104     if(h->sps.transform_bypass){
02105         for(i=0; i<6; i++)
02106             for(x=0; x<16; x++)
02107                 h->dequant4_coeff[i][0][x] = 1<<6;
02108         if(h->pps.transform_8x8_mode)
02109             for(i=0; i<2; i++)
02110                 for(x=0; x<64; x++)
02111                     h->dequant8_coeff[i][0][x] = 1<<6;
02112     }
02113 }
02114 
02115 
02120 static int alloc_tables(H264Context *h){
02121     MpegEncContext * const s = &h->s;
02122     const int big_mb_num= s->mb_stride * (s->mb_height+1);
02123     int x,y;
02124 
02125     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
02126 
02127     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
02128     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
02129     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
02130 
02131     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
02132     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
02133     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
02134     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
02135 
02136     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
02137     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
02138 
02139     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
02140     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
02141     for(y=0; y<s->mb_height; y++){
02142         for(x=0; x<s->mb_width; x++){
02143             const int mb_xy= x + y*s->mb_stride;
02144             const int b_xy = 4*x + 4*y*h->b_stride;
02145             const int b8_xy= 2*x + 2*y*h->b8_stride;
02146 
02147             h->mb2b_xy [mb_xy]= b_xy;
02148             h->mb2b8_xy[mb_xy]= b8_xy;
02149         }
02150     }
02151 
02152     s->obmc_scratchpad = NULL;
02153 
02154     if(!h->dequant4_coeff[0])
02155         init_dequant_tables(h);
02156 
02157     return 0;
02158 fail:
02159     free_tables(h);
02160     return -1;
02161 }
02162 
02166 static void clone_tables(H264Context *dst, H264Context *src){
02167     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
02168     dst->non_zero_count           = src->non_zero_count;
02169     dst->slice_table              = src->slice_table;
02170     dst->cbp_table                = src->cbp_table;
02171     dst->mb2b_xy                  = src->mb2b_xy;
02172     dst->mb2b8_xy                 = src->mb2b8_xy;
02173     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
02174     dst->mvd_table[0]             = src->mvd_table[0];
02175     dst->mvd_table[1]             = src->mvd_table[1];
02176     dst->direct_table             = src->direct_table;
02177 
02178     dst->s.obmc_scratchpad = NULL;
02179     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
02180 }
02181 
02186 static int context_init(H264Context *h){
02187     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02188     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
02189 
02190     return 0;
02191 fail:
02192     return -1; // free_tables will clean up for us
02193 }
02194 
02195 static void common_init(H264Context *h){
02196     MpegEncContext * const s = &h->s;
02197 
02198     s->width = s->avctx->width;
02199     s->height = s->avctx->height;
02200     s->codec_id= s->avctx->codec->id;
02201 
02202     ff_h264_pred_init(&h->hpc, s->codec_id);
02203 
02204     h->dequant_coeff_pps= -1;
02205     s->unrestricted_mv=1;
02206     s->decode=1; //FIXME
02207 
02208     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
02209     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
02210 }
02211 
02212 static int decode_init(AVCodecContext *avctx){
02213     H264Context *h= avctx->priv_data;
02214     MpegEncContext * const s = &h->s;
02215 
02216     MPV_decode_defaults(s);
02217 
02218     s->avctx = avctx;
02219     common_init(h);
02220 
02221     s->out_format = FMT_H264;
02222     s->workaround_bugs= avctx->workaround_bugs;
02223 
02224     // set defaults
02225 //    s->decode_mb= ff_h263_decode_mb;
02226     s->quarter_sample = 1;
02227     s->low_delay= 1;
02228 
02229     if(avctx->codec_id == CODEC_ID_SVQ3)
02230         avctx->pix_fmt= PIX_FMT_YUVJ420P;
02231     else
02232         avctx->pix_fmt= PIX_FMT_YUV420P;
02233 
02234     decode_init_vlc();
02235 
02236     if(avctx->extradata_size > 0 && avctx->extradata &&
02237        *(char *)avctx->extradata == 1){
02238         h->is_avc = 1;
02239         h->got_avcC = 0;
02240     } else {
02241         h->is_avc = 0;
02242     }
02243 
02244     h->thread_context[0] = h;
02245     h->outputed_poc = INT_MIN;
02246     h->prev_poc_msb= 1<<16;
02247     return 0;
02248 }
02249 
02250 static int frame_start(H264Context *h){
02251     MpegEncContext * const s = &h->s;
02252     int i;
02253 
02254     if(MPV_frame_start(s, s->avctx) < 0)
02255         return -1;
02256     ff_er_frame_start(s);
02257     /*
02258      * MPV_frame_start uses pict_type to derive key_frame.
02259      * This is incorrect for H.264; IDR markings must be used.
02260      * Zero here; IDR markings per slice in frame or fields are ORed in later.
02261      * See decode_nal_units().
02262      */
02263     s->current_picture_ptr->key_frame= 0;
02264 
02265     assert(s->linesize && s->uvlinesize);
02266 
02267     for(i=0; i<16; i++){
02268         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
02269         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
02270     }
02271     for(i=0; i<4; i++){
02272         h->block_offset[16+i]=
02273         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02274         h->block_offset[24+16+i]=
02275         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
02276     }
02277 
02278     /* can't be in alloc_tables because linesize isn't known there.
02279      * FIXME: redo bipred weight to not require extra buffer? */
02280     for(i = 0; i < s->avctx->thread_count; i++)
02281         if(!h->thread_context[i]->s.obmc_scratchpad)
02282             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
02283 
02284     /* some macroblocks will be accessed before they're available */
02285     if(FRAME_MBAFF || s->avctx->thread_count > 1)
02286         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
02287 
02288 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
02289 
02290     // We mark the current picture as non-reference after allocating it, so
02291     // that if we break out due to an error it can be released automatically
02292     // in the next MPV_frame_start().
02293     // SVQ3 as well as most other codecs have only last/next/current and thus
02294     // get released even with set reference, besides SVQ3 and others do not
02295     // mark frames as reference later "naturally".
02296     if(s->codec_id != CODEC_ID_SVQ3)
02297         s->current_picture_ptr->reference= 0;
02298 
02299     s->current_picture_ptr->field_poc[0]=
02300     s->current_picture_ptr->field_poc[1]= INT_MAX;
02301     assert(s->current_picture_ptr->long_ref==0);
02302 
02303     return 0;
02304 }
02305 
02306 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
          /* Saves the right-most column and the bottom row of the macroblock at
           * src_y/src_cb/src_cr into h->left_border and h->top_borders[..][mb_x].
           * hl_decode_mb_internal() calls this right before the deblocking
           * filter; xchg_mb_border() reads the saved samples back.
           * NOTE(review): presumably this keeps unfiltered neighbour samples
           * around for intra prediction - confirm against the callers.
           * A non-zero 'simple' skips the MBAFF handling and the gray check. */
02307     MpegEncContext * const s = &h->s;
02308     int i;
          /* step:     distance between consecutive rows inside left_border
           * offset:   first luma entry used in left_border
           * uvoffset: first chroma entry, relative to 34 (cb) and 34+18 (cr)
           * top_idx:  which of the two top_borders buffers is written
           * skiplast: 1 = do not store the last row of the column */
02309     int step    = 1;
02310     int offset  = 1;
02311     int uvoffset= 1;
02312     int top_idx = 1;
02313     int skiplast= 0;
02314 
          /* move to the line above the macroblock, so row i of the macroblock
           * is found at offset (i+1)*linesize below */
02315     src_y  -=   linesize;
02316     src_cb -= uvlinesize;
02317     src_cr -= uvlinesize;
02318 
02319     if(!simple && FRAME_MBAFF){
02320         if(s->mb_y&1){
                  /* odd macroblock row */
02321             offset  = MB_MBAFF ? 1 : 17;
02322             uvoffset= MB_MBAFF ? 1 : 9;
02323             if(!MB_MBAFF){
                      /* additionally store line 15*linesize (luma) and
                       * 7*uvlinesize (chroma) in top_borders[0] */
02324                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
02325                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
02326                 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02327                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
02328                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
02329                 }
02330             }
02331         }else{
                  /* even macroblock row */
02332             if(!MB_MBAFF){
02333                 h->left_border[0]= h->top_borders[0][s->mb_x][15];
02334                 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02335                     h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
02336                     h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
02337                 }
02338                 skiplast= 1;
02339             }
02340             offset  =
02341             uvoffset=
02342             top_idx = MB_MBAFF ? 0 : 1;
02343         }
02344         step= MB_MBAFF ? 2 : 1;
02345     }
02346 
02347     // There are two lines saved, the line above the top macroblock of a pair,
02348     // and the line above the bottom macroblock
          /* entry 'offset' receives the last luma sample of the old top border;
           * the following entries receive column 15 of the macroblock */
02349     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
02350     for(i=1; i<17 - skiplast; i++){
02351         h->left_border[offset+i*step]= src_y[15+i*  linesize];
02352     }
02353 
          /* the last luma row (16 bytes) becomes the new top border */
02354     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
02355     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
02356 
          /* same for both chroma planes: column 7 and the last row (8 bytes each) */
02357     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02358         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
02359         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
02360         for(i=1; i<9 - skiplast; i++){
02361             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
02362             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
02363         }
02364         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
02365         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
02366     }
02367 }
02368 
02369 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
          /* Exchanges the samples left of and above the macroblock in the
           * picture with the copies kept in h->left_border / h->top_borders.
           * hl_decode_mb_internal() calls it with xchg=1 before intra
           * prediction and with xchg=0 afterwards.
           * With xchg=0 the exchanges marked 'xchg' below degrade to a plain
           * copy from the border buffers into the picture; the ones with a
           * literal 1 always swap. */
02370     MpegEncContext * const s = &h->s;
02371     int temp8, i;
02372     uint64_t temp64;
02373     int deblock_left;
02374     int deblock_top;
02375     int mb_xy;
          /* same layout parameters as used by backup_mb_border() */
02376     int step    = 1;
02377     int offset  = 1;
02378     int uvoffset= 1;
02379     int top_idx = 1;
02380 
02381     if(!simple && FRAME_MBAFF){
02382         if(s->mb_y&1){
02383             offset  = MB_MBAFF ? 1 : 17;
02384             uvoffset= MB_MBAFF ? 1 : 9;
02385         }else{
02386             offset  =
02387             uvoffset=
02388             top_idx = MB_MBAFF ? 0 : 1;
02389         }
02390         step= MB_MBAFF ? 2 : 1;
02391     }
02392 
          /* deblocking_filter == 2: a neighbour only counts if it carries the
           * same slice_table value; otherwise only the picture position decides */
02393     if(h->deblocking_filter == 2) {
02394         mb_xy = h->mb_xy;
02395         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
02396         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
02397     } else {
02398         deblock_left = (s->mb_x > 0);
02399         deblock_top =  (s->mb_y > !!MB_FIELD);
02400     }
02401 
          /* point at the sample above-left of the macroblock */
02402     src_y  -=   linesize + 1;
02403     src_cb -= uvlinesize + 1;
02404     src_cr -= uvlinesize + 1;
02405 
          /* XCHG(a,b,t,xchg): t=a; if xchg is set a=b; then b=t.
           * I.e. a swap when xchg is non-zero, a copy of a into b otherwise. */
02406 #define XCHG(a,b,t,xchg)\
02407 t= a;\
02408 if(xchg)\
02409     a= b;\
02410 b= t;
02411 
02412     if(deblock_left){
              /* left luma column; entry 0 (the corner) is skipped without a top
               * neighbour, the entry after the loop (i == 16) is always swapped */
02413         for(i = !deblock_top; i<16; i++){
02414             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
02415         }
02416         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
02417     }
02418 
02419     if(deblock_top){
              /* luma row above the macroblock, plus 8 samples above the
               * macroblock to the right when there is one */
02420         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
02421         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
02422         if(s->mb_x+1 < s->mb_width){
02423             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
02424         }
02425     }
02426 
          /* chroma planes, skipped only for gray-only decoding */
02427     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02428         if(deblock_left){
02429             for(i = !deblock_top; i<8; i++){
02430                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
02431                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
02432             }
02433             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
02434             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
02435         }
02436         if(deblock_top){
02437             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
02438             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
02439         }
02440     }
02441 }
02442 
02443 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
          /* Reconstructs the current macroblock into the current picture:
           * prediction (intra or motion compensated), residual add, and - if
           * enabled - the deblocking filter.
           * 'simple' is passed as the constant 1 or 0 by the two wrappers
           * hl_decode_mb_simple() / hl_decode_mb_complex(). With simple != 0
           * the field/MBAFF, PCM, gray-only, encoder and non-H.264 (SVQ3)
           * branches below are compiled out of the conditions. */
02444     MpegEncContext * const s = &h->s;
02445     const int mb_x= s->mb_x;
02446     const int mb_y= s->mb_y;
02447     const int mb_xy= h->mb_xy;
02448     const int mb_type= s->current_picture.mb_type[mb_xy];
02449     uint8_t  *dest_y, *dest_cb, *dest_cr;
02450     int linesize, uvlinesize /*dct_offset*/;
02451     int i;
02452     int *block_offset = &h->block_offset[0];
02453     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
02454     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
02455     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
02456 
          /* top-left sample of the macroblock in each plane (16x16 luma, 8x8 chroma) */
02457     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
02458     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
02459     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
02460 
02461     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
02462     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
02463 
02464     if (!simple && MB_FIELD) {
              /* field macroblock: lines are interleaved, so the stride doubles
               * and the field variant of the block offsets (from index 24) is used */
02465         linesize   = h->mb_linesize   = s->linesize * 2;
02466         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
02467         block_offset = &h->block_offset[24];
02468         if(mb_y&1){ //FIXME move out of this function?
                  /* odd macroblock row: start 15 (luma) / 7 (chroma) lines higher */
02469             dest_y -= s->linesize*15;
02470             dest_cb-= s->uvlinesize*7;
02471             dest_cr-= s->uvlinesize*7;
02472         }
02473         if(FRAME_MBAFF) {
02474             int list;
                  /* rewrite the cached reference indices of the used lists to
                   * (16+ref)^(mb_y&1) */
02475             for(list=0; list<h->list_count; list++){
02476                 if(!USES_LIST(mb_type, list))
02477                     continue;
02478                 if(IS_16X16(mb_type)){
02479                     int8_t *ref = &h->ref_cache[list][scan8[0]];
02480                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
02481                 }else{
02482                     for(i=0; i<16; i+=4){
02483                         int ref = h->ref_cache[list][scan8[i]];
02484                         if(ref >= 0)
02485                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
02486                     }
02487                 }
02488             }
02489         }
02490     } else {
02491         linesize   = h->mb_linesize   = s->linesize;
02492         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
02493 //        dct_offset = s->linesize * 16;
02494     }
02495 
          /* pick the luma residual functions: plain pixel add for transform
           * bypass, otherwise the 8x8 or 4x4 IDCT */
02496     if(transform_bypass){
02497         idct_dc_add =
02498         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
02499     }else if(IS_8x8DCT(mb_type)){
02500         idct_dc_add = s->dsp.h264_idct8_dc_add;
02501         idct_add = s->dsp.h264_idct8_add;
02502     }else{
02503         idct_dc_add = s->dsp.h264_idct_dc_add;
02504         idct_add = s->dsp.h264_idct_add;
02505     }
02506 
02507     if (!simple && IS_INTRA_PCM(mb_type)) {
              /* PCM macroblock: the samples sit in h->mb and are copied out
               * line by line, no prediction or transform involved */
02508         for (i=0; i<16; i++) {
02509             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
02510         }
02511         for (i=0; i<8; i++) {
02512             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
02513             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
02514         }
02515     } else {
02516         if(IS_INTRA(mb_type)){
                  /* with deblocking on, swap in the saved border samples for
                   * the duration of the intra prediction (undone below) */
02517             if(h->deblocking_filter)
02518                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
02519 
02520             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02521                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
02522                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
02523             }
02524 
02525             if(IS_INTRA4x4(mb_type)){
                      /* prediction and residual are interleaved block by block */
02526                 if(simple || !s->encoding){
02527                     if(IS_8x8DCT(mb_type)){
02528                         for(i=0; i<16; i+=4){
02529                             uint8_t * const ptr= dest_y + block_offset[i];
02530                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02531                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
02532                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
02533                                                    (h->topright_samples_available<<i)&0x4000, linesize);
02534                             if(nnz){
02535                                 if(nnz == 1 && h->mb[i*16])
02536                                     idct_dc_add(ptr, h->mb + i*16, linesize);
02537                                 else
02538                                     idct_add(ptr, h->mb + i*16, linesize);
02539                             }
02540                         }
02541                     }else
02542                     for(i=0; i<16; i++){
02543                         uint8_t * const ptr= dest_y + block_offset[i];
02544                         uint8_t *topright;
02545                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
02546                         int nnz, tr;
02547 
                              /* only these two modes get a top-right pointer; if
                               * those samples are unavailable the last sample of
                               * the row above is replicated four times instead */
02548                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
02549                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
02550                             assert(mb_y || linesize <= block_offset[i]);
02551                             if(!topright_avail){
02552                                 tr= ptr[3 - linesize]*0x01010101;
02553                                 topright= (uint8_t*) &tr;
02554                             }else
02555                                 topright= ptr + 4 - linesize;
02556                         }else
02557                             topright= NULL;
02558 
02559                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
02560                         nnz = h->non_zero_count_cache[ scan8[i] ];
02561                         if(nnz){
02562                             if(is_h264){
02563                                 if(nnz == 1 && h->mb[i*16])
02564                                     idct_dc_add(ptr, h->mb + i*16, linesize);
02565                                 else
02566                                     idct_add(ptr, h->mb + i*16, linesize);
02567                             }else
02568                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
02569                         }
02570                     }
02571                 }
02572             }else{
                      /* intra 16x16: predict the whole luma block and process the
                       * luma DC coefficients; the residual is added further down */
02573                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
02574                 if(is_h264){
02575                     if(!transform_bypass)
02576                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
02577                 }else
02578                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
02579             }
                  /* second call with xchg=0, pairing with the xchg=1 call above */
02580             if(h->deblocking_filter)
02581                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
02582         }else if(is_h264){
                  /* inter macroblock: motion compensated prediction */
02583             hl_motion(h, dest_y, dest_cb, dest_cr,
02584                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
02585                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
02586                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
02587         }
02588 
02589 
              /* luma residual for everything except intra 4x4, which was
               * already handled together with its prediction */
02590         if(!IS_INTRA4x4(mb_type)){
02591             if(is_h264){
02592                 if(IS_INTRA16x16(mb_type)){
02593                     for(i=0; i<16; i++){
02594                         if(h->non_zero_count_cache[ scan8[i] ])
02595                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02596                         else if(h->mb[i*16])
02597                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02598                     }
02599                 }else{
                          /* step over four 4x4 block indices at a time for 8x8 transforms */
02600                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
02601                     for(i=0; i<16; i+=di){
02602                         int nnz = h->non_zero_count_cache[ scan8[i] ];
02603                         if(nnz){
02604                             if(nnz==1 && h->mb[i*16])
02605                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02606                             else
02607                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
02608                         }
02609                     }
02610                 }
02611             }else{
02612                 for(i=0; i<16; i++){
02613                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
02614                         uint8_t * const ptr= dest_y + block_offset[i];
02615                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
02616                     }
02617                 }
02618             }
02619         }
02620 
              /* chroma residual: block indices 16..19 go to cb, 20..23 to cr */
02621         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02622             uint8_t *dest[2] = {dest_cb, dest_cr};
02623             if(transform_bypass){
02624                 idct_add = idct_dc_add = s->dsp.add_pixels4;
02625             }else{
02626                 idct_add = s->dsp.h264_idct_add;
02627                 idct_dc_add = s->dsp.h264_idct_dc_add;
                      /* chroma DC: dequant lists 1/2 for intra, 4/5 otherwise */
02628                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
02629                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
02630             }
02631             if(is_h264){
02632                 for(i=16; i<16+8; i++){
02633                     if(h->non_zero_count_cache[ scan8[i] ])
02634                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02635                     else if(h->mb[i*16])
02636                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
02637                 }
02638             }else{
02639                 for(i=16; i<16+8; i++){
02640                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
02641                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
02642                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
02643                     }
02644                 }
02645             }
02646         }
02647     }
          /* in-loop deblocking: save the border samples first, refill the
           * caches for the filter and recompute both chroma qps from the
           * stored qscale of this macroblock */
02648     if(h->deblocking_filter) {
02649         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
02650         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
02651         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
02652         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
02653         if (!simple && FRAME_MBAFF) {
02654             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02655         } else {
02656             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
02657         }
02658     }
02659 }
02660 
02664 static void hl_decode_mb_simple(H264Context *h){
02665     hl_decode_mb_internal(h, 1);
02666 }
02667 
02671 static void av_noinline hl_decode_mb_complex(H264Context *h){
02672     hl_decode_mb_internal(h, 0);
02673 }
02674 
02675 static void hl_decode_mb(H264Context *h){
02676     MpegEncContext * const s = &h->s;
02677     const int mb_xy= h->mb_xy;
02678     const int mb_type= s->current_picture.mb_type[mb_xy];
02679     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
02680                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
02681 
02682     if(ENABLE_H264_ENCODER && !s->decode)
02683         return;
02684 
02685     if (is_complex)
02686         hl_decode_mb_complex(h);
02687     else hl_decode_mb_simple(h);
02688 }
02689 
02690 static void pic_as_field(Picture *pic, const int parity){
02691     int i;
02692     for (i = 0; i < 4; ++i) {
02693         if (parity == PICT_BOTTOM_FIELD)
02694             pic->data[i] += pic->linesize[i];
02695         pic->reference = parity;
02696         pic->linesize[i] *= 2;
02697     }
02698     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
02699 }
02700 
02701 static int split_field_copy(Picture *dest, Picture *src,
02702                             int parity, int id_add){
02703     int match = !!(src->reference & parity);
02704 
02705     if (match) {
02706         *dest = *src;
02707         if(parity != PICT_FRAME){
02708             pic_as_field(dest, parity);
02709             dest->pic_id *= 2;
02710             dest->pic_id += id_add;
02711         }
02712     }
02713 
02714     return match;
02715 }
02716 
02717 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
02718     int i[2]={0};
02719     int index=0;
02720 
02721     while(i[0]<len || i[1]<len){
02722         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
02723             i[0]++;
02724         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
02725             i[1]++;
02726         if(i[0] < len){
02727             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
02728             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
02729         }
02730         if(i[1] < len){
02731             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
02732             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
02733         }
02734     }
02735 
02736     return index;
02737 }
02738 
02739 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
02740     int i, best_poc;
02741     int out_i= 0;
02742 
02743     for(;;){
02744         best_poc= dir ? INT_MIN : INT_MAX;
02745 
02746         for(i=0; i<len; i++){
02747             const int poc= src[i]->poc;
02748             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
02749                 best_poc= poc;
02750                 sorted[out_i]= src[i];
02751             }
02752         }
02753         if(best_poc == (dir ? INT_MIN : INT_MAX))
02754             break;
02755         limit= sorted[out_i++]->poc - dir;
02756     }
02757     return out_i;
02758 }
02759 
02763 static int fill_default_ref_list(H264Context *h){
02764     MpegEncContext * const s = &h->s;
02765     int i, len;
02766 
02767     if(h->slice_type_nos==B_TYPE){
02768         Picture *sorted[32];
02769         int cur_poc, list;
02770         int lens[2];
02771 
02772         if(FIELD_PICTURE)
02773             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
02774         else
02775             cur_poc= s->current_picture_ptr->poc;
02776 
02777         for(list= 0; list<2; list++){
02778             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
02779             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
02780             assert(len<=32);
02781             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
02782             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
02783             assert(len<=32);
02784 
02785             if(len < h->ref_count[list])
02786                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
02787             lens[list]= len;
02788         }
02789 
02790         if(lens[0] == lens[1] && lens[1] > 1){
02791             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
02792             if(i == lens[0])
02793                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
02794         }
02795     }else{
02796         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
02797         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
02798         assert(len <= 32);
02799         if(len < h->ref_count[0])
02800             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
02801     }
02802 #ifdef TRACE
02803     for (i=0; i<h->ref_count[0]; i++) {
02804         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
02805     }
02806     if(h->slice_type_nos==B_TYPE){
02807         for (i=0; i<h->ref_count[1]; i++) {
02808             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
02809         }
02810     }
02811 #endif
02812     return 0;
02813 }
02814 
02815 static void print_short_term(H264Context *h);
02816 static void print_long_term(H264Context *h);
02817 
02828 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
02829     MpegEncContext * const s = &h->s;
02830 
02831     *structure = s->picture_structure;
02832     if(FIELD_PICTURE){
02833         if (!(pic_num & 1))
02834             /* opposite field */
02835             *structure ^= PICT_FRAME;
02836         pic_num >>= 1;
02837     }
02838 
02839     return pic_num;
02840 }
02841 
02842 static int decode_ref_pic_list_reordering(H264Context *h){
02843     MpegEncContext * const s = &h->s;
02844     int list, index, pic_structure;
02845 
02846     print_short_term(h);
02847     print_long_term(h);
02848 
02849     for(list=0; list<h->list_count; list++){
02850         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
02851 
02852         if(get_bits1(&s->gb)){
02853             int pred= h->curr_pic_num;
02854 
02855             for(index=0; ; index++){
02856                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
02857                 unsigned int pic_id;
02858                 int i;
02859                 Picture *ref = NULL;
02860 
02861                 if(reordering_of_pic_nums_idc==3)
02862                     break;
02863 
02864                 if(index >= h->ref_count[list]){
02865                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
02866                     return -1;
02867                 }
02868 
02869                 if(reordering_of_pic_nums_idc<3){
02870                     if(reordering_of_pic_nums_idc<2){
02871                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
02872                         int frame_num;
02873 
02874                         if(abs_diff_pic_num > h->max_pic_num){
02875                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
02876                             return -1;
02877                         }
02878 
02879                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
02880                         else                                pred+= abs_diff_pic_num;
02881                         pred &= h->max_pic_num - 1;
02882 
02883                         frame_num = pic_num_extract(h, pred, &pic_structure);
02884 
02885                         for(i= h->short_ref_count-1; i>=0; i--){
02886                             ref = h->short_ref[i];
02887                             assert(ref->reference);
02888                             assert(!ref->long_ref);
02889                             if(
02890                                    ref->frame_num == frame_num &&
02891                                    (ref->reference & pic_structure)
02892                               )
02893                                 break;
02894                         }
02895                         if(i>=0)
02896                             ref->pic_id= pred;
02897                     }else{
02898                         int long_idx;
02899                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
02900 
02901                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
02902 
02903                         if(long_idx>31){
02904                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
02905                             return -1;
02906                         }
02907                         ref = h->long_ref[long_idx];
02908                         assert(!(ref && !ref->reference));
02909                         if(ref && (ref->reference & pic_structure)){
02910                             ref->pic_id= pic_id;
02911                             assert(ref->long_ref);
02912                             i=0;
02913                         }else{
02914                             i=-1;
02915                         }
02916                     }
02917 
02918                     if (i < 0) {
02919                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
02920                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
02921                     } else {
02922                         for(i=index; i+1<h->ref_count[list]; i++){
02923                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
02924                                 break;
02925                         }
02926                         for(; i > index; i--){
02927                             h->ref_list[list][i]= h->ref_list[list][i-1];
02928                         }
02929                         h->ref_list[list][index]= *ref;
02930                         if (FIELD_PICTURE){
02931                             pic_as_field(&h->ref_list[list][index], pic_structure);
02932                         }
02933                     }
02934                 }else{
02935                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
02936                     return -1;
02937                 }
02938             }
02939         }
02940     }
02941     for(list=0; list<h->list_count; list++){
02942         for(index= 0; index < h->ref_count[list]; index++){
02943             if(!h->ref_list[list][index].data[0]){
02944                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
02945                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
02946             }
02947         }
02948     }
02949 
02950     return 0;
02951 }
02952 
02953 static void fill_mbaff_ref_list(H264Context *h){
02954     int list, i, j;
02955     for(list=0; list<2; list++){ //FIXME try list_count
02956         for(i=0; i<h->ref_count[list]; i++){
02957             Picture *frame = &h->ref_list[list][i];
02958             Picture *field = &h->ref_list[list][16+2*i];
02959             field[0] = *frame;
02960             for(j=0; j<3; j++)
02961                 field[0].linesize[j] <<= 1;
02962             field[0].reference = PICT_TOP_FIELD;
02963             field[0].poc= field[0].field_poc[0];
02964             field[1] = field[0];
02965             for(j=0; j<3; j++)
02966                 field[1].data[j] += frame->linesize[j];
02967             field[1].reference = PICT_BOTTOM_FIELD;
02968             field[1].poc= field[1].field_poc[1];
02969 
02970             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
02971             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
02972             for(j=0; j<2; j++){
02973                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
02974                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
02975             }
02976         }
02977     }
02978     for(j=0; j<h->ref_count[1]; j++){
02979         for(i=0; i<h->ref_count[0]; i++)
02980             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
02981         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
02982         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
02983     }
02984 }
02985 
02986 static int pred_weight_table(H264Context *h){
02987     MpegEncContext * const s = &h->s;
02988     int list, i;
02989     int luma_def, chroma_def;
02990 
02991     h->use_weight= 0;
02992     h->use_weight_chroma= 0;
02993     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02994     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02995     luma_def = 1<<h->luma_log2_weight_denom;
02996     chroma_def = 1<<h->chroma_log2_weight_denom;
02997 
02998     for(list=0; list<2; list++){
02999         for(i=0; i<h->ref_count[list]; i++){
03000             int luma_weight_flag, chroma_weight_flag;
03001 
03002             luma_weight_flag= get_bits1(&s->gb);
03003             if(luma_weight_flag){
03004                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
03005                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
03006                 if(   h->luma_weight[list][i] != luma_def
03007                    || h->luma_offset[list][i] != 0)
03008                     h->use_weight= 1;
03009             }else{
03010                 h->luma_weight[list][i]= luma_def;
03011                 h->luma_offset[list][i]= 0;
03012             }
03013 
03014             if(CHROMA){
03015                 chroma_weight_flag= get_bits1(&s->gb);
03016                 if(chroma_weight_flag){
03017                     int j;
03018                     for(j=0; j<2; j++){
03019                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
03020                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
03021                         if(   h->chroma_weight[list][i][j] != chroma_def
03022                         || h->chroma_offset[list][i][j] != 0)
03023                             h->use_weight_chroma= 1;
03024                     }
03025                 }else{
03026                     int j;
03027                     for(j=0; j<2; j++){
03028                         h->chroma_weight[list][i][j]= chroma_def;
03029                         h->chroma_offset[list][i][j]= 0;
03030                     }
03031                 }
03032             }
03033         }
03034         if(h->slice_type_nos != B_TYPE) break;
03035     }
03036     h->use_weight= h->use_weight || h->use_weight_chroma;
03037     return 0;
03038 }
03039 
03040 static void implicit_weight_table(H264Context *h){
03041     MpegEncContext * const s = &h->s;
03042     int ref0, ref1;
03043     int cur_poc = s->current_picture_ptr->poc;
03044 
03045     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
03046        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
03047         h->use_weight= 0;
03048         h->use_weight_chroma= 0;
03049         return;
03050     }
03051 
03052     h->use_weight= 2;
03053     h->use_weight_chroma= 2;
03054     h->luma_log2_weight_denom= 5;
03055     h->chroma_log2_weight_denom= 5;
03056 
03057     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
03058         int poc0 = h->ref_list[0][ref0].poc;
03059         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
03060             int poc1 = h->ref_list[1][ref1].poc;
03061             int td = av_clip(poc1 - poc0, -128, 127);
03062             if(td){
03063                 int tb = av_clip(cur_poc - poc0, -128, 127);
03064                 int tx = (16384 + (FFABS(td) >> 1)) / td;
03065                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
03066                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
03067                     h->implicit_weight[ref0][ref1] = 32;
03068                 else
03069                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
03070             }else
03071                 h->implicit_weight[ref0][ref1] = 32;
03072         }
03073     }
03074 }
03075 
03087 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
03088     int i;
03089     if (pic->reference &= refmask) {
03090         return 0;
03091     } else {
03092         for(i = 0; h->delayed_pic[i]; i++)
03093             if(pic == h->delayed_pic[i]){
03094                 pic->reference=DELAYED_PIC_REF;
03095                 break;
03096             }
03097         return 1;
03098     }
03099 }
03100 
03104 static void idr(H264Context *h){
03105     int i;
03106 
03107     for(i=0; i<16; i++){
03108         remove_long(h, i, 0);
03109     }
03110     assert(h->long_ref_count==0);
03111 
03112     for(i=0; i<h->short_ref_count; i++){
03113         unreference_pic(h, h->short_ref[i], 0);
03114         h->short_ref[i]= NULL;
03115     }
03116     h->short_ref_count=0;
03117     h->prev_frame_num= 0;
03118     h->prev_frame_num_offset= 0;
03119     h->prev_poc_msb=
03120     h->prev_poc_lsb= 0;
03121 }
03122 
03123 /* forget old pics after a seek */
03124 static void flush_dpb(AVCodecContext *avctx){
03125     H264Context *h= avctx->priv_data;
03126     int i;
03127     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
03128         if(h->delayed_pic[i])
03129             h->delayed_pic[i]->reference= 0;
03130         h->delayed_pic[i]= NULL;
03131     }
03132     h->outputed_poc= INT_MIN;
03133     idr(h);
03134     if(h->s.current_picture_ptr)
03135         h->s.current_picture_ptr->reference= 0;
03136     h->s.first_field= 0;
03137     ff_mpeg_flush(avctx);
03138 }
03139 
03148 static Picture * find_short(H264Context *h, int frame_num, int *idx){
03149     MpegEncContext * const s = &h->s;
03150     int i;
03151 
03152     for(i=0; i<h->short_ref_count; i++){
03153         Picture *pic= h->short_ref[i];
03154         if(s->avctx->debug&FF_DEBUG_MMCO)
03155             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
03156         if(pic->frame_num == frame_num) {
03157             *idx = i;
03158             return pic;
03159         }
03160     }
03161     return NULL;
03162 }
03163 
03170 static void remove_short_at_index(H264Context *h, int i){
03171     assert(i >= 0 && i < h->short_ref_count);
03172     h->short_ref[i]= NULL;
03173     if (--h->short_ref_count)
03174         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
03175 }
03176 
03181 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
03182     MpegEncContext * const s = &h->s;
03183     Picture *pic;
03184     int i;
03185 
03186     if(s->avctx->debug&FF_DEBUG_MMCO)
03187         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
03188 
03189     pic = find_short(h, frame_num, &i);
03190     if (pic){
03191         if(unreference_pic(h, pic, ref_mask))
03192         remove_short_at_index(h, i);
03193     }
03194 
03195     return pic;
03196 }
03197 
03203 static Picture * remove_long(H264Context *h, int i, int ref_mask){
03204     Picture *pic;
03205 
03206     pic= h->long_ref[i];
03207     if (pic){
03208         if(unreference_pic(h, pic, ref_mask)){
03209             assert(h->long_ref[i]->long_ref == 1);
03210             h->long_ref[i]->long_ref= 0;
03211             h->long_ref[i]= NULL;
03212             h->long_ref_count--;
03213         }
03214     }
03215 
03216     return pic;
03217 }
03218 
03222 static void print_short_term(H264Context *h) {
03223     uint32_t i;
03224     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03225         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
03226         for(i=0; i<h->short_ref_count; i++){
03227             Picture *pic= h->short_ref[i];
03228             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03229         }
03230     }
03231 }
03232 
03236 static void print_long_term(H264Context *h) {
03237     uint32_t i;
03238     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
03239         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
03240         for(i = 0; i < 16; i++){
03241             Picture *pic= h->long_ref[i];
03242             if (pic) {
03243                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
03244             }
03245         }
03246     }
03247 }
03248 
03252 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
03253     MpegEncContext * const s = &h->s;
03254     int i, j;
03255     int current_ref_assigned=0;
03256     Picture *pic;
03257 
03258     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
03259         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
03260 
03261     for(i=0; i<mmco_count; i++){
03262         int structure, frame_num;
03263         if(s->avctx->debug&FF_DEBUG_MMCO)
03264             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
03265 
03266         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
03267            || mmco[i].opcode == MMCO_SHORT2LONG){
03268             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
03269             pic = find_short(h, frame_num, &j);
03270             if(!pic){
03271                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
03272                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
03273                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
03274                 continue;
03275             }
03276         }
03277 
03278         switch(mmco[i].opcode){
03279         case MMCO_SHORT2UNUSED:
03280             if(s->avctx->debug&FF_DEBUG_MMCO)
03281                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
03282             remove_short(h, frame_num, structure ^ PICT_FRAME);
03283             break;
03284         case MMCO_SHORT2LONG:
03285                 if (h->long_ref[mmco[i].long_arg] != pic)
03286                     remove_long(h, mmco[i].long_arg, 0);
03287 
03288                 remove_short_at_index(h, j);
03289                 h->long_ref[ mmco[i].long_arg ]= pic;
03290                 if (h->long_ref[ mmco[i].long_arg ]){
03291                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03292                     h->long_ref_count++;
03293                 }
03294             break;
03295         case MMCO_LONG2UNUSED:
03296             j = pic_num_extract(h, mmco[i].long_arg, &structure);
03297             pic = h->long_ref[j];
03298             if (pic) {
03299                 remove_long(h, j, structure ^ PICT_FRAME);
03300             } else if(s->avctx->debug&FF_DEBUG_MMCO)
03301                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
03302             break;
03303         case MMCO_LONG:
03304                     // Comment below left from previous code as it is an interresting note.
03305                     /* First field in pair is in short term list or
03306                      * at a different long term index.
03307                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
03308                      * Report the problem and keep the pair where it is,
03309                      * and mark this field valid.
03310                      */
03311 
03312             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
03313                 remove_long(h, mmco[i].long_arg, 0);
03314 
03315                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
03316                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
03317                 h->long_ref_count++;
03318             }
03319 
03320             s->current_picture_ptr->reference |= s->picture_structure;
03321             current_ref_assigned=1;
03322             break;
03323         case MMCO_SET_MAX_LONG:
03324             assert(mmco[i].long_arg <= 16);
03325             // just remove the long term which index is greater than new max
03326             for(j = mmco[i].long_arg; j<16; j++){