synfig-core/tags/synfig_0_61_04/synfig-core/src/modules/mod_libavcodec/libavcodec/mpegvideo.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  20  */
  21
  22 /**
  23  * @file mpegvideo.c
  24  * The simplest mpeg encoder (well, it was the simplest!).
  25  */
  26
  27 #include <limits.h>
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "mpegvideo.h"
  31
  32 #ifdef USE_FASTMEMCPY
  33 #include "fastmemcpy.h"
  34 #endif
  35
  36 //#undef NDEBUG
  37 //#include <assert.h>
  38
  39 #ifdef CONFIG_ENCODERS
  40 static void encode_picture(MpegEncContext *s, int picture_number);
  41 #endif //CONFIG_ENCODERS
  42 static void dct_unquantize_mpeg1_c(MpegEncContext *s,
  43                                    DCTELEM *block, int n, int qscale);
  44 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
  45                                    DCTELEM *block, int n, int qscale);
  46 static void dct_unquantize_h263_c(MpegEncContext *s,
  47                                   DCTELEM *block, int n, int qscale);
  48 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
  49 #ifdef CONFIG_ENCODERS
  50 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  51 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  52 static int sse_mb(MpegEncContext *s);
  53 #endif //CONFIG_ENCODERS
  54
  55 #ifdef HAVE_XVMC
  56 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
  57 extern void XVMC_field_end(MpegEncContext *s);
  58 extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
  59 #endif
  60
  61 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
  62
  63
  64 /* enable all paranoid tests for rounding, overflows, etc... */
  65 //#define PARANOID
  66
  67 //#define DEBUG
  68
  69
  70 /* for jpeg fast DCT */
  71 #define CONST_BITS 14
  72
  73 static const uint16_t aanscales[64] = {
  74     /* precomputed values scaled up by 14 bits */
  75     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  76     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
  77     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
  78     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
  79     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  80     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
  81     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
  82     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  83 };
  84
  85 static const uint8_t h263_chroma_roundtab[16] = {
  86 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
  87     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
  88 };
  89
  90 #ifdef CONFIG_ENCODERS
  91 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
  92 static uint8_t default_fcode_tab[MAX_MV*2+1];
  93
  94 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
  95
  96 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
  97                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
  98 {
  99     int qscale;
 100
 101     for(qscale=qmin; qscale<=qmax; qscale++){
 102         int i;
 103         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
 104             for(i=0;i<64;i++) {
 105                 const int j= s->dsp.idct_permutation[i];
 106                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 107                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 108                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 109                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 110
 111                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
 112                                 (qscale * quant_matrix[j]));
 113             }
 114         } else if (s->dsp.fdct == fdct_ifast) {
 115             for(i=0;i<64;i++) {
 116                 const int j= s->dsp.idct_permutation[i];
 117                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 118                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 119                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 120                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 121
 122                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
 123                                 (aanscales[i] * qscale * quant_matrix[j]));
 124             }
 125         } else {
 126             for(i=0;i<64;i++) {
 127                 const int j= s->dsp.idct_permutation[i];
 128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 129                    So 16           <= qscale * quant_matrix[i]             <= 7905
 130                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 131                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 132                 */
 133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
 134 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
 135                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 136
 137                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
 138                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
 139             }
 140         }
 141     }
 142 }
 143 #endif //CONFIG_ENCODERS
 144
 145 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
 146     int i;
 147     int end;
 148
 149     st->scantable= src_scantable;
 150
 151     for(i=0; i<64; i++){
 152         int j;
 153         j = src_scantable[i];
 154         st->permutated[i] = permutation[j];
 155 #ifdef ARCH_POWERPC
 156         st->inverse[j] = i;
 157 #endif
 158     }
 159
 160     end=-1;
 161     for(i=0; i<64; i++){
 162         int j;
 163         j = st->permutated[i];
 164         if(j>end) end=j;
 165         st->raster_end[i]= end;
 166     }
 167 }
 168
 169 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
 170     int i;
 171
 172     if(matrix){
 173         put_bits(pb, 1, 1);
 174         for(i=0;i<64;i++) {
 175             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
 176         }
 177     }else
 178         put_bits(pb, 1, 0);
 179 }
 180
 181 /* init common dct for both encoder and decoder */
 182 int DCT_common_init(MpegEncContext *s)
 183 {
 184     s->dct_unquantize_h263 = dct_unquantize_h263_c;
 185     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
 186     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
 187
 188 #ifdef CONFIG_ENCODERS
 189     s->dct_quantize= dct_quantize_c;
 190 #endif
 191
 192 #ifdef HAVE_MMX
 193     MPV_common_init_mmx(s);
 194 #endif
 195 #ifdef ARCH_ALPHA
 196     MPV_common_init_axp(s);
 197 #endif
 198 #ifdef HAVE_MLIB
 199     MPV_common_init_mlib(s);
 200 #endif
 201 #ifdef HAVE_MMI
 202     MPV_common_init_mmi(s);
 203 #endif
 204 #ifdef ARCH_ARMV4L
 205     MPV_common_init_armv4l(s);
 206 #endif
 207 #ifdef ARCH_POWERPC
 208     MPV_common_init_ppc(s);
 209 #endif
 210
 211 #ifdef CONFIG_ENCODERS
 212     s->fast_dct_quantize= s->dct_quantize;
 213
 214     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
 215         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
 216     }
 217
 218 #endif //CONFIG_ENCODERS
 219
 220     /* load & permutate scantables
 221        note: only wmv uses differnt ones
 222     */
 223     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
 224     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
 225     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
 226     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
 227
 228     s->picture_structure= PICT_FRAME;
 229
 230     return 0;
 231 }
 232
 233 /**
 234  * allocates a Picture
 235  * The pixels are allocated/set by calling get_buffer() if shared=0
 236  */
 237 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
 238     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
 239     const int mb_array_size= s->mb_stride*s->mb_height;
 240     int i;
 241
 242     if(shared){
 243         assert(pic->data[0]);
 244         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
 245         pic->type= FF_BUFFER_TYPE_SHARED;
 246     }else{
 247         int r;
 248
 249         assert(!pic->data[0]);
 250
 251         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
 252
 253         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
 254             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
 255             return -1;
 256         }
 257
 258         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
 259             fprintf(stderr, "get_buffer() failed (stride changed)\n");
 260             return -1;
 261         }
 262
 263         if(pic->linesize[1] != pic->linesize[2]){
 264             fprintf(stderr, "get_buffer() failed (uv stride missmatch)\n");
 265             return -1;
 266         }
 267
 268         s->linesize  = pic->linesize[0];
 269         s->uvlinesize= pic->linesize[1];
 270     }
 271
 272     if(pic->qscale_table==NULL){
 273         if (s->encoding) {
 274             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
 275             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
 276             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
 277             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
 278         }
 279
 280         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
 281         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
 282         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
 283         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
 284         if(s->out_format == FMT_H264){
 285             for(i=0; i<2; i++){
 286                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
 287                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
 288             }
 289         }
 290         pic->qstride= s->mb_stride;
 291     }
 292
 293     //it might be nicer if the application would keep track of these but it would require a API change
 294     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
 295     s->prev_pict_types[0]= s->pict_type;
 296     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
 297         pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
 298
 299     return 0;
 300 fail: //for the CHECKED_ALLOCZ macro
 301     return -1;
 302 }
 303
 304 /**
 305  * deallocates a picture
 306  */
 307 static void free_picture(MpegEncContext *s, Picture *pic){
 308     int i;
 309
 310     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
 311         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
 312     }
 313
 314     av_freep(&pic->mb_var);
 315     av_freep(&pic->mc_mb_var);
 316     av_freep(&pic->mb_mean);
 317     av_freep(&pic->mb_cmp_score);
 318     av_freep(&pic->mbskip_table);
 319     av_freep(&pic->qscale_table);
 320     av_freep(&pic->mb_type_base);
 321     pic->mb_type= NULL;
 322     for(i=0; i<2; i++){
 323         av_freep(&pic->motion_val[i]);
 324         av_freep(&pic->ref_index[i]);
 325     }
 326
 327     if(pic->type == FF_BUFFER_TYPE_SHARED){
 328         for(i=0; i<4; i++){
 329             pic->base[i]=
 330             pic->data[i]= NULL;
 331         }
 332         pic->type= 0;
 333     }
 334 }
 335
 336 /* init common structure for both encoder and decoder */
 337 int MPV_common_init(MpegEncContext *s)
 338 {
 339     int y_size, c_size, yc_size, i, mb_array_size, x, y;
 340
 341     dsputil_init(&s->dsp, s->avctx);
 342     DCT_common_init(s);
 343
 344     s->flags= s->avctx->flags;
 345
 346     s->mb_width  = (s->width  + 15) / 16;
 347     s->mb_height = (s->height + 15) / 16;
 348     s->mb_stride = s->mb_width + 1;
 349     mb_array_size= s->mb_height * s->mb_stride;
 350
 351     /* set default edge pos, will be overriden in decode_header if needed */
 352     s->h_edge_pos= s->mb_width*16;
 353     s->v_edge_pos= s->mb_height*16;
 354
 355     s->mb_num = s->mb_width * s->mb_height;
 356
 357     s->block_wrap[0]=
 358     s->block_wrap[1]=
 359     s->block_wrap[2]=
 360     s->block_wrap[3]= s->mb_width*2 + 2;
 361     s->block_wrap[4]=
 362     s->block_wrap[5]= s->mb_width + 2;
 363
 364     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
 365     c_size = (s->mb_width + 2) * (s->mb_height + 2);
 366     yc_size = y_size + 2 * c_size;
 367
 368     /* convert fourcc to upper case */
 369     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
 370                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
 371                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
 372                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
 373
 374     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
 375                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
 376                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
 377                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
 378
 379     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
 380     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
 381
 382     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 383
 384     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
 385     for(y=0; y<s->mb_height; y++){
 386         for(x=0; x<s->mb_width; x++){
 387             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
 388         }
 389     }
 390     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
 391
 392     if (s->encoding) {
 393         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
 394
 395         /* Allocate MV tables */
 396         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
 397         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 398         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
 399         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 400         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
 401         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
 402         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
 403         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
 404         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
 405         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
 406         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
 407         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
 408
 409         //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
 410         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t))
 411
 412         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
 413         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
 414
 415         if(s->codec_id==CODEC_ID_MPEG4){
 416             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
 417             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
 418         }
 419
 420         if(s->msmpeg4_version){
 421             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
 422         }
 423         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
 424
 425         /* Allocate MB type table */
 426         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
 427     }
 428
 429     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
 430
 431     if (s->out_format == FMT_H263 || s->encoding) {
 432         int size;
 433
 434         /* MV prediction */
 435         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
 436         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
 437     }
 438
 439     if(s->codec_id==CODEC_ID_MPEG4){
 440         /* interlaced direct mode decoding tables */
 441         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
 442         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
 443     }
 444     if (s->out_format == FMT_H263) {
 445         /* ac values */
 446         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
 447         s->ac_val[1] = s->ac_val[0] + y_size;
 448         s->ac_val[2] = s->ac_val[1] + c_size;
 449
 450         /* cbp values */
 451         CHECKED_ALLOCZ(s->coded_block, y_size);
 452
 453         /* divx501 bitstream reorder buffer */
 454         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
 455
 456         /* cbp, ac_pred, pred_dir */
 457         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
 458         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
 459     }
 460
 461     if (s->h263_pred || s->h263_plus || !s->encoding) {
 462         /* dc values */
 463         //MN: we need these for error resilience of intra-frames
 464         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
 465         s->dc_val[1] = s->dc_val[0] + y_size;
 466         s->dc_val[2] = s->dc_val[1] + c_size;
 467         for(i=0;i<yc_size;i++)
 468             s->dc_val[0][i] = 1024;
 469     }
 470
 471     /* which mb is a intra block */
 472     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
 473     memset(s->mbintra_table, 1, mb_array_size);
 474
 475     /* default structure is frame */
 476     s->picture_structure = PICT_FRAME;
 477
 478     /* init macroblock skip table */
 479     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
 480     //Note the +1 is for a quicker mpeg4 slice_end detection
 481     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
 482
 483     s->block= s->blocks[0];
 484
 485     s->parse_context.state= -1;
 486
 487     s->context_initialized = 1;
 488     return 0;
 489  fail:
 490     MPV_common_end(s);
 491     return -1;
 492 }
 493
 494
 495 //extern int sads;
 496
 497 /* init common structure for both encoder and decoder */
 498 void MPV_common_end(MpegEncContext *s)
 499 {
 500     int i;
 501
 502     av_freep(&s->parse_context.buffer);
 503     s->parse_context.buffer_size=0;
 504
 505     av_freep(&s->mb_type);
 506     av_freep(&s->p_mv_table_base);
 507     av_freep(&s->b_forw_mv_table_base);
 508     av_freep(&s->b_back_mv_table_base);
 509     av_freep(&s->b_bidir_forw_mv_table_base);
 510     av_freep(&s->b_bidir_back_mv_table_base);
 511     av_freep(&s->b_direct_mv_table_base);
 512     s->p_mv_table= NULL;
 513     s->b_forw_mv_table= NULL;
 514     s->b_back_mv_table= NULL;
 515     s->b_bidir_forw_mv_table= NULL;
 516     s->b_bidir_back_mv_table= NULL;
 517     s->b_direct_mv_table= NULL;
 518
 519     av_freep(&s->motion_val);
 520     av_freep(&s->dc_val[0]);
 521     av_freep(&s->ac_val[0]);
 522     av_freep(&s->coded_block);
 523     av_freep(&s->mbintra_table);
 524     av_freep(&s->cbp_table);
 525     av_freep(&s->pred_dir_table);
 526     av_freep(&s->me.scratchpad);
 527     av_freep(&s->me.map);
 528     av_freep(&s->me.score_map);
 529
 530     av_freep(&s->mbskip_table);
 531     av_freep(&s->prev_pict_types);
 532     av_freep(&s->bitstream_buffer);
 533     av_freep(&s->tex_pb_buffer);
 534     av_freep(&s->pb2_buffer);
 535     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
 536     av_freep(&s->field_mv_table);
 537     av_freep(&s->field_select_table);
 538     av_freep(&s->avctx->stats_out);
 539     av_freep(&s->ac_stats);
 540     av_freep(&s->error_status_table);
 541     av_freep(&s->mb_index2xy);
 542
 543     for(i=0; i<MAX_PICTURE_COUNT; i++){
 544         free_picture(s, &s->picture[i]);
 545     }
 546     avcodec_default_free_buffers(s->avctx);
 547     s->context_initialized = 0;
 548 }
 549
 550 #ifdef CONFIG_ENCODERS
 551
 552 /* init video encoder */
 553 int MPV_encode_init(AVCodecContext *avctx)
 554 {
 555     MpegEncContext *s = avctx->priv_data;
 556     int i, dummy;
 557     int chroma_h_shift, chroma_v_shift;
 558
 559     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
 560
 561     s->bit_rate = avctx->bit_rate;
 562     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
 563     s->width = avctx->width;
 564     s->height = avctx->height;
 565     if(avctx->gop_size > 600){
 566         fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
 567         avctx->gop_size=600;
 568     }
 569     s->gop_size = avctx->gop_size;
 570     s->rtp_mode = avctx->rtp_mode;
 571     s->rtp_payload_size = avctx->rtp_payload_size;
 572     if (avctx->rtp_callback)
 573         s->rtp_callback = avctx->rtp_callback;
 574     s->max_qdiff= avctx->max_qdiff;
 575     s->qcompress= avctx->qcompress;
 576     s->qblur= avctx->qblur;
 577     s->avctx = avctx;
 578     s->flags= avctx->flags;
 579     s->max_b_frames= avctx->max_b_frames;
 580     s->b_frame_strategy= avctx->b_frame_strategy;
 581     s->codec_id= avctx->codec->id;
 582     s->luma_elim_threshold  = avctx->luma_elim_threshold;
 583     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
 584     s->strict_std_compliance= avctx->strict_std_compliance;
 585     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
 586     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
 587     s->mpeg_quant= avctx->mpeg_quant;
 588
 589     if (s->gop_size <= 1) {
 590         s->intra_only = 1;
 591         s->gop_size = 12;
 592     } else {
 593         s->intra_only = 0;
 594     }
 595
 596     s->me_method = avctx->me_method;
 597
 598     /* Fixed QSCALE */
 599     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
 600
 601     s->adaptive_quant= (   s->avctx->lumi_masking
 602                         || s->avctx->dark_masking
 603                         || s->avctx->temporal_cplx_masking
 604                         || s->avctx->spatial_cplx_masking
 605                         || s->avctx->p_masking)
 606                        && !s->fixed_qscale;
 607
 608     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 609
 610     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
 611         fprintf(stderr, "4MV not supporetd by codec\n");
 612         return -1;
 613     }
 614
 615     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
 616         fprintf(stderr, "qpel not supporetd by codec\n");
 617         return -1;
 618     }
 619
 620     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
 621         fprintf(stderr, "data partitioning not supporetd by codec\n");
 622         return -1;
 623     }
 624
 625     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
 626         fprintf(stderr, "b frames not supporetd by codec\n");
 627         return -1;
 628     }
 629
 630     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
 631         fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
 632         return -1;
 633     }
 634
 635     if(s->codec_id==CODEC_ID_MJPEG){
 636         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
 637         s->inter_quant_bias= 0;
 638     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
 639         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
 640         s->inter_quant_bias= 0;
 641     }else{
 642         s->intra_quant_bias=0;
 643         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
 644     }
 645
 646     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 647         s->intra_quant_bias= avctx->intra_quant_bias;
 648     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 649         s->inter_quant_bias= avctx->inter_quant_bias;
 650
 651     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
 652
 653     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
 654     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
 655
 656     switch(avctx->codec->id) {
 657     case CODEC_ID_MPEG1VIDEO:
 658         s->out_format = FMT_MPEG1;
 659         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
 660         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
 661         break;
 662     case CODEC_ID_MPEG2VIDEO:
 663         s->out_format = FMT_MPEG1;
 664         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
 665         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
 666         s->rtp_mode= 1; // mpeg2 must have slices
 667         if(s->rtp_payload_size == 0) s->rtp_payload_size= 256*256*256;
 668         break;
 669     case CODEC_ID_LJPEG:
 670     case CODEC_ID_MJPEG:
 671         s->out_format = FMT_MJPEG;
 672         s->intra_only = 1; /* force intra only for jpeg */
 673         s->mjpeg_write_tables = 1; /* write all tables */
 674         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
 675         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
 676         s->mjpeg_vsample[1] = 1;
 677         s->mjpeg_vsample[2] = 1;
 678         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
 679         s->mjpeg_hsample[1] = 1;
 680         s->mjpeg_hsample[2] = 1;
 681         if (mjpeg_init(s) < 0)
 682             return -1;
 683         avctx->delay=0;
 684         s->low_delay=1;
 685         break;
 686 #ifdef CONFIG_RISKY
 687     case CODEC_ID_H263:
 688         if (h263_get_picture_format(s->width, s->height) == 7) {
 689             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
 690             return -1;
 691         }
 692         s->out_format = FMT_H263;
 693         avctx->delay=0;
 694         s->low_delay=1;
 695         break;
 696     case CODEC_ID_H263P:
 697         s->out_format = FMT_H263;
 698         s->h263_plus = 1;
 699         /* Fx */
 700         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
 701         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
 702         /* /Fx */
 703         /* These are just to be sure */
 704         s->umvplus = 1;
 705         avctx->delay=0;
 706         s->low_delay=1;
 707         break;
 708     case CODEC_ID_FLV1:
 709         s->out_format = FMT_H263;
 710         s->h263_flv = 2; /* format = 1; 11-bit codes */
 711         s->unrestricted_mv = 1;
 712         s->rtp_mode=0; /* don't allow GOB */
 713         avctx->delay=0;
 714         s->low_delay=1;
 715         break;
 716     case CODEC_ID_RV10:
 717         s->out_format = FMT_H263;
 718         s->h263_rv10 = 1;
 719         avctx->delay=0;
 720         s->low_delay=1;
 721         break;
 722     case CODEC_ID_MPEG4:
 723         s->out_format = FMT_H263;
 724         s->h263_pred = 1;
 725         s->unrestricted_mv = 1;
 726         s->low_delay= s->max_b_frames ? 0 : 1;
 727         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
 728         break;
 729     case CODEC_ID_MSMPEG4V1:
 730         s->out_format = FMT_H263;
 731         s->h263_msmpeg4 = 1;
 732         s->h263_pred = 1;
 733         s->unrestricted_mv = 1;
 734         s->msmpeg4_version= 1;
 735         avctx->delay=0;
 736         s->low_delay=1;
 737         break;
 738     case CODEC_ID_MSMPEG4V2:
 739         s->out_format = FMT_H263;
 740         s->h263_msmpeg4 = 1;
 741         s->h263_pred = 1;
 742         s->unrestricted_mv = 1;
 743         s->msmpeg4_version= 2;
 744         avctx->delay=0;
 745         s->low_delay=1;
 746         break;
 747     case CODEC_ID_MSMPEG4V3:
 748         s->out_format = FMT_H263;
 749         s->h263_msmpeg4 = 1;
 750         s->h263_pred = 1;
 751         s->unrestricted_mv = 1;
 752         s->msmpeg4_version= 3;
 753         s->flipflop_rounding=1;
 754         avctx->delay=0;
 755         s->low_delay=1;
 756         break;
 757     case CODEC_ID_WMV1:
 758         s->out_format = FMT_H263;
 759         s->h263_msmpeg4 = 1;
 760         s->h263_pred = 1;
 761         s->unrestricted_mv = 1;
 762         s->msmpeg4_version= 4;
 763         s->flipflop_rounding=1;
 764         avctx->delay=0;
 765         s->low_delay=1;
 766         break;
 767     case CODEC_ID_WMV2:
 768         s->out_format = FMT_H263;
 769         s->h263_msmpeg4 = 1;
 770         s->h263_pred = 1;
 771         s->unrestricted_mv = 1;
 772         s->msmpeg4_version= 5;
 773         s->flipflop_rounding=1;
 774         avctx->delay=0;
 775         s->low_delay=1;
 776         break;
 777 #endif
 778     default:
 779         return -1;
 780     }
 781
 782     { /* set up some save defaults, some codecs might override them later */
 783         static int done=0;
 784         if(!done){
 785             int i;
 786             done=1;
 787
 788             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
 789             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
 790             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
 791
 792             for(i=-16; i<16; i++){
 793                 default_fcode_tab[i + MAX_MV]= 1;
 794             }
 795         }
 796     }
 797     s->me.mv_penalty= default_mv_penalty;
 798     s->fcode_tab= default_fcode_tab;
 799     s->y_dc_scale_table=
 800     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 801
 802     /* dont use mv_penalty table for crap MV as it would be confused */
 803     //FIXME remove after fixing / removing old ME
 804     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
 805
 806     s->encoding = 1;
 807
 808     /* init */
 809     if (MPV_common_init(s) < 0)
 810         return -1;
 811
 812     ff_init_me(s);
 813
 814 #ifdef CONFIG_ENCODERS
 815 #ifdef CONFIG_RISKY
 816     if (s->out_format == FMT_H263)
 817         h263_encode_init(s);
 818     if(s->msmpeg4_version)
 819         ff_msmpeg4_encode_init(s);
 820 #endif
 821     if (s->out_format == FMT_MPEG1)
 822         ff_mpeg1_encode_init(s);
 823 #endif
 824
 825     /* init default q matrix */
 826     for(i=0;i<64;i++) {
 827         int j= s->dsp.idct_permutation[i];
 828 #ifdef CONFIG_RISKY
 829         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
 830             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 831             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 832         }else if(s->out_format == FMT_H263){
 833             s->intra_matrix[j] =
 834             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 835         }else
 836 #endif
 837         { /* mpeg1/2 */
 838             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 839             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 840         }
 841         if(s->avctx->intra_matrix)
 842             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 843         if(s->avctx->inter_matrix)
 844             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 845     }
 846
 847     /* precompute matrix */
 848     /* for mjpeg, we do include qscale in the matrix */
 849     if (s->out_format != FMT_MJPEG) {
 850         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
 851                        s->intra_matrix, s->intra_quant_bias, 1, 31);
 852         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
 853                        s->inter_matrix, s->inter_quant_bias, 1, 31);
 854     }
 855
 856     if(ff_rate_control_init(s) < 0)
 857         return -1;
 858
 859     s->picture_number = 0;
 860     s->picture_in_gop_number = 0;
 861     s->fake_picture_number = 0;
 862     /* motion detector init */
 863     s->f_code = 1;
 864     s->b_code = 1;
 865
 866     return 0;
 867 }
 868
 869 int MPV_encode_end(AVCodecContext *avctx)
 870 {
 871     MpegEncContext *s = avctx->priv_data;
 872
 873 #ifdef STATS
 874     print_stats();
 875 #endif
 876
 877     ff_rate_control_uninit(s);
 878
 879     MPV_common_end(s);
 880     if (s->out_format == FMT_MJPEG)
 881         mjpeg_close(s);
 882
 883     av_freep(&avctx->extradata);
 884
 885     return 0;
 886 }
 887
 888 #endif //CONFIG_ENCODERS
 889
 890 void init_rl(RLTable *rl)
 891 {
 892     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
 893     uint8_t index_run[MAX_RUN+1];
 894     int last, run, level, start, end, i;
 895
 896     /* compute max_level[], max_run[] and index_run[] */
 897     for(last=0;last<2;last++) {
 898         if (last == 0) {
 899             start = 0;
 900             end = rl->last;
 901         } else {
 902             start = rl->last;
 903             end = rl->n;
 904         }
 905
 906         memset(max_level, 0, MAX_RUN + 1);
 907         memset(max_run, 0, MAX_LEVEL + 1);
 908         memset(index_run, rl->n, MAX_RUN + 1);
 909         for(i=start;i<end;i++) {
 910             run = rl->table_run[i];
 911             level = rl->table_level[i];
 912             if (index_run[run] == rl->n)
 913                 index_run[run] = i;
 914             if (level > max_level[run])
 915                 max_level[run] = level;
 916             if (run > max_run[level])
 917                 max_run[level] = run;
 918         }
 919         rl->max_level[last] = av_malloc(MAX_RUN + 1);
 920         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
 921         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
 922         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
 923         rl->index_run[last] = av_malloc(MAX_RUN + 1);
 924         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
 925     }
 926 }
 927
 928 /* draw the edges of width 'w' of an image of size width, height */
 929 //FIXME check that this is ok for mpeg4 interlaced
 930 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
 931 {
 932     uint8_t *ptr, *last_line;
 933     int i;
 934
 935     last_line = buf + (height - 1) * wrap;
 936     for(i=0;i<w;i++) {
 937         /* top and bottom */
 938         memcpy(buf - (i + 1) * wrap, buf, width);
 939         memcpy(last_line + (i + 1) * wrap, last_line, width);
 940     }
 941     /* left and right */
 942     ptr = buf;
 943     for(i=0;i<height;i++) {
 944         memset(ptr - w, ptr[0], w);
 945         memset(ptr + width, ptr[width-1], w);
 946         ptr += wrap;
 947     }
 948     /* corners */
 949     for(i=0;i<w;i++) {
 950         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
 951         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
 952         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
 953         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
 954     }
 955 }
 956
 957 static int find_unused_picture(MpegEncContext *s, int shared){
 958     int i;
 959
 960     if(shared){
 961         for(i=0; i<MAX_PICTURE_COUNT; i++){
 962             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
 963         }
 964     }else{
 965         for(i=0; i<MAX_PICTURE_COUNT; i++){
 966             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
 967         }
 968         for(i=0; i<MAX_PICTURE_COUNT; i++){
 969             if(s->picture[i].data[0]==NULL) break;
 970         }
 971     }
 972
 973     assert(i<MAX_PICTURE_COUNT);
 974     return i;
 975 }
 976
 977 /* generic function for encode/decode called before a frame is coded/decoded */
 978 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
 979 {
 980     int i;
 981     AVFrame *pic;
 982
 983     s->mb_skiped = 0;
 984
 985     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
 986
 987     /* mark&release old frames */
 988     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
 989         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
 990
 991         /* release forgotten pictures */
 992         /* if(mpeg124/h263) */
 993         if(!s->encoding){
 994             for(i=0; i<MAX_PICTURE_COUNT; i++){
 995                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
 996                     fprintf(stderr, "releasing zombie picture\n");
 997                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
 998                 }
 999             }
1000         }
1001     }
1002 alloc:
1003     if(!s->encoding){
1004         /* release non refernce frames */
1005         for(i=0; i<MAX_PICTURE_COUNT; i++){
1006             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1007                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1008             }
1009         }
1010
1011         i= find_unused_picture(s, 0);
1012
1013         pic= (AVFrame*)&s->picture[i];
1014         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
1015
1016         if(s->current_picture_ptr)
1017             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
1018
1019         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1020             return -1;
1021
1022         s->current_picture_ptr= &s->picture[i];
1023     }
1024
1025     s->current_picture_ptr->pict_type= s->pict_type;
1026     s->current_picture_ptr->quality= s->qscale;
1027     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1028
1029     s->current_picture= *s->current_picture_ptr;
1030
1031   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1032     if (s->pict_type != B_TYPE) {
1033         s->last_picture_ptr= s->next_picture_ptr;
1034         s->next_picture_ptr= s->current_picture_ptr;
1035     }
1036
1037     if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
1038     if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
1039     if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
1040
1041     if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
1042         fprintf(stderr, "warning: first frame is no keyframe\n");
1043         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1044         goto alloc;
1045     }
1046
1047     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1048
1049     if(s->picture_structure!=PICT_FRAME){
1050         int i;
1051         for(i=0; i<4; i++){
1052             if(s->picture_structure == PICT_BOTTOM_FIELD){
1053                  s->current_picture.data[i] += s->current_picture.linesize[i];
1054             }
1055             s->current_picture.linesize[i] *= 2;
1056             s->last_picture.linesize[i] *=2;
1057             s->next_picture.linesize[i] *=2;
1058         }
1059     }
1060   }
1061
1062     s->hurry_up= s->avctx->hurry_up;
1063     s->error_resilience= avctx->error_resilience;
1064
1065     /* set dequantizer, we cant do it during init as it might change for mpeg4
1066        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
1067     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO)
1068         s->dct_unquantize = s->dct_unquantize_mpeg2;
1069     else if(s->out_format == FMT_H263)
1070         s->dct_unquantize = s->dct_unquantize_h263;
1071     else
1072         s->dct_unquantize = s->dct_unquantize_mpeg1;
1073
1074 #ifdef HAVE_XVMC
1075     if(s->avctx->xvmc_acceleration)
1076         return XVMC_field_start(s, avctx);
1077 #endif
1078     return 0;
1079 }
1080
1081 /* generic function for encode/decode called after a frame has been coded/decoded */
1082 void MPV_frame_end(MpegEncContext *s)
1083 {
1084     int i;
1085     /* draw edge for correct motion prediction if outside */
1086 #ifdef HAVE_XVMC
1087 //just to make sure that all data is rendered.
1088     if(s->avctx->xvmc_acceleration){
1089         XVMC_field_end(s);
1090     }else
1091 #endif
1092     if(s->codec_id!=CODEC_ID_SVQ1 && s->out_format != FMT_MPEG1){
1093         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1094             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1095             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1096             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1097         }
1098     }
1099     emms_c();
1100
1101     s->last_pict_type    = s->pict_type;
1102     if(s->pict_type!=B_TYPE){
1103         s->last_non_b_pict_type= s->pict_type;
1104     }
1105 #if 0
1106         /* copy back current_picture variables */
1107     for(i=0; i<MAX_PICTURE_COUNT; i++){
1108         if(s->picture[i].data[0] == s->current_picture.data[0]){
1109             s->picture[i]= s->current_picture;
1110             break;
1111         }
1112     }
1113     assert(i<MAX_PICTURE_COUNT);
1114 #endif
1115
1116     if(s->encoding){
1117         /* release non refernce frames */
1118         for(i=0; i<MAX_PICTURE_COUNT; i++){
1119             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1120                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1121             }
1122         }
1123     }
1124     // clear copies, to avoid confusion
1125 #if 0
1126     memset(&s->last_picture, 0, sizeof(Picture));
1127     memset(&s->next_picture, 0, sizeof(Picture));
1128     memset(&s->current_picture, 0, sizeof(Picture));
1129 #endif
1130 }
1131
1132 /**
1133  * draws an line from (ex, ey) -> (sx, sy).
1134  * @param w width of the image
1135  * @param h height of the image
1136  * @param stride stride/linesize of the image
1137  * @param color color of the arrow
1138  */
1139 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1140     int t, x, y, f;
1141
1142     sx= clip(sx, 0, w-1);
1143     sy= clip(sy, 0, h-1);
1144     ex= clip(ex, 0, w-1);
1145     ey= clip(ey, 0, h-1);
1146
1147     buf[sy*stride + sx]+= color;
1148
1149     if(ABS(ex - sx) > ABS(ey - sy)){
1150         if(sx > ex){
1151             t=sx; sx=ex; ex=t;
1152             t=sy; sy=ey; ey=t;
1153         }
1154         buf+= sx + sy*stride;
1155         ex-= sx;
1156         f= ((ey-sy)<<16)/ex;
1157         for(x= 0; x <= ex; x++){
1158             y= ((x*f) + (1<<15))>>16;
1159             buf[y*stride + x]+= color;
1160         }
1161     }else{
1162         if(sy > ey){
1163             t=sx; sx=ex; ex=t;
1164             t=sy; sy=ey; ey=t;
1165         }
1166         buf+= sx + sy*stride;
1167         ey-= sy;
1168         if(ey) f= ((ex-sx)<<16)/ey;
1169         else   f= 0;
1170         for(y= 0; y <= ey; y++){
1171             x= ((y*f) + (1<<15))>>16;
1172             buf[y*stride + x]+= color;
1173         }
1174     }
1175 }
1176
1177 /**
1178  * draws an arrow from (ex, ey) -> (sx, sy).
1179  * @param w width of the image
1180  * @param h height of the image
1181  * @param stride stride/linesize of the image
1182  * @param color color of the arrow
1183  */
1184 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1185     int dx,dy;
1186
1187     sx= clip(sx, -100, w+100);
1188     sy= clip(sy, -100, h+100);
1189     ex= clip(ex, -100, w+100);
1190     ey= clip(ey, -100, h+100);
1191
1192     dx= ex - sx;
1193     dy= ey - sy;
1194
1195     if(dx*dx + dy*dy > 3*3){
1196         int rx=  dx + dy;
1197         int ry= -dx + dy;
1198         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1199
1200         //FIXME subpixel accuracy
1201         rx= ROUNDED_DIV(rx*3<<4, length);
1202         ry= ROUNDED_DIV(ry*3<<4, length);
1203
1204         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1205         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1206     }
1207     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1208 }
1209
1210 /**
1211  * prints debuging info for the given picture.
1212  */
1213 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1214
1215     if(!pict || !pict->mb_type) return;
1216
1217     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1218         int x,y;
1219
1220         for(y=0; y<s->mb_height; y++){
1221             for(x=0; x<s->mb_width; x++){
1222                 if(s->avctx->debug&FF_DEBUG_SKIP){
1223                     int count= s->mbskip_table[x + y*s->mb_stride];
1224                     if(count>9) count=9;
1225                     printf("%1d", count);
1226                 }
1227                 if(s->avctx->debug&FF_DEBUG_QP){
1228                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1229                 }
1230                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1231                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1232
1233                     //Type & MV direction
1234                     if(IS_PCM(mb_type))
1235                         printf("P");
1236                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1237                         printf("A");
1238                     else if(IS_INTRA4x4(mb_type))
1239                         printf("i");
1240                     else if(IS_INTRA16x16(mb_type))
1241                         printf("I");
1242                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1243                         printf("d");
1244                     else if(IS_DIRECT(mb_type))
1245                         printf("D");
1246                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1247                         printf("g");
1248                     else if(IS_GMC(mb_type))
1249                         printf("G");
1250                     else if(IS_SKIP(mb_type))
1251                         printf("S");
1252                     else if(!USES_LIST(mb_type, 1))
1253                         printf(">");
1254                     else if(!USES_LIST(mb_type, 0))
1255                         printf("<");
1256                     else{
1257                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1258                         printf("X");
1259                     }
1260
1261                     //segmentation
1262                     if(IS_8X8(mb_type))
1263                         printf("+");
1264                     else if(IS_16X8(mb_type))
1265                         printf("-");
1266                     else if(IS_8X16(mb_type))
1267                         printf("¦");
1268                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1269                         printf(" ");
1270                     else
1271                         printf("?");
1272
1273
1274                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1275                         printf("=");
1276                     else
1277                         printf(" ");
1278                 }
1279 //                printf(" ");
1280             }
1281             printf("\n");
1282         }
1283     }
1284
1285     if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){
1286         const int shift= 1 + s->quarter_sample;
1287         int mb_y;
1288         uint8_t *ptr= pict->data[0];
1289         s->low_delay=0; //needed to see the vectors without trashing the buffers
1290
1291         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1292             int mb_x;
1293             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1294                 const int mb_index= mb_x + mb_y*s->mb_stride;
1295                 if(IS_8X8(s->current_picture.mb_type[mb_index])){
1296                     int i;
1297                     for(i=0; i<4; i++){
1298                         int sx= mb_x*16 + 4 + 8*(i&1);
1299                         int sy= mb_y*16 + 4 + 8*(i>>1);
1300                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1301                         int mx= (s->motion_val[xy][0]>>shift) + sx;
1302                         int my= (s->motion_val[xy][1]>>shift) + sy;
1303                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1304                     }
1305                 }else{
1306                     int sx= mb_x*16 + 8;
1307                     int sy= mb_y*16 + 8;
1308                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1309                     int mx= (s->motion_val[xy][0]>>shift) + sx;
1310                     int my= (s->motion_val[xy][1]>>shift) + sy;
1311                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1312                 }
1313                 s->mbskip_table[mb_index]=0;
1314             }
1315         }
1316     }
1317 }
1318
1319 #ifdef CONFIG_ENCODERS
1320
1321 static int get_sae(uint8_t *src, int ref, int stride){
1322     int x,y;
1323     int acc=0;
1324
1325     for(y=0; y<16; y++){
1326         for(x=0; x<16; x++){
1327             acc+= ABS(src[x+y*stride] - ref);
1328         }
1329     }
1330
1331     return acc;
1332 }
1333
1334 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1335     int x, y, w, h;
1336     int acc=0;
1337
1338     w= s->width &~15;
1339     h= s->height&~15;
1340
1341     for(y=0; y<h; y+=16){
1342         for(x=0; x<w; x+=16){
1343             int offset= x + y*stride;
1344             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1345             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1346             int sae = get_sae(src + offset, mean, stride);
1347
1348             acc+= sae + 500 < sad;
1349         }
1350     }
1351     return acc;
1352 }
1353
1354
1355 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1356     AVFrame *pic=NULL;
1357     int i;
1358     const int encoding_delay= s->max_b_frames;
1359     int direct=1;
1360
1361   if(pic_arg){
1362     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1363     if(pic_arg->linesize[0] != s->linesize) direct=0;
1364     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1365     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1366
1367 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1368
1369     if(direct){
1370         i= find_unused_picture(s, 1);
1371
1372         pic= (AVFrame*)&s->picture[i];
1373         pic->reference= 3;
1374
1375         for(i=0; i<4; i++){
1376             pic->data[i]= pic_arg->data[i];
1377             pic->linesize[i]= pic_arg->linesize[i];
1378         }
1379         alloc_picture(s, (Picture*)pic, 1);
1380     }else{
1381         i= find_unused_picture(s, 0);
1382
1383         pic= (AVFrame*)&s->picture[i];
1384         pic->reference= 3;
1385
1386         alloc_picture(s, (Picture*)pic, 0);
1387         for(i=0; i<4; i++){
1388             /* the input will be 16 pixels to the right relative to the actual buffer start
1389              * and the current_pic, so the buffer can be reused, yes its not beatifull
1390              */
1391             pic->data[i]+= 16;
1392         }
1393
1394         if(   pic->data[0] == pic_arg->data[0]
1395            && pic->data[1] == pic_arg->data[1]
1396            && pic->data[2] == pic_arg->data[2]){
1397        // empty
1398         }else{
1399             int h_chroma_shift, v_chroma_shift;
1400             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1401
1402             for(i=0; i<3; i++){
1403                 int src_stride= pic_arg->linesize[i];
1404                 int dst_stride= i ? s->uvlinesize : s->linesize;
1405                 int h_shift= i ? h_chroma_shift : 0;
1406                 int v_shift= i ? v_chroma_shift : 0;
1407                 int w= s->width >>h_shift;
1408                 int h= s->height>>v_shift;
1409                 uint8_t *src= pic_arg->data[i];
1410                 uint8_t *dst= pic->data[i];
1411
1412                 if(src_stride==dst_stride)
1413                     memcpy(dst, src, src_stride*h);
1414                 else{
1415                     while(h--){
1416                         memcpy(dst, src, w);
1417                         dst += dst_stride;
1418                         src += src_stride;
1419                     }
1420                 }
1421             }
1422         }
1423     }
1424     pic->quality= pic_arg->quality;
1425     pic->pict_type= pic_arg->pict_type;
1426     pic->pts = pic_arg->pts;
1427
1428     if(s->input_picture[encoding_delay])
1429         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1430
1431   }
1432
1433     /* shift buffer entries */
1434     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1435         s->input_picture[i-1]= s->input_picture[i];
1436
1437     s->input_picture[encoding_delay]= (Picture*)pic;
1438
1439     return 0;
1440 }
1441
1442 static void select_input_picture(MpegEncContext *s){
1443     int i;
1444     int coded_pic_num=0;
1445
1446     if(s->reordered_input_picture[0])
1447         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1448
1449     for(i=1; i<MAX_PICTURE_COUNT; i++)
1450         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1451     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1452
1453     /* set next picture types & ordering */
1454     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1455         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1456             s->reordered_input_picture[0]= s->input_picture[0];
1457             s->reordered_input_picture[0]->pict_type= I_TYPE;
1458             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1459         }else{
1460             int b_frames;
1461
1462             if(s->flags&CODEC_FLAG_PASS2){
1463                 for(i=0; i<s->max_b_frames+1; i++){
1464                     int pict_num= s->input_picture[0]->display_picture_number + i;
1465                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1466                     s->input_picture[i]->pict_type= pict_type;
1467
1468                     if(i + 1 >= s->rc_context.num_entries) break;
1469                 }
1470             }
1471
1472             if(s->input_picture[0]->pict_type){
1473                 /* user selected pict_type */
1474                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1475                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1476                 }
1477
1478                 if(b_frames > s->max_b_frames){
1479                     fprintf(stderr, "warning, too many bframes in a row\n");
1480                     b_frames = s->max_b_frames;
1481                 }
1482             }else if(s->b_frame_strategy==0){
1483                 b_frames= s->max_b_frames;
1484                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1485             }else if(s->b_frame_strategy==1){
1486                 for(i=1; i<s->max_b_frames+1; i++){
1487                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1488                         s->input_picture[i]->b_frame_score=
1489                             get_intra_count(s, s->input_picture[i  ]->data[0],
1490                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1491                     }
1492                 }
1493                 for(i=0; i<s->max_b_frames; i++){
1494                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1495                 }
1496
1497                 b_frames= FFMAX(0, i-1);
1498
1499                 /* reset scores */
1500                 for(i=0; i<b_frames+1; i++){
1501                     s->input_picture[i]->b_frame_score=0;
1502                 }
1503             }else{
1504                 fprintf(stderr, "illegal b frame strategy\n");
1505                 b_frames=0;
1506             }
1507
1508             emms_c();
1509 //static int b_count=0;
1510 //b_count+= b_frames;
1511 //printf("b_frames: %d\n", b_count);
1512
1513             s->reordered_input_picture[0]= s->input_picture[b_frames];
1514             if(   s->picture_in_gop_number + b_frames >= s->gop_size
1515                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1516                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1517             else
1518                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1519             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1520             for(i=0; i<b_frames; i++){
1521                 coded_pic_num++;
1522                 s->reordered_input_picture[i+1]= s->input_picture[i];
1523                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1524                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1525             }
1526         }
1527     }
1528
1529     if(s->reordered_input_picture[0]){
1530         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1531
1532         s->new_picture= *s->reordered_input_picture[0];
1533
1534         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1535             // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
1536
1537             int i= find_unused_picture(s, 0);
1538             Picture *pic= &s->picture[i];
1539
1540             /* mark us unused / free shared pic */
1541             for(i=0; i<4; i++)
1542                 s->reordered_input_picture[0]->data[i]= NULL;
1543             s->reordered_input_picture[0]->type= 0;
1544
1545             //FIXME bad, copy * except
1546             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1547             pic->quality   = s->reordered_input_picture[0]->quality;
1548             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1549             pic->reference = s->reordered_input_picture[0]->reference;
1550             pic->pts = s->reordered_input_picture[0]->pts;
1551
1552             alloc_picture(s, pic, 0);
1553
1554             s->current_picture_ptr= pic;
1555         }else{
1556             // input is not a shared pix -> reuse buffer for current_pix
1557
1558             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
1559                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1560
1561             s->current_picture_ptr= s->reordered_input_picture[0];
1562             for(i=0; i<4; i++){
1563                 //reverse the +16 we did before storing the input
1564                 s->current_picture_ptr->data[i]-=16;
1565             }
1566         }
1567         s->current_picture= *s->current_picture_ptr;
1568
1569         s->picture_number= s->new_picture.display_picture_number;
1570 //printf("dpn:%d\n", s->picture_number);
1571     }else{
1572        memset(&s->new_picture, 0, sizeof(Picture));
1573     }
1574 }
1575
1576 int MPV_encode_picture(AVCodecContext *avctx,
1577                        unsigned char *buf, int buf_size, void *data)
1578 {
1579     MpegEncContext *s = avctx->priv_data;
1580     AVFrame *pic_arg = data;
1581     int i;
1582
1583     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1584         fprintf(stderr, "this codec supports only YUV420P\n");
1585         return -1;
1586     }
1587
1588     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1589
1590     s->picture_in_gop_number++;
1591
1592     load_input_picture(s, pic_arg);
1593
1594     select_input_picture(s);
1595
1596     /* output? */
1597     if(s->new_picture.data[0]){
1598
1599         s->pict_type= s->new_picture.pict_type;
1600         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we dont touch it for CBR */
1601             s->qscale= (int)(s->new_picture.quality+0.5);
1602             assert(s->qscale);
1603         }
1604 //emms_c();
1605 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1606         MPV_frame_start(s, avctx);
1607
1608         encode_picture(s, s->picture_number);
1609
1610         avctx->real_pict_num  = s->picture_number;
1611         avctx->header_bits = s->header_bits;
1612         avctx->mv_bits     = s->mv_bits;
1613         avctx->misc_bits   = s->misc_bits;
1614         avctx->i_tex_bits  = s->i_tex_bits;
1615         avctx->p_tex_bits  = s->p_tex_bits;
1616         avctx->i_count     = s->i_count;
1617         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1618         avctx->skip_count  = s->skip_count;
1619
1620         MPV_frame_end(s);
1621
1622         if (s->out_format == FMT_MJPEG)
1623             mjpeg_picture_trailer(s);
1624
1625         if(s->flags&CODEC_FLAG_PASS1)
1626             ff_write_pass1_stats(s);
1627
1628         for(i=0; i<4; i++){
1629             avctx->error[i] += s->current_picture_ptr->error[i];
1630         }
1631     }
1632
1633     s->input_picture_number++;
1634
1635     flush_put_bits(&s->pb);
1636     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1637
1638     s->total_bits += s->frame_bits;
1639     avctx->frame_bits  = s->frame_bits;
1640
1641     return pbBufPtr(&s->pb) - s->pb.buf;
1642 }
1643
1644 #endif //CONFIG_ENCODERS
1645
1646 static inline void gmc1_motion(MpegEncContext *s,
1647                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1648                                int dest_offset,
1649                                uint8_t **ref_picture, int src_offset)
1650 {
1651     uint8_t *ptr;
1652     int offset, src_x, src_y, linesize, uvlinesize;
1653     int motion_x, motion_y;
1654     int emu=0;
1655
1656     motion_x= s->sprite_offset[0][0];
1657     motion_y= s->sprite_offset[0][1];
1658     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1659     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1660     motion_x<<=(3-s->sprite_warping_accuracy);
1661     motion_y<<=(3-s->sprite_warping_accuracy);
1662     src_x = clip(src_x, -16, s->width);
1663     if (src_x == s->width)
1664         motion_x =0;
1665     src_y = clip(src_y, -16, s->height);
1666     if (src_y == s->height)
1667         motion_y =0;
1668
1669     linesize = s->linesize;
1670     uvlinesize = s->uvlinesize;
1671
1672     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1673
1674     dest_y+=dest_offset;
1675     if(s->flags&CODEC_FLAG_EMU_EDGE){
1676         if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
1677                               || src_y + 17 >= s->v_edge_pos){
1678             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1679             ptr= s->edge_emu_buffer;
1680         }
1681     }
1682
1683     if((motion_x|motion_y)&7){
1684         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1685         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1686     }else{
1687         int dxy;
1688
1689         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1690         if (s->no_rounding){
1691             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1692         }else{
1693             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1694         }
1695     }
1696
1697     if(s->flags&CODEC_FLAG_GRAY) return;
1698
1699     motion_x= s->sprite_offset[1][0];
1700     motion_y= s->sprite_offset[1][1];
1701     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1702     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1703     motion_x<<=(3-s->sprite_warping_accuracy);
1704     motion_y<<=(3-s->sprite_warping_accuracy);
1705     src_x = clip(src_x, -8, s->width>>1);
1706     if (src_x == s->width>>1)
1707         motion_x =0;
1708     src_y = clip(src_y, -8, s->height>>1);
1709     if (src_y == s->height>>1)
1710         motion_y =0;
1711
1712     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1713     ptr = ref_picture[1] + offset;
1714     if(s->flags&CODEC_FLAG_EMU_EDGE){
1715         if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
1716                               || src_y + 9 >= s->v_edge_pos>>1){
1717             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1718             ptr= s->edge_emu_buffer;
1719             emu=1;
1720         }
1721     }
1722     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1723
1724     ptr = ref_picture[2] + offset;
1725     if(emu){
1726         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1727         ptr= s->edge_emu_buffer;
1728     }
1729     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1730
1731     return;
1732 }
1733
1734 static inline void gmc_motion(MpegEncContext *s,
1735                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1736                                int dest_offset,
1737                                uint8_t **ref_picture, int src_offset)
1738 {
1739     uint8_t *ptr;
1740     int linesize, uvlinesize;
1741     const int a= s->sprite_warping_accuracy;
1742     int ox, oy;
1743
1744     linesize = s->linesize;
1745     uvlinesize = s->uvlinesize;
1746
1747     ptr = ref_picture[0] + src_offset;
1748
1749     dest_y+=dest_offset;
1750
1751     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1752     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1753
1754     s->dsp.gmc(dest_y, ptr, linesize, 16,
1755            ox,
1756            oy,
1757            s->sprite_delta[0][0], s->sprite_delta[0][1],
1758            s->sprite_delta[1][0], s->sprite_delta[1][1],
1759            a+1, (1<<(2*a+1)) - s->no_rounding,
1760            s->h_edge_pos, s->v_edge_pos);
1761     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1762            ox + s->sprite_delta[0][0]*8,
1763            oy + s->sprite_delta[1][0]*8,
1764            s->sprite_delta[0][0], s->sprite_delta[0][1],
1765            s->sprite_delta[1][0], s->sprite_delta[1][1],
1766            a+1, (1<<(2*a+1)) - s->no_rounding,
1767            s->h_edge_pos, s->v_edge_pos);
1768
1769     if(s->flags&CODEC_FLAG_GRAY) return;
1770
1771
1772     dest_cb+=dest_offset>>1;
1773     dest_cr+=dest_offset>>1;
1774
1775     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1776     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1777
1778     ptr = ref_picture[1] + (src_offset>>1);
1779     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1780            ox,
1781            oy,
1782            s->sprite_delta[0][0], s->sprite_delta[0][1],
1783            s->sprite_delta[1][0], s->sprite_delta[1][1],
1784            a+1, (1<<(2*a+1)) - s->no_rounding,
1785            s->h_edge_pos>>1, s->v_edge_pos>>1);
1786
1787     ptr = ref_picture[2] + (src_offset>>1);
1788     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1789            ox,
1790            oy,
1791            s->sprite_delta[0][0], s->sprite_delta[0][1],
1792            s->sprite_delta[1][0], s->sprite_delta[1][1],
1793            a+1, (1<<(2*a+1)) - s->no_rounding,
1794            s->h_edge_pos>>1, s->v_edge_pos>>1);
1795 }
1796
1797 /**
1798  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
1799  * @param buf destination buffer
1800  * @param src source buffer
1801  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1802  * @param block_w width of block
1803  * @param block_h height of block
1804  * @param src_x x coordinate of the top left sample of the block in the source buffer
1805  * @param src_y y coordinate of the top left sample of the block in the source buffer
1806  * @param w width of the source buffer
1807  * @param h height of the source buffer
1808  */
1809 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
1810                                     int src_x, int src_y, int w, int h){
1811     int x, y;
1812     int start_y, start_x, end_y, end_x;
1813
1814     if(src_y>= h){
1815         src+= (h-1-src_y)*linesize;
1816         src_y=h-1;
1817     }else if(src_y<=-block_h){
1818         src+= (1-block_h-src_y)*linesize;
1819         src_y=1-block_h;
1820     }
1821     if(src_x>= w){
1822         src+= (w-1-src_x);
1823         src_x=w-1;
1824     }else if(src_x<=-block_w){
1825         src+= (1-block_w-src_x);
1826         src_x=1-block_w;
1827     }
1828
1829     start_y= FFMAX(0, -src_y);
1830     start_x= FFMAX(0, -src_x);
1831     end_y= FFMIN(block_h, h-src_y);
1832     end_x= FFMIN(block_w, w-src_x);
1833
1834     // copy existing part
1835     for(y=start_y; y<end_y; y++){
1836         for(x=start_x; x<end_x; x++){
1837             buf[x + y*linesize]= src[x + y*linesize];
1838         }
1839     }
1840
1841     //top
1842     for(y=0; y<start_y; y++){
1843         for(x=start_x; x<end_x; x++){
1844             buf[x + y*linesize]= buf[x + start_y*linesize];
1845         }
1846     }
1847
1848     //bottom
1849     for(y=end_y; y<block_h; y++){
1850         for(x=start_x; x<end_x; x++){
1851             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1852         }
1853     }
1854
1855     for(y=0; y<block_h; y++){
1856        //left
1857         for(x=0; x<start_x; x++){
1858             buf[x + y*linesize]= buf[start_x + y*linesize];
1859         }
1860
1861        //right
1862         for(x=end_x; x<block_w; x++){
1863             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1864         }
1865     }
1866 }
1867
1868
1869 /* apply one mpeg motion vector to the three components */
1870 static inline void mpeg_motion(MpegEncContext *s,
1871                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1872                                int dest_offset,
1873                                uint8_t **ref_picture, int src_offset,
1874                                int field_based, op_pixels_func (*pix_op)[4],
1875                                int motion_x, int motion_y, int h)
1876 {
1877     uint8_t *ptr;
1878     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1879     int emu=0;
1880 #if 0
1881 if(s->quarter_sample)
1882 {
1883     motion_x>>=1;
1884     motion_y>>=1;
1885 }
1886 #endif
1887     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1888     src_x = s->mb_x * 16 + (motion_x >> 1);
1889     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1890
1891     /* WARNING: do no forget half pels */
1892     height = s->height >> field_based;
1893     v_edge_pos = s->v_edge_pos >> field_based;
1894     src_x = clip(src_x, -16, s->width);
1895     if (src_x == s->width)
1896         dxy &= ~1;
1897     src_y = clip(src_y, -16, height);
1898     if (src_y == height)
1899         dxy &= ~2;
1900     linesize   = s->current_picture.linesize[0] << field_based;
1901     uvlinesize = s->current_picture.linesize[1] << field_based;
1902     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1903     dest_y += dest_offset;
1904
1905     if(s->flags&CODEC_FLAG_EMU_EDGE){
1906         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1907                               || src_y + (motion_y&1) + h  > v_edge_pos){
1908             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1909                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1910             ptr= s->edge_emu_buffer + src_offset;
1911             emu=1;
1912         }
1913     }
1914     pix_op[0][dxy](dest_y, ptr, linesize, h);
1915
1916     if(s->flags&CODEC_FLAG_GRAY) return;
1917
1918     if (s->out_format == FMT_H263) {
1919         dxy = 0;
1920         if ((motion_x & 3) != 0)
1921             dxy |= 1;
1922         if ((motion_y & 3) != 0)
1923             dxy |= 2;
1924         mx = motion_x >> 2;
1925         my = motion_y >> 2;
1926     } else {
1927         mx = motion_x / 2;
1928         my = motion_y / 2;
1929         dxy = ((my & 1) << 1) | (mx & 1);
1930         mx >>= 1;
1931         my >>= 1;
1932     }
1933
1934     src_x = s->mb_x * 8 + mx;
1935     src_y = s->mb_y * (8 >> field_based) + my;
1936     src_x = clip(src_x, -8, s->width >> 1);
1937     if (src_x == (s->width >> 1))
1938         dxy &= ~1;
1939     src_y = clip(src_y, -8, height >> 1);
1940     if (src_y == (height >> 1))
1941         dxy &= ~2;
1942     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1943     ptr = ref_picture[1] + offset;
1944     if(emu){
1945         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
1946                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1947         ptr= s->edge_emu_buffer + (src_offset >> 1);
1948     }
1949     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1950
1951     ptr = ref_picture[2] + offset;
1952     if(emu){
1953         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
1954                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1955         ptr= s->edge_emu_buffer + (src_offset >> 1);
1956     }
1957     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1958 }
1959
1960 static inline void qpel_motion(MpegEncContext *s,
1961                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1962                                int dest_offset,
1963                                uint8_t **ref_picture, int src_offset,
1964                                int field_based, op_pixels_func (*pix_op)[4],
1965                                qpel_mc_func (*qpix_op)[16],
1966                                int motion_x, int motion_y, int h)
1967 {
1968     uint8_t *ptr;
1969     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1970     int emu=0;
1971
1972     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1973     src_x = s->mb_x * 16 + (motion_x >> 2);
1974     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1975
1976     height = s->height >> field_based;
1977     v_edge_pos = s->v_edge_pos >> field_based;
1978     src_x = clip(src_x, -16, s->width);
1979     if (src_x == s->width)
1980         dxy &= ~3;
1981     src_y = clip(src_y, -16, height);
1982     if (src_y == height)
1983         dxy &= ~12;
1984     linesize = s->linesize << field_based;
1985     uvlinesize = s->uvlinesize << field_based;
1986     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1987     dest_y += dest_offset;
1988 //printf("%d %d %d\n", src_x, src_y, dxy);
1989
1990     if(s->flags&CODEC_FLAG_EMU_EDGE){
1991         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1992                               || src_y + (motion_y&3) + h  > v_edge_pos){
1993             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,
1994                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1995             ptr= s->edge_emu_buffer + src_offset;
1996             emu=1;
1997         }
1998     }
1999     if(!field_based)
2000         qpix_op[0][dxy](dest_y, ptr, linesize);
2001     else{
2002         //damn interlaced mode
2003         //FIXME boundary mirroring is not exactly correct here
2004         qpix_op[1][dxy](dest_y  , ptr  , linesize);
2005         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
2006     }
2007
2008     if(s->flags&CODEC_FLAG_GRAY) return;
2009
2010     if(field_based){
2011         mx= motion_x/2;
2012         my= motion_y>>1;
2013     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2014         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2015         mx= (motion_x>>1) + rtab[motion_x&7];
2016         my= (motion_y>>1) + rtab[motion_y&7];
2017     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2018         mx= (motion_x>>1)|(motion_x&1);
2019         my= (motion_y>>1)|(motion_y&1);
2020     }else{
2021         mx= motion_x/2;
2022         my= motion_y/2;
2023     }
2024     mx= (mx>>1)|(mx&1);
2025     my= (my>>1)|(my&1);
2026
2027     dxy= (mx&1) | ((my&1)<<1);
2028     mx>>=1;
2029     my>>=1;
2030
2031     src_x = s->mb_x * 8 + mx;
2032     src_y = s->mb_y * (8 >> field_based) + my;
2033     src_x = clip(src_x, -8, s->width >> 1);
2034     if (src_x == (s->width >> 1))
2035         dxy &= ~1;
2036     src_y = clip(src_y, -8, height >> 1);
2037     if (src_y == (height >> 1))
2038         dxy &= ~2;
2039
2040     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2041     ptr = ref_picture[1] + offset;
2042     if(emu){
2043         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
2044                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2045         ptr= s->edge_emu_buffer + (src_offset >> 1);
2046     }
2047     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2048
2049     ptr = ref_picture[2] + offset;
2050     if(emu){
2051         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
2052                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2053         ptr= s->edge_emu_buffer + (src_offset >> 1);
2054     }
2055     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2056 }
2057
2058 inline int ff_h263_round_chroma(int x){
2059     if (x >= 0)
2060         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2061     else {
2062         x = -x;
2063         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2064     }
2065 }
2066
2067 /**
2068  * motion compesation of a single macroblock
2069  * @param s context
2070  * @param dest_y luma destination pointer
2071  * @param dest_cb chroma cb/u destination pointer
2072  * @param dest_cr chroma cr/v destination pointer
2073  * @param dir direction (0->forward, 1->backward)
2074  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2075  * @param pic_op halfpel motion compensation function (average or put normally)
2076  * @param pic_op qpel motion compensation function (average or put normally)
2077  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2078  */
2079 static inline void MPV_motion(MpegEncContext *s,
2080                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2081                               int dir, uint8_t **ref_picture,
2082                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2083 {
2084     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
2085     int mb_x, mb_y, i;
2086     uint8_t *ptr, *dest;
2087     int emu=0;
2088
2089     mb_x = s->mb_x;
2090     mb_y = s->mb_y;
2091
2092     switch(s->mv_type) {
2093     case MV_TYPE_16X16:
2094 #ifdef CONFIG_RISKY
2095         if(s->mcsel){
2096             if(s->real_sprite_warping_points==1){
2097                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2098                             ref_picture, 0);
2099             }else{
2100                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2101                             ref_picture, 0);
2102             }
2103         }else if(s->quarter_sample){
2104             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2105                         ref_picture, 0,
2106                         0, pix_op, qpix_op,
2107                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2108         }else if(s->mspel){
2109             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2110                         ref_picture, pix_op,
2111                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2112         }else
2113 #endif
2114         {
2115             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2116                         ref_picture, 0,
2117                         0, pix_op,
2118                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2119         }
2120         break;
2121     case MV_TYPE_8X8:
2122         mx = 0;
2123         my = 0;
2124         if(s->quarter_sample){
2125             for(i=0;i<4;i++) {
2126                 motion_x = s->mv[dir][i][0];
2127                 motion_y = s->mv[dir][i][1];
2128
2129                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2130                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2131                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2132
2133                 /* WARNING: do no forget half pels */
2134                 src_x = clip(src_x, -16, s->width);
2135                 if (src_x == s->width)
2136                     dxy &= ~3;
2137                 src_y = clip(src_y, -16, s->height);
2138                 if (src_y == s->height)
2139                     dxy &= ~12;
2140
2141                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2142                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2143                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
2144                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
2145                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2146                         ptr= s->edge_emu_buffer;
2147                     }
2148                 }
2149                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2150                 qpix_op[1][dxy](dest, ptr, s->linesize);
2151
2152                 mx += s->mv[dir][i][0]/2;
2153                 my += s->mv[dir][i][1]/2;
2154             }
2155         }else{
2156             for(i=0;i<4;i++) {
2157                 motion_x = s->mv[dir][i][0];
2158                 motion_y = s->mv[dir][i][1];
2159
2160                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2161                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
2162                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
2163
2164                 /* WARNING: do no forget half pels */
2165                 src_x = clip(src_x, -16, s->width);
2166                 if (src_x == s->width)
2167                     dxy &= ~1;
2168                 src_y = clip(src_y, -16, s->height);
2169                 if (src_y == s->height)
2170                     dxy &= ~2;
2171
2172                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2173                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2174                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
2175                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
2176                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2177                         ptr= s->edge_emu_buffer;
2178                     }
2179                 }
2180                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2181                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
2182
2183                 mx += s->mv[dir][i][0];
2184                 my += s->mv[dir][i][1];
2185             }
2186         }
2187
2188         if(s->flags&CODEC_FLAG_GRAY) break;
2189         /* In case of 8X8, we construct a single chroma motion vector
2190            with a special rounding */
2191         mx= ff_h263_round_chroma(mx);
2192         my= ff_h263_round_chroma(my);
2193         dxy = ((my & 1) << 1) | (mx & 1);
2194         mx >>= 1;
2195         my >>= 1;
2196
2197         src_x = mb_x * 8 + mx;
2198         src_y = mb_y * 8 + my;
2199         src_x = clip(src_x, -8, s->width/2);
2200         if (src_x == s->width/2)
2201             dxy &= ~1;
2202         src_y = clip(src_y, -8, s->height/2);
2203         if (src_y == s->height/2)
2204             dxy &= ~2;
2205
2206         offset = (src_y * (s->uvlinesize)) + src_x;
2207         ptr = ref_picture[1] + offset;
2208         if(s->flags&CODEC_FLAG_EMU_EDGE){
2209                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
2210                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
2211                     ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2212                     ptr= s->edge_emu_buffer;
2213                     emu=1;
2214                 }
2215             }
2216         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2217
2218         ptr = ref_picture[2] + offset;
2219         if(emu){
2220             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2221             ptr= s->edge_emu_buffer;
2222         }
2223         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2224         break;
2225     case MV_TYPE_FIELD:
2226         if (s->picture_structure == PICT_FRAME) {
2227             if(s->quarter_sample){
2228                 /* top field */
2229                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2230                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2231                             1, pix_op, qpix_op,
2232                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2233                 /* bottom field */
2234                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2235                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2236                             1, pix_op, qpix_op,
2237                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2238             }else{
2239                 /* top field */
2240                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2241                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2242                             1, pix_op,
2243                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2244                 /* bottom field */
2245                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2246                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2247                             1, pix_op,
2248                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2249             }
2250         } else {
2251             int offset;
2252             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2253                 offset= s->field_select[dir][0] ? s->linesize : 0;
2254             }else{
2255                 ref_picture= s->current_picture.data;
2256                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize;
2257             }
2258
2259             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2260                         ref_picture, offset,
2261                         0, pix_op,
2262                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2263         }
2264         break;
2265     case MV_TYPE_16X8:{
2266         int offset;
2267          uint8_t ** ref2picture;
2268
2269             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2270                 ref2picture= ref_picture;
2271                 offset= s->field_select[dir][0] ? s->linesize : 0;
2272             }else{
2273                 ref2picture= s->current_picture.data;
2274                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize;
2275             }
2276
2277             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2278                         ref2picture, offset,
2279                         0, pix_op,
2280                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2281
2282
2283             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2284                 ref2picture= ref_picture;
2285                 offset= s->field_select[dir][1] ? s->linesize : 0;
2286             }else{
2287                 ref2picture= s->current_picture.data;
2288                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize;
2289             }
2290             // I know it is ugly but this is the only way to fool emu_edge without rewrite mpeg_motion
2291             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2292                         0,
2293                         ref2picture, offset,
2294                         0, pix_op,
2295                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2296         }
2297
2298         break;
2299     case MV_TYPE_DMV:
2300     {
2301     op_pixels_func (*dmv_pix_op)[4];
2302     int offset;
2303
2304         dmv_pix_op = s->dsp.put_pixels_tab;
2305
2306         if(s->picture_structure == PICT_FRAME){
2307             //put top field from top field
2308             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2309                         ref_picture, 0,
2310                         1, dmv_pix_op,
2311                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2312             //put bottom field from bottom field
2313             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2314                         ref_picture, s->linesize,
2315                         1, dmv_pix_op,
2316                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2317
2318             dmv_pix_op = s->dsp.avg_pixels_tab;
2319
2320             //avg top field from bottom field
2321             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2322                         ref_picture, s->linesize,
2323                         1, dmv_pix_op,
2324                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2325             //avg bottom field from top field
2326             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2327                         ref_picture, 0,
2328                         1, dmv_pix_op,
2329                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2330
2331         }else{
2332             offset=(s->picture_structure == PICT_BOTTOM_FIELD)?
2333                          s->linesize : 0;
2334
2335             //put field from the same parity
2336             //same parity is never in the same frame
2337             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2338                         ref_picture,offset,
2339                         0,dmv_pix_op,
2340                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2341
2342             // after put we make avg of the same block
2343             dmv_pix_op=s->dsp.avg_pixels_tab;
2344
2345             //opposite parity is always in the same frame if this is second field
2346             if(!s->first_field){
2347                 ref_picture = s->current_picture.data;
2348                 //top field is one linesize from frame beginig
2349                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)?
2350                         -s->linesize : s->linesize;
2351             }else
2352                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)?
2353                         0 : s->linesize;
2354
2355             //avg field from the opposite parity
2356             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2357                         ref_picture, offset,
2358                         0,dmv_pix_op,
2359                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2360         }
2361     }
2362     break;
2363
2364     }
2365 }
2366
2367
2368 /* put block[] to dest[] */
2369 static inline void put_dct(MpegEncContext *s,
2370                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2371 {
2372     s->dct_unquantize(s, block, i, s->qscale);
2373     s->dsp.idct_put (dest, line_size, block);
2374 }
2375
2376 /* add block[] to dest[] */
2377 static inline void add_dct(MpegEncContext *s,
2378                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2379 {
2380     if (s->block_last_index[i] >= 0) {
2381         s->dsp.idct_add (dest, line_size, block);
2382     }
2383 }
2384
2385 static inline void add_dequant_dct(MpegEncContext *s,
2386                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2387 {
2388     if (s->block_last_index[i] >= 0) {
2389         s->dct_unquantize(s, block, i, s->qscale);
2390
2391         s->dsp.idct_add (dest, line_size, block);
2392     }
2393 }
2394
2395 /**
2396  * cleans dc, ac, coded_block for the current non intra MB
2397  */
2398 void ff_clean_intra_table_entries(MpegEncContext *s)
2399 {
2400     int wrap = s->block_wrap[0];
2401     int xy = s->block_index[0];
2402
2403     s->dc_val[0][xy           ] =
2404     s->dc_val[0][xy + 1       ] =
2405     s->dc_val[0][xy     + wrap] =
2406     s->dc_val[0][xy + 1 + wrap] = 1024;
2407     /* ac pred */
2408     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2409     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2410     if (s->msmpeg4_version>=3) {
2411         s->coded_block[xy           ] =
2412         s->coded_block[xy + 1       ] =
2413         s->coded_block[xy     + wrap] =
2414         s->coded_block[xy + 1 + wrap] = 0;
2415     }
2416     /* chroma */
2417     wrap = s->block_wrap[4];
2418     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2419     s->dc_val[1][xy] =
2420     s->dc_val[2][xy] = 1024;
2421     /* ac pred */
2422     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2423     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2424
2425     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2426 }
2427
2428 /* generic function called after a macroblock has been parsed by the
2429    decoder or after it has been encoded by the encoder.
2430
2431    Important variables used:
2432    s->mb_intra : true if intra macroblock
2433    s->mv_dir   : motion vector direction
2434    s->mv_type  : motion vector type
2435    s->mv       : motion vector
2436    s->interlaced_dct : true if interlaced dct used (mpeg2)
2437  */
2438 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2439 {
2440     int mb_x, mb_y;
2441     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2442 #ifdef HAVE_XVMC
2443     if(s->avctx->xvmc_acceleration){
2444         XVMC_decode_mb(s,block);
2445         return;
2446     }
2447 #endif
2448
2449     mb_x = s->mb_x;
2450     mb_y = s->mb_y;
2451
2452     s->current_picture.qscale_table[mb_xy]= s->qscale;
2453
2454     /* update DC predictors for P macroblocks */
2455     if (!s->mb_intra) {
2456         if (s->h263_pred || s->h263_aic) {
2457             if(s->mbintra_table[mb_xy])
2458                 ff_clean_intra_table_entries(s);
2459         } else {
2460             s->last_dc[0] =
2461             s->last_dc[1] =
2462             s->last_dc[2] = 128 << s->intra_dc_precision;
2463         }
2464     }
2465     else if (s->h263_pred || s->h263_aic)
2466         s->mbintra_table[mb_xy]=1;
2467
2468     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2469         uint8_t *dest_y, *dest_cb, *dest_cr;
2470         int dct_linesize, dct_offset;
2471         op_pixels_func (*op_pix)[4];
2472         qpel_mc_func (*op_qpix)[16];
2473         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
2474         const int uvlinesize= s->current_picture.linesize[1];
2475
2476         /* avoid copy if macroblock skipped in last frame too */
2477         /* skip only during decoding as we might trash the buffers during encoding a bit */
2478         if(!s->encoding){
2479             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2480             const int age= s->current_picture.age;
2481
2482             assert(age);
2483
2484             if (s->mb_skiped) {
2485                 s->mb_skiped= 0;
2486                 assert(s->pict_type!=I_TYPE);
2487
2488                 (*mbskip_ptr) ++; /* indicate that this time we skiped it */
2489                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2490
2491                 /* if previous was skipped too, then nothing to do !  */
2492                 if (*mbskip_ptr >= age && s->current_picture.reference){
2493                     return;
2494                 }
2495             } else if(!s->current_picture.reference){
2496                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2497                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2498             } else{
2499                 *mbskip_ptr = 0; /* not skipped */
2500             }
2501         }
2502
2503         if (s->interlaced_dct) {
2504             dct_linesize = linesize * 2;
2505             dct_offset = linesize;
2506         } else {
2507             dct_linesize = linesize;
2508             dct_offset = linesize * 8;
2509         }
2510
2511         dest_y=  s->dest[0];
2512         dest_cb= s->dest[1];
2513         dest_cr= s->dest[2];
2514
2515         if (!s->mb_intra) {
2516             /* motion handling */
2517             /* decoding or more than one mb_type (MC was allready done otherwise) */
2518             if(!s->encoding){
2519                 if ((!s->no_rounding) || s->pict_type==B_TYPE){
2520                     op_pix = s->dsp.put_pixels_tab;
2521                     op_qpix= s->dsp.put_qpel_pixels_tab;
2522                 }else{
2523                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2524                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2525                 }
2526
2527                 if (s->mv_dir & MV_DIR_FORWARD) {
2528                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2529                     op_pix = s->dsp.avg_pixels_tab;
2530                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2531                 }
2532                 if (s->mv_dir & MV_DIR_BACKWARD) {
2533                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2534                 }
2535             }
2536
2537             /* skip dequant / idct if we are really late ;) */
2538             if(s->hurry_up>1) return;
2539
2540             /* add dct residue */
2541             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2542                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2543                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2544                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2545                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2546                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2547
2548                 if(!(s->flags&CODEC_FLAG_GRAY)){
2549                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2550                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2551                 }
2552             } else if(s->codec_id != CODEC_ID_WMV2){
2553                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2554                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2555                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2556                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2557
2558                 if(!(s->flags&CODEC_FLAG_GRAY)){
2559                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2560                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2561                 }
2562             }
2563 #ifdef CONFIG_RISKY
2564             else{
2565                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2566             }
2567 #endif
2568         } else {
2569             /* dct only in intra block */
2570             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2571                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2572                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2573                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2574                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2575
2576                 if(!(s->flags&CODEC_FLAG_GRAY)){
2577                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2578                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2579                 }
2580             }else{
2581                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2582                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2583                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2584                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2585
2586                 if(!(s->flags&CODEC_FLAG_GRAY)){
2587                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2588                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2589                 }
2590             }
2591         }
2592     }
2593 }
2594
2595 #ifdef CONFIG_ENCODERS
2596
2597 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2598 {
2599     static const char tab[64]=
2600         {3,2,2,1,1,1,1,1,
2601          1,1,1,1,1,1,1,1,
2602          1,1,1,1,1,1,1,1,
2603          0,0,0,0,0,0,0,0,
2604          0,0,0,0,0,0,0,0,
2605          0,0,0,0,0,0,0,0,
2606          0,0,0,0,0,0,0,0,
2607          0,0,0,0,0,0,0,0};
2608     int score=0;
2609     int run=0;
2610     int i;
2611     DCTELEM *block= s->block[n];
2612     const int last_index= s->block_last_index[n];
2613     int skip_dc;
2614
2615     if(threshold<0){
2616         skip_dc=0;
2617         threshold= -threshold;
2618     }else
2619         skip_dc=1;
2620
2621     /* are all which we could set to zero are allready zero? */
2622     if(last_index<=skip_dc - 1) return;
2623
2624     for(i=0; i<=last_index; i++){
2625         const int j = s->intra_scantable.permutated[i];
2626         const int level = ABS(block[j]);
2627         if(level==1){
2628             if(skip_dc && i==0) continue;
2629             score+= tab[run];
2630             run=0;
2631         }else if(level>1){
2632             return;
2633         }else{
2634             run++;
2635         }
2636     }
2637     if(score >= threshold) return;
2638     for(i=skip_dc; i<=last_index; i++){
2639         const int j = s->intra_scantable.permutated[i];
2640         block[j]=0;
2641     }
2642     if(block[0]) s->block_last_index[n]= 0;
2643     else         s->block_last_index[n]= -1;
2644 }
2645
2646 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2647 {
2648     int i;
2649     const int maxlevel= s->max_qcoeff;
2650     const int minlevel= s->min_qcoeff;
2651
2652     if(s->mb_intra){
2653         i=1; //skip clipping of intra dc
2654     }else
2655         i=0;
2656
2657     for(;i<=last_index; i++){
2658         const int j= s->intra_scantable.permutated[i];
2659         int level = block[j];
2660
2661         if     (level>maxlevel) level=maxlevel;
2662         else if(level<minlevel) level=minlevel;
2663
2664         block[j]= level;
2665     }
2666 }
2667
2668 #if 0
2669 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2670     int score=0;
2671     int x,y;
2672
2673     for(y=0; y<7; y++){
2674         for(x=0; x<16; x+=4){
2675             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
2676                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2677         }
2678         s+= stride;
2679     }
2680
2681     return score;
2682 }
2683
2684 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2685     int score=0;
2686     int x,y;
2687
2688     for(y=0; y<7; y++){
2689         for(x=0; x<16; x++){
2690             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2691         }
2692         s1+= stride;
2693         s2+= stride;
2694     }
2695
2696     return score;
2697 }
2698 #else
2699 #define SQ(a) ((a)*(a))
2700
2701 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2702     int score=0;
2703     int x,y;
2704
2705     for(y=0; y<7; y++){
2706         for(x=0; x<16; x+=4){
2707             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
2708                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2709         }
2710         s+= stride;
2711     }
2712
2713     return score;
2714 }
2715
2716 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2717     int score=0;
2718     int x,y;
2719
2720     for(y=0; y<7; y++){
2721         for(x=0; x<16; x++){
2722             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2723         }
2724         s1+= stride;
2725         s2+= stride;
2726     }
2727
2728     return score;
2729 }
2730
2731 #endif
2732
2733 #endif //CONFIG_ENCODERS
2734
2735 /**
2736  *
2737  * @param h is the normal height, this will be reduced automatically if needed for the last row
2738  */
2739 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2740     if (s->avctx->draw_horiz_band) {
2741         AVFrame *src;
2742         int offset[4];
2743
2744         if(s->picture_structure != PICT_FRAME){
2745             h <<= 1;
2746             y <<= 1;
2747             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2748         }
2749
2750         h= FFMIN(h, s->height - y);
2751
2752         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2753             src= (AVFrame*)s->current_picture_ptr;
2754         else if(s->last_picture_ptr)
2755             src= (AVFrame*)s->last_picture_ptr;
2756         else
2757             return;
2758
2759         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2760             offset[0]=
2761             offset[1]=
2762             offset[2]=
2763             offset[3]= 0;
2764         }else{
2765             offset[0]= y * s->linesize;;
2766             offset[1]=
2767             offset[2]= (y>>1) * s->uvlinesize;;
2768             offset[3]= 0;
2769         }
2770
2771         emms_c();
2772
2773         s->avctx->draw_horiz_band(s->avctx, src, offset,
2774                                   y, s->picture_structure, h);
2775     }
2776 }
2777
2778 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2779     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
2780     const int uvlinesize= s->current_picture.linesize[1];
2781
2782     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2783     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
2784     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
2785     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
2786     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2787     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2788
2789     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
2790         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
2791         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
2792         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
2793     }else{
2794         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
2795         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2796         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2797     }
2798 }
2799
2800 #ifdef CONFIG_ENCODERS
2801
2802 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2803 {
2804     const int mb_x= s->mb_x;
2805     const int mb_y= s->mb_y;
2806     int i;
2807     int skip_dct[6];
2808     int dct_offset   = s->linesize*8; //default for progressive frames
2809
2810     for(i=0; i<6; i++) skip_dct[i]=0;
2811
2812     if(s->adaptive_quant){
2813         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
2814
2815         if(s->out_format==FMT_H263){
2816             if     (s->dquant> 2) s->dquant= 2;
2817             else if(s->dquant<-2) s->dquant=-2;
2818         }
2819
2820         if(s->codec_id==CODEC_ID_MPEG4){
2821             if(!s->mb_intra){
2822                 if(s->mv_dir&MV_DIRECT)
2823                     s->dquant=0;
2824
2825                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2826             }
2827         }
2828         s->qscale+= s->dquant;
2829         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2830         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2831     }
2832
2833     if (s->mb_intra) {
2834         uint8_t *ptr;
2835         int wrap_y;
2836         int emu=0;
2837
2838         wrap_y = s->linesize;
2839         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2840
2841         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2842             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2843             ptr= s->edge_emu_buffer;
2844             emu=1;
2845         }
2846
2847         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2848             int progressive_score, interlaced_score;
2849
2850             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2851             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2852
2853             if(progressive_score > interlaced_score + 100){
2854                 s->interlaced_dct=1;
2855
2856                 dct_offset= wrap_y;
2857                 wrap_y<<=1;
2858             }else
2859                 s->interlaced_dct=0;
2860         }
2861
2862         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2863         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2864         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2865         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2866
2867         if(s->flags&CODEC_FLAG_GRAY){
2868             skip_dct[4]= 1;
2869             skip_dct[5]= 1;
2870         }else{
2871             int wrap_c = s->uvlinesize;
2872             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2873             if(emu){
2874                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2875                 ptr= s->edge_emu_buffer;
2876             }
2877             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2878
2879             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2880             if(emu){
2881                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2882                 ptr= s->edge_emu_buffer;
2883             }
2884             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2885         }
2886     }else{
2887         op_pixels_func (*op_pix)[4];
2888         qpel_mc_func (*op_qpix)[16];
2889         uint8_t *dest_y, *dest_cb, *dest_cr;
2890         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2891         int wrap_y, wrap_c;
2892         int emu=0;
2893
2894         dest_y  = s->dest[0];
2895         dest_cb = s->dest[1];
2896         dest_cr = s->dest[2];
2897         wrap_y = s->linesize;
2898         wrap_c = s->uvlinesize;
2899         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2900         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2901         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2902
2903         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2904             op_pix = s->dsp.put_pixels_tab;
2905             op_qpix= s->dsp.put_qpel_pixels_tab;
2906         }else{
2907             op_pix = s->dsp.put_no_rnd_pixels_tab;
2908             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2909         }
2910
2911         if (s->mv_dir & MV_DIR_FORWARD) {
2912             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2913             op_pix = s->dsp.avg_pixels_tab;
2914             op_qpix= s->dsp.avg_qpel_pixels_tab;
2915         }
2916         if (s->mv_dir & MV_DIR_BACKWARD) {
2917             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2918         }
2919
2920         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2921             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2922             ptr_y= s->edge_emu_buffer;
2923             emu=1;
2924         }
2925
2926         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2927             int progressive_score, interlaced_score;
2928
2929             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  )
2930                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2931             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2932                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2933
2934             if(progressive_score > interlaced_score + 600){
2935                 s->interlaced_dct=1;
2936
2937                 dct_offset= wrap_y;
2938                 wrap_y<<=1;
2939             }else
2940                 s->interlaced_dct=0;
2941         }
2942
2943         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2944         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2945         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2946         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2947
2948         if(s->flags&CODEC_FLAG_GRAY){
2949             skip_dct[4]= 1;
2950             skip_dct[5]= 1;
2951         }else{
2952             if(emu){
2953                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2954                 ptr_cb= s->edge_emu_buffer;
2955             }
2956             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2957             if(emu){
2958                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2959                 ptr_cr= s->edge_emu_buffer;
2960             }
2961             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2962         }
2963         /* pre quantization */
2964         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2965             //FIXME optimize
2966             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2967             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2968             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2969             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2970             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2971             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2972 #if 0
2973 {
2974  static int stat[7];
2975  int num=0;
2976  for(i=0; i<6; i++)
2977   if(skip_dct[i]) num++;
2978  stat[num]++;
2979
2980  if(s->mb_x==0 && s->mb_y==0){
2981   for(i=0; i<7; i++){
2982    printf("%6d %1d\n", stat[i], i);
2983   }
2984  }
2985 }
2986 #endif
2987         }
2988
2989     }
2990
2991 #if 0
2992             {
2993                 float adap_parm;
2994
2995                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
2996                             ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2997
2998                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d",
2999                         (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P',
3000                         s->qscale, adap_parm, s->qscale*adap_parm,
3001                         s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
3002             }
3003 #endif
3004     /* DCT & quantize */
3005     if(s->out_format==FMT_MJPEG){
3006         for(i=0;i<6;i++) {
3007             int overflow;
3008             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
3009             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3010         }
3011     }else{
3012         for(i=0;i<6;i++) {
3013             if(!skip_dct[i]){
3014                 int overflow;
3015                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3016             // FIXME we could decide to change to quantizer instead of clipping
3017             // JS: I don't think that would be a good idea it could lower quality instead
3018             //     of improve it. Just INTRADC clipping deserves changes in quantizer
3019                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3020             }else
3021                 s->block_last_index[i]= -1;
3022         }
3023         if(s->luma_elim_threshold && !s->mb_intra)
3024             for(i=0; i<4; i++)
3025                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3026         if(s->chroma_elim_threshold && !s->mb_intra)
3027             for(i=4; i<6; i++)
3028                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3029     }
3030
3031     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3032         s->block_last_index[4]=
3033         s->block_last_index[5]= 0;
3034         s->block[4][0]=
3035         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3036     }
3037
3038     /* huffman encode */
3039     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3040     case CODEC_ID_MPEG1VIDEO:
3041     case CODEC_ID_MPEG2VIDEO:
3042         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3043 #ifdef CONFIG_RISKY
3044     case CODEC_ID_MPEG4:
3045         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3046     case CODEC_ID_MSMPEG4V2:
3047     case CODEC_ID_MSMPEG4V3:
3048     case CODEC_ID_WMV1:
3049         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3050     case CODEC_ID_WMV2:
3051          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3052     case CODEC_ID_H263:
3053     case CODEC_ID_H263P:
3054     case CODEC_ID_FLV1:
3055     case CODEC_ID_RV10:
3056         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3057 #endif
3058     case CODEC_ID_MJPEG:
3059         mjpeg_encode_mb(s, s->block); break;
3060     default:
3061         assert(0);
3062     }
3063 }
3064
3065 #endif //CONFIG_ENCODERS
3066
3067 /**
3068  * combines the (truncated) bitstream to a complete frame
3069  * @returns -1 if no complete frame could be created
3070  */
3071 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3072     ParseContext *pc= &s->parse_context;
3073
3074 #if 0
3075     if(pc->overread){
3076         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3077         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3078     }
3079 #endif
3080
3081     /* copy overreaded byes from last frame into buffer */
3082     for(; pc->overread>0; pc->overread--){
3083         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3084     }
3085
3086     pc->last_index= pc->index;
3087
3088     /* copy into buffer end return */
3089     if(next == END_NOT_FOUND){
3090         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3091
3092         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3093         pc->index += *buf_size;
3094         return -1;
3095     }
3096
3097     *buf_size=
3098     pc->overread_index= pc->index + next;
3099
3100     /* append to buffer */
3101     if(pc->index){
3102         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3103
3104         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3105         pc->index = 0;
3106         *buf= pc->buffer;
3107     }
3108
3109     /* store overread bytes */
3110     for(;next < 0; next++){
3111         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3112         pc->overread++;
3113     }
3114
3115 #if 0
3116     if(pc->overread){
3117         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3118         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3119     }
3120 #endif
3121
3122     return 0;
3123 }
3124
3125 void ff_mpeg_flush(AVCodecContext *avctx){
3126     int i;
3127     MpegEncContext *s = avctx->priv_data;
3128
3129     for(i=0; i<MAX_PICTURE_COUNT; i++){
3130        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3131                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3132         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3133     }
3134     s->last_picture_ptr = s->next_picture_ptr = NULL;
3135
3136     s->parse_context.state= -1;
3137     s->parse_context.frame_start_found= 0;
3138     s->parse_context.overread= 0;
3139     s->parse_context.overread_index= 0;
3140     s->parse_context.index= 0;
3141     s->parse_context.last_index= 0;
3142 }
3143
3144 #ifdef CONFIG_ENCODERS
3145 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3146 {
3147     int bytes= length>>4;
3148     int bits= length&15;
3149     int i;
3150
3151     if(length==0) return;
3152
3153     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3154     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3155 }
3156
3157 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3158     int i;
3159
3160     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3161
3162     /* mpeg1 */
3163     d->mb_skip_run= s->mb_skip_run;
3164     for(i=0; i<3; i++)
3165         d->last_dc[i]= s->last_dc[i];
3166
3167     /* statistics */
3168     d->mv_bits= s->mv_bits;
3169     d->i_tex_bits= s->i_tex_bits;
3170     d->p_tex_bits= s->p_tex_bits;
3171     d->i_count= s->i_count;
3172     d->f_count= s->f_count;
3173     d->b_count= s->b_count;
3174     d->skip_count= s->skip_count;
3175     d->misc_bits= s->misc_bits;
3176     d->last_bits= 0;
3177
3178     d->mb_skiped= 0;
3179     d->qscale= s->qscale;
3180 }
3181
3182 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3183     int i;
3184
3185     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
3186     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3187
3188     /* mpeg1 */
3189     d->mb_skip_run= s->mb_skip_run;
3190     for(i=0; i<3; i++)
3191         d->last_dc[i]= s->last_dc[i];
3192
3193     /* statistics */
3194     d->mv_bits= s->mv_bits;
3195     d->i_tex_bits= s->i_tex_bits;
3196     d->p_tex_bits= s->p_tex_bits;
3197     d->i_count= s->i_count;
3198     d->f_count= s->f_count;
3199     d->b_count= s->b_count;
3200     d->skip_count= s->skip_count;
3201     d->misc_bits= s->misc_bits;
3202
3203     d->mb_intra= s->mb_intra;
3204     d->mb_skiped= s->mb_skiped;
3205     d->mv_type= s->mv_type;
3206     d->mv_dir= s->mv_dir;
3207     d->pb= s->pb;
3208     if(s->data_partitioning){
3209         d->pb2= s->pb2;
3210         d->tex_pb= s->tex_pb;
3211     }
3212     d->block= s->block;
3213     for(i=0; i<6; i++)
3214         d->block_last_index[i]= s->block_last_index[i];
3215     d->interlaced_dct= s->interlaced_dct;
3216     d->qscale= s->qscale;
3217 }
3218
3219 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
3220                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3221                            int *dmin, int *next_block, int motion_x, int motion_y)
3222 {
3223     int score;
3224     uint8_t *dest_backup[3];
3225
3226     copy_context_before_encode(s, backup, type);
3227
3228     s->block= s->blocks[*next_block];
3229     s->pb= pb[*next_block];
3230     if(s->data_partitioning){
3231         s->pb2   = pb2   [*next_block];
3232         s->tex_pb= tex_pb[*next_block];
3233     }
3234
3235     if(*next_block){
3236         memcpy(dest_backup, s->dest, sizeof(s->dest));
3237         s->dest[0] = s->me.scratchpad;
3238         s->dest[1] = s->me.scratchpad + 16;
3239         s->dest[2] = s->me.scratchpad + 16 + 8;
3240         assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding
3241         assert(s->linesize >= 64); //FIXME
3242     }
3243
3244     encode_mb(s, motion_x, motion_y);
3245
3246     score= get_bit_count(&s->pb);
3247     if(s->data_partitioning){
3248         score+= get_bit_count(&s->pb2);
3249         score+= get_bit_count(&s->tex_pb);
3250     }
3251
3252     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3253         MPV_decode_mb(s, s->block);
3254
3255         score *= s->qscale * s->qscale * 109;
3256         score += sse_mb(s) << 7;
3257     }
3258
3259     if(*next_block){
3260         memcpy(s->dest, dest_backup, sizeof(s->dest));
3261     }
3262
3263     if(score<*dmin){
3264         *dmin= score;
3265         *next_block^=1;
3266
3267         copy_context_after_encode(best, s, type);
3268     }
3269 }
3270
3271 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3272     uint32_t *sq = squareTbl + 256;
3273     int acc=0;
3274     int x,y;
3275
3276     if(w==16 && h==16)
3277         return s->dsp.sse[0](NULL, src1, src2, stride);
3278     else if(w==8 && h==8)
3279         return s->dsp.sse[1](NULL, src1, src2, stride);
3280
3281     for(y=0; y<h; y++){
3282         for(x=0; x<w; x++){
3283             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3284         }
3285     }
3286
3287     assert(acc>=0);
3288
3289     return acc;
3290 }
3291
3292 static int sse_mb(MpegEncContext *s){
3293     int w= 16;
3294     int h= 16;
3295
3296     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3297     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3298
3299     if(w==16 && h==16)
3300         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3301                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3302                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3303     else
3304         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3305                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3306                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3307 }
3308
3309 static void encode_picture(MpegEncContext *s, int picture_number)
3310 {
3311     int mb_x, mb_y, pdif = 0;
3312     int i;
3313     int bits;
3314     MpegEncContext best_s, backup_s;
3315     uint8_t bit_buf[2][3000];
3316     uint8_t bit_buf2[2][3000];
3317     uint8_t bit_buf_tex[2][3000];
3318     PutBitContext pb[2], pb2[2], tex_pb[2];
3319
3320     for(i=0; i<2; i++){
3321         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
3322         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
3323         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
3324     }
3325
3326     s->picture_number = picture_number;
3327
3328     /* Reset the average MB variance */
3329     s->current_picture.mb_var_sum = 0;
3330     s->current_picture.mc_mb_var_sum = 0;
3331
3332 #ifdef CONFIG_RISKY
3333     /* we need to initialize some time vars before we can encode b-frames */
3334     // RAL: Condition added for MPEG1VIDEO
3335     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3336         ff_set_mpeg4_time(s, s->picture_number);
3337 #endif
3338
3339     s->scene_change_score=0;
3340
3341     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
3342
3343     if(s->pict_type==I_TYPE){
3344         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3345         else                        s->no_rounding=0;
3346     }else if(s->pict_type!=B_TYPE){
3347         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3348             s->no_rounding ^= 1;
3349     }
3350
3351     /* Estimate motion for every MB */
3352     s->mb_intra=0; //for the rate distoration & bit compare functions
3353     if(s->pict_type != I_TYPE){
3354         if(s->pict_type != B_TYPE){
3355             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3356                 s->me.pre_pass=1;
3357                 s->me.dia_size= s->avctx->pre_dia_size;
3358
3359                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3360                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3361                         s->mb_x = mb_x;
3362                         s->mb_y = mb_y;
3363                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3364                     }
3365                 }
3366                 s->me.pre_pass=0;
3367             }
3368         }
3369
3370         s->me.dia_size= s->avctx->dia_size;
3371         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3372             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3373             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3374             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3375             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3376             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3377                 s->mb_x = mb_x;
3378                 s->mb_y = mb_y;
3379                 s->block_index[0]+=2;
3380                 s->block_index[1]+=2;
3381                 s->block_index[2]+=2;
3382                 s->block_index[3]+=2;
3383
3384                 /* compute motion vector & mb_type and store in context */
3385                 if(s->pict_type==B_TYPE)
3386                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3387                 else
3388                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3389             }
3390         }
3391     }else /* if(s->pict_type == I_TYPE) */{
3392         /* I-Frame */
3393         //FIXME do we need to zero them?
3394         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3395         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3396         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3397
3398         if(!s->fixed_qscale){
3399             /* finding spatial complexity for I-frame rate control */
3400             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3401                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3402                     int xx = mb_x * 16;
3403                     int yy = mb_y * 16;
3404                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3405                     int varc;
3406                     int sum = s->dsp.pix_sum(pix, s->linesize);
3407
3408                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3409
3410                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3411                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3412                     s->current_picture.mb_var_sum    += varc;
3413                 }
3414             }
3415         }
3416     }
3417     emms_c();
3418
3419     if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
3420         s->pict_type= I_TYPE;
3421         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3422 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3423     }
3424
3425     if(!s->umvplus){
3426         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3427             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3428
3429             ff_fix_long_p_mvs(s);
3430         }
3431
3432         if(s->pict_type==B_TYPE){
3433             int a, b;
3434
3435             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3436             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3437             s->f_code = FFMAX(a, b);
3438
3439             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3440             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3441             s->b_code = FFMAX(a, b);
3442
3443             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3444             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3445             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3446             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3447         }
3448     }
3449
3450     if (s->fixed_qscale)
3451         s->frame_qscale = s->current_picture.quality;
3452     else
3453         s->frame_qscale = ff_rate_estimate_qscale(s);
3454
3455     if(s->adaptive_quant){
3456 #ifdef CONFIG_RISKY
3457         switch(s->codec_id){
3458         case CODEC_ID_MPEG4:
3459             ff_clean_mpeg4_qscales(s);
3460             break;
3461         case CODEC_ID_H263:
3462         case CODEC_ID_H263P:
3463         case CODEC_ID_FLV1:
3464             ff_clean_h263_qscales(s);
3465             break;
3466         }
3467 #endif
3468
3469         s->qscale= s->current_picture.qscale_table[0];
3470     }else
3471         s->qscale= (int)(s->frame_qscale + 0.5);
3472
3473     if (s->out_format == FMT_MJPEG) {
3474         /* for mjpeg, we do include qscale in the matrix */
3475         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3476         for(i=1;i<64;i++){
3477             int j= s->dsp.idct_permutation[i];
3478
3479             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3480         }
3481         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3482                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3483     }
3484
3485     //FIXME var duplication
3486     s->current_picture.key_frame= s->pict_type == I_TYPE;
3487     s->current_picture.pict_type= s->pict_type;
3488
3489     if(s->current_picture.key_frame)
3490         s->picture_in_gop_number=0;
3491
3492     s->last_bits= get_bit_count(&s->pb);
3493     switch(s->out_format) {
3494     case FMT_MJPEG:
3495         mjpeg_picture_header(s);
3496         break;
3497 #ifdef CONFIG_RISKY
3498     case FMT_H263:
3499         if (s->codec_id == CODEC_ID_WMV2)
3500             ff_wmv2_encode_picture_header(s, picture_number);
3501         else if (s->h263_msmpeg4)
3502             msmpeg4_encode_picture_header(s, picture_number);
3503         else if (s->h263_pred)
3504             mpeg4_encode_picture_header(s, picture_number);
3505         else if (s->h263_rv10)
3506             rv10_encode_picture_header(s, picture_number);
3507         else if (s->codec_id == CODEC_ID_FLV1)
3508             ff_flv_encode_picture_header(s, picture_number);
3509         else
3510             h263_encode_picture_header(s, picture_number);
3511         break;
3512 #endif
3513     case FMT_MPEG1:
3514         mpeg1_encode_picture_header(s, picture_number);
3515         break;
3516     case FMT_H264:
3517         break;
3518     }
3519     bits= get_bit_count(&s->pb);
3520     s->header_bits= bits - s->last_bits;
3521     s->last_bits= bits;
3522     s->mv_bits=0;
3523     s->misc_bits=0;
3524     s->i_tex_bits=0;
3525     s->p_tex_bits=0;
3526     s->i_count=0;
3527     s->f_count=0;
3528     s->b_count=0;
3529     s->skip_count=0;
3530
3531     for(i=0; i<3; i++){
3532         /* init last dc values */
3533         /* note: quant matrix value (8) is implied here */
3534         s->last_dc[i] = 128;
3535
3536         s->current_picture_ptr->error[i] = 0;
3537     }
3538     s->mb_skip_run = 0;
3539     s->last_mv[0][0][0] = 0;
3540     s->last_mv[0][0][1] = 0;
3541     s->last_mv[1][0][0] = 0;
3542     s->last_mv[1][0][1] = 0;
3543
3544     s->last_mv_dir = 0;
3545
3546 #ifdef CONFIG_RISKY
3547     switch(s->codec_id){
3548     case CODEC_ID_H263:
3549     case CODEC_ID_H263P:
3550     case CODEC_ID_FLV1:
3551         s->gob_index = ff_h263_get_gob_height(s);
3552         break;
3553     case CODEC_ID_MPEG4:
3554         if(s->partitioned_frame)
3555             ff_mpeg4_init_partitions(s);
3556         break;
3557     }
3558 #endif
3559
3560     s->resync_mb_x=0;
3561     s->resync_mb_y=0;
3562     s->first_slice_line = 1;
3563     s->ptr_lastgob = s->pb.buf;
3564     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3565         s->mb_x=0;
3566         s->mb_y= mb_y;
3567
3568         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3569         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3570         ff_init_block_index(s);
3571
3572         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3573             const int xy= mb_y*s->mb_stride + mb_x;
3574             int mb_type= s->mb_type[xy];
3575 //            int d;
3576             int dmin= INT_MAX;
3577
3578             s->mb_x = mb_x;
3579             ff_update_block_index(s);
3580
3581             /* write gob / video packet header  */
3582 #ifdef CONFIG_RISKY
3583             if(s->rtp_mode && mb_y + mb_x>0){
3584                 int current_packet_size, is_gob_start;
3585
3586                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3587                 is_gob_start=0;
3588
3589                 if(s->codec_id==CODEC_ID_MPEG4){
3590                     if(current_packet_size >= s->rtp_payload_size){
3591
3592                         if(s->partitioned_frame){
3593                             ff_mpeg4_merge_partitions(s);
3594                             ff_mpeg4_init_partitions(s);
3595                         }
3596                         ff_mpeg4_encode_video_packet_header(s);
3597
3598                         if(s->flags&CODEC_FLAG_PASS1){
3599                             int bits= get_bit_count(&s->pb);
3600                             s->misc_bits+= bits - s->last_bits;
3601                             s->last_bits= bits;
3602                         }
3603                         ff_mpeg4_clean_buffers(s);
3604                         is_gob_start=1;
3605                     }
3606                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3607                     if(   current_packet_size >= s->rtp_payload_size
3608                        && s->mb_skip_run==0){
3609                         ff_mpeg1_encode_slice_header(s);
3610                         ff_mpeg1_clean_buffers(s);
3611                         is_gob_start=1;
3612                     }
3613                 }else if(s->codec_id==CODEC_ID_MPEG2VIDEO){
3614                     if(   (   current_packet_size >= s->rtp_payload_size || mb_x==0)
3615                        && s->mb_skip_run==0){
3616                         ff_mpeg1_encode_slice_header(s);
3617                         ff_mpeg1_clean_buffers(s);
3618                         is_gob_start=1;
3619                     }
3620                 }else{
3621                     if(current_packet_size >= s->rtp_payload_size
3622                        && s->mb_x==0 && s->mb_y%s->gob_index==0){
3623
3624                         h263_encode_gob_header(s, mb_y);
3625                         is_gob_start=1;
3626                     }
3627                 }
3628
3629                 if(is_gob_start){
3630                     s->ptr_lastgob = pbBufPtr(&s->pb);
3631                     s->first_slice_line=1;
3632                     s->resync_mb_x=mb_x;
3633                     s->resync_mb_y=mb_y;
3634                 }
3635             }
3636 #endif
3637
3638             if(  (s->resync_mb_x   == s->mb_x)
3639                && s->resync_mb_y+1 == s->mb_y){
3640                 s->first_slice_line=0;
3641             }
3642
3643             s->mb_skiped=0;
3644
3645             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3646                 int next_block=0;
3647                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3648
3649                 copy_context_before_encode(&backup_s, s, -1);
3650                 backup_s.pb= s->pb;
3651                 best_s.data_partitioning= s->data_partitioning;
3652                 best_s.partitioned_frame= s->partitioned_frame;
3653                 if(s->data_partitioning){
3654                     backup_s.pb2= s->pb2;
3655                     backup_s.tex_pb= s->tex_pb;
3656                 }
3657
3658                 if(mb_type&MB_TYPE_INTER){
3659                     s->mv_dir = MV_DIR_FORWARD;
3660                     s->mv_type = MV_TYPE_16X16;
3661                     s->mb_intra= 0;
3662                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3663                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3664                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb,
3665                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3666                 }
3667                 if(mb_type&MB_TYPE_INTER4V){
3668                     s->mv_dir = MV_DIR_FORWARD;
3669                     s->mv_type = MV_TYPE_8X8;
3670                     s->mb_intra= 0;
3671                     for(i=0; i<4; i++){
3672                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3673                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3674                     }
3675                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb,
3676                                  &dmin, &next_block, 0, 0);
3677                 }
3678                 if(mb_type&MB_TYPE_FORWARD){
3679                     s->mv_dir = MV_DIR_FORWARD;
3680                     s->mv_type = MV_TYPE_16X16;
3681                     s->mb_intra= 0;
3682                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3683                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3684                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb,
3685                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3686                 }
3687                 if(mb_type&MB_TYPE_BACKWARD){
3688                     s->mv_dir = MV_DIR_BACKWARD;
3689                     s->mv_type = MV_TYPE_16X16;
3690                     s->mb_intra= 0;
3691                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3692                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3693                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3694                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3695                 }
3696                 if(mb_type&MB_TYPE_BIDIR){
3697                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3698                     s->mv_type = MV_TYPE_16X16;
3699                     s->mb_intra= 0;
3700                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3701                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3702                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3703                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3704                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb,
3705                                  &dmin, &next_block, 0, 0);
3706                 }
3707                 if(mb_type&MB_TYPE_DIRECT){
3708                     int mx= s->b_direct_mv_table[xy][0];
3709                     int my= s->b_direct_mv_table[xy][1];
3710
3711                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3712                     s->mb_intra= 0;
3713 #ifdef CONFIG_RISKY
3714                     ff_mpeg4_set_direct_mv(s, mx, my);
3715 #endif
3716                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
3717                                  &dmin, &next_block, mx, my);
3718                 }
3719                 if(mb_type&MB_TYPE_INTRA){
3720                     s->mv_dir = 0;
3721                     s->mv_type = MV_TYPE_16X16;
3722                     s->mb_intra= 1;
3723                     s->mv[0][0][0] = 0;
3724                     s->mv[0][0][1] = 0;
3725                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb,
3726                                  &dmin, &next_block, 0, 0);
3727                     if(s->h263_pred || s->h263_aic){
3728                         if(best_s.mb_intra)
3729                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3730                         else
3731                             ff_clean_intra_table_entries(s); //old mode?
3732                     }
3733                 }
3734                 copy_context_after_encode(s, &best_s, -1);
3735
3736                 pb_bits_count= get_bit_count(&s->pb);
3737                 flush_put_bits(&s->pb);
3738                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3739                 s->pb= backup_s.pb;
3740
3741                 if(s->data_partitioning){
3742                     pb2_bits_count= get_bit_count(&s->pb2);
3743                     flush_put_bits(&s->pb2);
3744                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3745                     s->pb2= backup_s.pb2;
3746
3747                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3748                     flush_put_bits(&s->tex_pb);
3749                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3750                     s->tex_pb= backup_s.tex_pb;
3751                 }
3752                 s->last_bits= get_bit_count(&s->pb);
3753
3754 #ifdef CONFIG_RISKY
3755                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3756                     ff_h263_update_motion_val(s);
3757 #endif
3758
3759                 if(next_block==0){
3760                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
3761                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
3762                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
3763                 }
3764
3765                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3766                     MPV_decode_mb(s, s->block);
3767             } else {
3768                 int motion_x, motion_y;
3769                 int intra_score;
3770                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3771
3772               if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is usefull at all
3773                 /* get luma score */
3774                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3775                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
3776                 }else{
3777                     uint8_t *dest_y;
3778
3779                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3780                     mean*= 0x01010101;
3781
3782                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3783
3784                     for(i=0; i<16; i++){
3785                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3786                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3787                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3788                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3789                     }
3790
3791                     s->mb_intra=1;
3792                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3793
3794 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8,
3795                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3796                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3797                 }
3798
3799                 /* get chroma score */
3800                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3801                     int i;
3802
3803                     s->mb_intra=1;
3804                     for(i=1; i<3; i++){
3805                         uint8_t *dest_c;
3806                         int mean;
3807
3808                         if(s->out_format == FMT_H263){
3809                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3810                         }else{
3811                             mean= (s->last_dc[i] + 4)>>3;
3812                         }
3813                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3814
3815                         mean*= 0x01010101;
3816                         for(i=0; i<8; i++){
3817                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3818                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3819                         }
3820
3821                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3822                     }
3823                 }
3824
3825                 /* bias */
3826                 switch(s->avctx->mb_cmp&0xFF){
3827                 default:
3828                 case FF_CMP_SAD:
3829                     intra_score+= 32*s->qscale;
3830                     break;
3831                 case FF_CMP_SSE:
3832                     intra_score+= 24*s->qscale*s->qscale;
3833                     break;
3834                 case FF_CMP_SATD:
3835                     intra_score+= 96*s->qscale;
3836                     break;
3837                 case FF_CMP_DCT:
3838                     intra_score+= 48*s->qscale;
3839                     break;
3840                 case FF_CMP_BIT:
3841                     intra_score+= 16;
3842                     break;
3843                 case FF_CMP_PSNR:
3844                 case FF_CMP_RD:
3845                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3846                     break;
3847                 }
3848
3849                 if(intra_score < inter_score)
3850                     mb_type= MB_TYPE_INTRA;
3851               }
3852
3853                 s->mv_type=MV_TYPE_16X16;
3854                 // only one MB-Type possible
3855
3856                 switch(mb_type){
3857                 case MB_TYPE_INTRA:
3858                     s->mv_dir = 0;
3859                     s->mb_intra= 1;
3860                     motion_x= s->mv[0][0][0] = 0;
3861                     motion_y= s->mv[0][0][1] = 0;
3862                     break;
3863                 case MB_TYPE_INTER:
3864                     s->mv_dir = MV_DIR_FORWARD;
3865                     s->mb_intra= 0;
3866                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3867                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3868                     break;
3869                 case MB_TYPE_INTER4V:
3870                     s->mv_dir = MV_DIR_FORWARD;
3871                     s->mv_type = MV_TYPE_8X8;
3872                     s->mb_intra= 0;
3873                     for(i=0; i<4; i++){
3874                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3875                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3876                     }
3877                     motion_x= motion_y= 0;
3878                     break;
3879                 case MB_TYPE_DIRECT:
3880                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3881                     s->mb_intra= 0;
3882                     motion_x=s->b_direct_mv_table[xy][0];
3883                     motion_y=s->b_direct_mv_table[xy][1];
3884 #ifdef CONFIG_RISKY
3885                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3886 #endif
3887                     break;
3888                 case MB_TYPE_BIDIR:
3889                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3890                     s->mb_intra= 0;
3891                     motion_x=0;
3892                     motion_y=0;
3893                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3894                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3895                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3896                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3897                     break;
3898                 case MB_TYPE_BACKWARD:
3899                     s->mv_dir = MV_DIR_BACKWARD;
3900                     s->mb_intra= 0;
3901                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3902                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3903                     break;
3904                 case MB_TYPE_FORWARD:
3905                     s->mv_dir = MV_DIR_FORWARD;
3906                     s->mb_intra= 0;
3907                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3908                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3909 //                    printf(" %d %d ", motion_x, motion_y);
3910                     break;
3911                 default:
3912                     motion_x=motion_y=0; //gcc warning fix
3913                     printf("illegal MB type\n");
3914                 }
3915
3916                 encode_mb(s, motion_x, motion_y);
3917
3918                 // RAL: Update last macrobloc type
3919                 s->last_mv_dir = s->mv_dir;
3920
3921 #ifdef CONFIG_RISKY
3922                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3923                     ff_h263_update_motion_val(s);
3924 #endif
3925
3926                 MPV_decode_mb(s, s->block);
3927             }
3928
3929             /* clean the MV table in IPS frames for direct mode in B frames */
3930             if(s->mb_intra /* && I,P,S_TYPE */){
3931                 s->p_mv_table[xy][0]=0;
3932                 s->p_mv_table[xy][1]=0;
3933             }
3934
3935             if(s->flags&CODEC_FLAG_PSNR){
3936                 int w= 16;
3937                 int h= 16;
3938
3939                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3940                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3941
3942                 s->current_picture_ptr->error[0] += sse(
3943                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3944                     s->dest[0], w, h, s->linesize);
3945                 s->current_picture_ptr->error[1] += sse(
3946                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3947                     s->dest[1], w>>1, h>>1, s->uvlinesize);
3948                 s->current_picture_ptr->error[2] += sse(
3949                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3950                     s->dest[2], w>>1, h>>1, s->uvlinesize);
3951             }
3952 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3953         }
3954     }
3955     emms_c();
3956
3957 #ifdef CONFIG_RISKY
3958     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3959         ff_mpeg4_merge_partitions(s);
3960
3961     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3962         msmpeg4_encode_ext_header(s);
3963
3964     if(s->codec_id==CODEC_ID_MPEG4)
3965         ff_mpeg4_stuffing(&s->pb);
3966 #endif
3967
3968     //if (s->gob_number)
3969     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3970
3971     /* Send the last GOB if RTP */
3972     if (s->rtp_mode) {
3973         flush_put_bits(&s->pb);
3974         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3975         /* Call the RTP callback to send the last GOB */
3976         if (s->rtp_callback)
3977             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3978         s->ptr_lastgob = pbBufPtr(&s->pb);
3979         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3980     }
3981 }
3982
3983 static int dct_quantize_trellis_c(MpegEncContext *s,
3984                         DCTELEM *block, int n,
3985                         int qscale, int *overflow){
3986     const int *qmat;
3987     const uint8_t *scantable= s->intra_scantable.scantable;
3988     int max=0;
3989     unsigned int threshold1, threshold2;
3990     int bias=0;
3991     int run_tab[65];
3992     int level_tab[65];
3993     int score_tab[65];
3994     int last_run=0;
3995     int last_level=0;
3996     int last_score= 0;
3997     int last_i= 0;
3998     int coeff[3][64];
3999     int coeff_count[64];
4000     int lambda, qmul, qadd, start_i, last_non_zero, i, dc;
4001     const int esc_length= s->ac_esc_length;
4002     uint8_t * length;
4003     uint8_t * last_length;
4004     int score_limit=0;
4005     int left_limit= 0;
4006
4007     s->dsp.fdct (block);
4008
4009     qmul= qscale*16;
4010     qadd= ((qscale-1)|1)*8;
4011
4012     if (s->mb_intra) {
4013         int q;
4014         if (!s->h263_aic) {
4015             if (n < 4)
4016                 q = s->y_dc_scale;
4017             else
4018                 q = s->c_dc_scale;
4019             q = q << 3;
4020         } else{
4021             /* For AIC we skip quant/dequant of INTRADC */
4022             q = 1 << 3;
4023             qadd=0;
4024         }
4025
4026         /* note: block[0] is assumed to be positive */
4027         block[0] = (block[0] + (q >> 1)) / q;
4028         start_i = 1;
4029         last_non_zero = 0;
4030         qmat = s->q_intra_matrix[qscale];
4031         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4032             bias= 1<<(QMAT_SHIFT-1);
4033         length     = s->intra_ac_vlc_length;
4034         last_length= s->intra_ac_vlc_last_length;
4035     } else {
4036         start_i = 0;
4037         last_non_zero = -1;
4038         qmat = s->q_inter_matrix[qscale];
4039         length     = s->inter_ac_vlc_length;
4040         last_length= s->inter_ac_vlc_last_length;
4041     }
4042
4043     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4044     threshold2= (threshold1<<1);
4045
4046     for(i=start_i; i<64; i++) {
4047         const int j = scantable[i];
4048         const int k= i-start_i;
4049         int level = block[j];
4050         level = level * qmat[j];
4051
4052 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4053 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4054         if(((unsigned)(level+threshold1))>threshold2){
4055             if(level>0){
4056                 level= (bias + level)>>QMAT_SHIFT;
4057                 coeff[0][k]= level;
4058                 coeff[1][k]= level-1;
4059 //                coeff[2][k]= level-2;
4060             }else{
4061                 level= (bias - level)>>QMAT_SHIFT;
4062                 coeff[0][k]= -level;
4063                 coeff[1][k]= -level+1;
4064 //                coeff[2][k]= -level+2;
4065             }
4066             coeff_count[k]= FFMIN(level, 2);
4067             max |=level;
4068             last_non_zero = i;
4069         }else{
4070             coeff[0][k]= (level>>31)|1;
4071             coeff_count[k]= 1;
4072         }
4073     }
4074
4075     *overflow= s->max_qcoeff < max; //overflow might have happend
4076
4077     if(last_non_zero < start_i){
4078         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4079         return last_non_zero;
4080     }
4081
4082     lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
4083
4084     score_tab[0]= 0;
4085     for(i=0; i<=last_non_zero - start_i; i++){
4086         int level_index, run, j;
4087         const int dct_coeff= block[ scantable[i + start_i] ];
4088         const int zero_distoration= dct_coeff*dct_coeff;
4089         int best_score=256*256*256*120;
4090
4091         last_score += zero_distoration;
4092         for(level_index=0; level_index < coeff_count[i]; level_index++){
4093             int distoration;
4094             int level= coeff[level_index][i];
4095             int unquant_coeff;
4096
4097             assert(level);
4098
4099             if(s->out_format == FMT_H263){
4100                 if(level>0){
4101                     unquant_coeff= level*qmul + qadd;
4102                 }else{
4103                     unquant_coeff= level*qmul - qadd;
4104                 }
4105             }else{ //MPEG1
4106                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4107                 if(s->mb_intra){
4108                     if (level < 0) {
4109                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4110                         unquant_coeff = -((unquant_coeff - 1) | 1);
4111                     } else {
4112                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4113                         unquant_coeff =   (unquant_coeff - 1) | 1;
4114                     }
4115                 }else{
4116                     if (level < 0) {
4117                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4118                         unquant_coeff = -((unquant_coeff - 1) | 1);
4119                     } else {
4120                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4121                         unquant_coeff =   (unquant_coeff - 1) | 1;
4122                     }
4123                 }
4124                 unquant_coeff<<= 3;
4125             }
4126
4127             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4128             level+=64;
4129             if((level&(~127)) == 0){
4130                 for(run=0; run<=i - left_limit; run++){
4131                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4132                     score += score_tab[i-run];
4133
4134                     if(score < best_score){
4135                         best_score=
4136                         score_tab[i+1]= score;
4137                         run_tab[i+1]= run;
4138                         level_tab[i+1]= level-64;
4139                     }
4140                 }
4141
4142                 if(s->out_format == FMT_H263){
4143                     for(run=0; run<=i - left_limit; run++){
4144                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4145                         score += score_tab[i-run];
4146                         if(score < last_score){
4147                             last_score= score;
4148                             last_run= run;
4149                             last_level= level-64;
4150                             last_i= i+1;
4151                         }
4152                     }
4153                 }
4154             }else{
4155                 distoration += esc_length*lambda;
4156                 for(run=0; run<=i - left_limit; run++){
4157                     int score= distoration + score_tab[i-run];
4158
4159                     if(score < best_score){
4160                         best_score=
4161                         score_tab[i+1]= score;
4162                         run_tab[i+1]= run;
4163                         level_tab[i+1]= level-64;
4164                     }
4165                 }
4166
4167                 if(s->out_format == FMT_H263){
4168                     for(run=0; run<=i - left_limit; run++){
4169                         int score= distoration + score_tab[i-run];
4170                         if(score < last_score){
4171                             last_score= score;
4172                             last_run= run;
4173                             last_level= level-64;
4174                             last_i= i+1;
4175                         }
4176                     }
4177                 }
4178             }
4179         }
4180
4181         for(j=left_limit; j<=i; j++){
4182             score_tab[j] += zero_distoration;
4183         }
4184         score_limit+= zero_distoration;
4185         if(score_tab[i+1] < score_limit)
4186             score_limit= score_tab[i+1];
4187
4188         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4189         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4190     }
4191
4192         //FIXME add some cbp penalty
4193
4194     if(s->out_format != FMT_H263){
4195         last_score= 256*256*256*120;
4196         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4197             int score= score_tab[i];
4198             if(i) score += lambda*2; //FIXME exacter?
4199
4200             if(score < last_score){
4201                 last_score= score;
4202                 last_i= i;
4203                 last_level= level_tab[i];
4204                 last_run= run_tab[i];
4205             }
4206         }
4207     }
4208
4209     dc= block[0];
4210     last_non_zero= last_i - 1 + start_i;
4211     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4212
4213     if(last_non_zero < start_i)
4214         return last_non_zero;
4215
4216     if(last_non_zero == 0 && start_i == 0){
4217         int best_level= 0;
4218         int best_score= dc * dc;
4219
4220         for(i=0; i<coeff_count[0]; i++){
4221             const int level= coeff[i][0];
4222             int unquant_coeff, score, distoration;
4223
4224             if(s->out_format == FMT_H263){
4225                 if(level>0){
4226                     unquant_coeff= (level*qmul + qadd)>>3;
4227                 }else{
4228                     unquant_coeff= (level*qmul - qadd)>>3;
4229                 }
4230             }else{ //MPEG1
4231                     if (level < 0) {
4232                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4233                         unquant_coeff = -((unquant_coeff - 1) | 1);
4234                     } else {
4235                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4236                         unquant_coeff =   (unquant_coeff - 1) | 1;
4237                     }
4238             }
4239             unquant_coeff = (unquant_coeff + 4) >> 3;
4240             unquant_coeff<<= 3 + 3;
4241
4242             distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
4243             score= distoration + last_length[UNI_AC_ENC_INDEX(0, level+64)]*lambda;
4244             if(score < best_score){
4245                 best_score= score;
4246                 best_level= level;
4247             }
4248         }
4249         block[0]= best_level;
4250         if(best_level == 0)
4251             last_non_zero=-1;
4252         return last_non_zero;
4253     }
4254
4255     i= last_i;
4256     assert(last_level);
4257 //FIXME use permutated scantable
4258     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4259     i -= last_run + 1;
4260
4261     for(;i>0 ; i -= run_tab[i] + 1){
4262         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4263
4264         block[j]= level_tab[i];
4265         assert(block[j]);
4266     }
4267
4268     return last_non_zero;
4269 }
4270
4271 static int dct_quantize_c(MpegEncContext *s,
4272                         DCTELEM *block, int n,
4273                         int qscale, int *overflow)
4274 {
4275     int i, j, level, last_non_zero, q;
4276     const int *qmat;
4277     const uint8_t *scantable= s->intra_scantable.scantable;
4278     int bias;
4279     int max=0;
4280     unsigned int threshold1, threshold2;
4281
4282     s->dsp.fdct (block);
4283
4284     if (s->mb_intra) {
4285         if (!s->h263_aic) {
4286             if (n < 4)
4287                 q = s->y_dc_scale;
4288             else
4289                 q = s->c_dc_scale;
4290             q = q << 3;
4291         } else
4292             /* For AIC we skip quant/dequant of INTRADC */
4293             q = 1 << 3;
4294
4295         /* note: block[0] is assumed to be positive */
4296         block[0] = (block[0] + (q >> 1)) / q;
4297         i = 1;
4298         last_non_zero = 0;
4299         qmat = s->q_intra_matrix[qscale];
4300         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4301     } else {
4302         i = 0;
4303         last_non_zero = -1;
4304         qmat = s->q_inter_matrix[qscale];
4305         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4306     }
4307     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4308     threshold2= (threshold1<<1);
4309
4310     for(;i<64;i++) {
4311         j = scantable[i];
4312         level = block[j];
4313         level = level * qmat[j];
4314
4315 //        if(   bias+level >= (1<<QMAT_SHIFT)
4316 //           || bias-level >= (1<<QMAT_SHIFT)){
4317         if(((unsigned)(level+threshold1))>threshold2){
4318             if(level>0){
4319                 level= (bias + level)>>QMAT_SHIFT;
4320                 block[j]= level;
4321             }else{
4322                 level= (bias - level)>>QMAT_SHIFT;
4323                 block[j]= -level;
4324             }
4325             max |=level;
4326             last_non_zero = i;
4327         }else{
4328             block[j]=0;
4329         }
4330     }
4331     *overflow= s->max_qcoeff < max; //overflow might have happend
4332
4333     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4334     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4335         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4336
4337     return last_non_zero;
4338 }
4339
4340 #endif //CONFIG_ENCODERS
4341
4342 static void dct_unquantize_mpeg1_c(MpegEncContext *s,
4343                                    DCTELEM *block, int n, int qscale)
4344 {
4345     int i, level, nCoeffs;
4346     const uint16_t *quant_matrix;
4347
4348     nCoeffs= s->block_last_index[n];
4349
4350     if (s->mb_intra) {
4351         if (n < 4)
4352             block[0] = block[0] * s->y_dc_scale;
4353         else
4354             block[0] = block[0] * s->c_dc_scale;
4355         /* XXX: only mpeg1 */
4356         quant_matrix = s->intra_matrix;
4357         for(i=1;i<=nCoeffs;i++) {
4358             int j= s->intra_scantable.permutated[i];
4359             level = block[j];
4360             if (level) {
4361                 if (level < 0) {
4362                     level = -level;
4363                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4364                     level = (level - 1) | 1;
4365                     level = -level;
4366                 } else {
4367                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4368                     level = (level - 1) | 1;
4369                 }
4370 #ifdef PARANOID
4371                 if (level < -2048 || level > 2047)
4372                     fprintf(stderr, "unquant error %d %d\n", i, level);
4373 #endif
4374                 block[j] = level;
4375             }
4376         }
4377     } else {
4378         i = 0;
4379         quant_matrix = s->inter_matrix;
4380         for(;i<=nCoeffs;i++) {
4381             int j= s->intra_scantable.permutated[i];
4382             level = block[j];
4383             if (level) {
4384                 if (level < 0) {
4385                     level = -level;
4386                     level = (((level << 1) + 1) * qscale *
4387                              ((int) (quant_matrix[j]))) >> 4;
4388                     level = (level - 1) | 1;
4389                     level = -level;
4390                 } else {
4391                     level = (((level << 1) + 1) * qscale *
4392                              ((int) (quant_matrix[j]))) >> 4;
4393                     level = (level - 1) | 1;
4394                 }
4395 #ifdef PARANOID
4396                 if (level < -2048 || level > 2047)
4397                     fprintf(stderr, "unquant error %d %d\n", i, level);
4398 #endif
4399                 block[j] = level;
4400             }
4401         }
4402     }
4403 }
4404
4405 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
4406                                    DCTELEM *block, int n, int qscale)
4407 {
4408     int i, level, nCoeffs;
4409     const uint16_t *quant_matrix;
4410
4411     if(s->alternate_scan) nCoeffs= 63;
4412     else nCoeffs= s->block_last_index[n];
4413
4414     if (s->mb_intra) {
4415         if (n < 4)
4416             block[0] = block[0] * s->y_dc_scale;
4417         else
4418             block[0] = block[0] * s->c_dc_scale;
4419         quant_matrix = s->intra_matrix;
4420         for(i=1;i<=nCoeffs;i++) {
4421             int j= s->intra_scantable.permutated[i];
4422             level = block[j];
4423             if (level) {
4424                 if (level < 0) {
4425                     level = -level;
4426                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4427                     level = -level;
4428                 } else {
4429                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4430                 }
4431 #ifdef PARANOID
4432                 if (level < -2048 || level > 2047)
4433                     fprintf(stderr, "unquant error %d %d\n", i, level);
4434 #endif
4435                 block[j] = level;
4436             }
4437         }
4438     } else {
4439         int sum=-1;
4440         i = 0;
4441         quant_matrix = s->inter_matrix;
4442         for(;i<=nCoeffs;i++) {
4443             int j= s->intra_scantable.permutated[i];
4444             level = block[j];
4445             if (level) {
4446                 if (level < 0) {
4447                     level = -level;
4448                     level = (((level << 1) + 1) * qscale *
4449                              ((int) (quant_matrix[j]))) >> 4;
4450                     level = -level;
4451                 } else {
4452                     level = (((level << 1) + 1) * qscale *
4453                              ((int) (quant_matrix[j]))) >> 4;
4454                 }
4455 #ifdef PARANOID
4456                 if (level < -2048 || level > 2047)
4457                     fprintf(stderr, "unquant error %d %d\n", i, level);
4458 #endif
4459                 block[j] = level;
4460                 sum+=level;
4461             }
4462         }
4463         block[63]^=sum&1;
4464     }
4465 }
4466
4467
4468 static void dct_unquantize_h263_c(MpegEncContext *s,
4469                                   DCTELEM *block, int n, int qscale)
4470 {
4471     int i, level, qmul, qadd;
4472     int nCoeffs;
4473
4474     assert(s->block_last_index[n]>=0);
4475
4476     qadd = (qscale - 1) | 1;
4477     qmul = qscale << 1;
4478
4479     if (s->mb_intra) {
4480         if (!s->h263_aic) {
4481             if (n < 4)
4482                 block[0] = block[0] * s->y_dc_scale;
4483             else
4484                 block[0] = block[0] * s->c_dc_scale;
4485         }else
4486             qadd = 0;
4487         i = 1;
4488         nCoeffs= 63; //does not allways use zigzag table
4489     } else {
4490         i = 0;
4491         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4492     }
4493
4494     for(;i<=nCoeffs;i++) {
4495         level = block[i];
4496         if (level) {
4497             if (level < 0) {
4498                 level = level * qmul - qadd;
4499             } else {
4500                 level = level * qmul + qadd;
4501             }
4502 #ifdef PARANOID
4503                 if (level < -2048 || level > 2047)
4504                     fprintf(stderr, "unquant error %d %d\n", i, level);
4505 #endif
4506             block[i] = level;
4507         }
4508     }
4509 }
4510
4511
4512 static const AVOption mpeg4_options[] =
4513 {
4514     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4515     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
4516                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4517                        bit_rate_tolerance, 4, 240000000, 8000),
4518     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4519     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4520     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4521                           rc_eq, "tex^qComp,option1,options2", 0),
4522     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4523                        rc_min_rate, 4, 24000000, 0),
4524     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4525                        rc_max_rate, 4, 24000000, 0),
4526     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
4527                           rc_buffer_aggressivity, 4, 24000000, 0),
4528     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4529                           rc_initial_cplx, 0., 9999999., 0),
4530     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4531                           i_quant_factor, 0., 0., 0),
4532     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4533                           i_quant_factor, -999999., 999999., 0),
4534     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
4535                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4536     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4537                           lumi_masking, 0., 999999., 0),
4538     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
4539                           temporal_cplx_masking, 0., 999999., 0),
4540     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4541                           spatial_cplx_masking, 0., 999999., 0),
4542     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4543                           p_masking, 0., 999999., 0),
4544     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4545                           dark_masking, 0., 999999., 0),
4546     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
4547                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4548
4549     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4550                        mb_qmin, 0, 8, 0),
4551     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4552                        mb_qmin, 0, 8, 0),
4553
4554     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4555                        me_cmp, 0, 24000000, 0),
4556     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4557                        me_sub_cmp, 0, 24000000, 0),
4558
4559
4560     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4561                        dia_size, 0, 24000000, 0),
4562     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
4563                        last_predictor_count, 0, 24000000, 0),
4564
4565     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4566                        pre_me, 0, 24000000, 0),
4567     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4568                        me_pre_cmp, 0, 24000000, 0),
4569
4570     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4571                        me_range, 0, 24000000, 0),
4572     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
4573                        pre_dia_size, 0, 24000000, 0),
4574     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4575                        me_subpel_quality, 0, 24000000, 0),
4576     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4577                        me_range, 0, 24000000, 0),
4578     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4579                         flags, CODEC_FLAG_PSNR, 0),
4580     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4581                               rc_override),
4582     AVOPTION_SUB(avoptions_common),
4583     AVOPTION_END()
4584 };
4585
4586 #ifdef CONFIG_ENCODERS
4587
4588 AVCodec mpeg1video_encoder = {
4589     "mpeg1video",
4590     CODEC_TYPE_VIDEO,
4591     CODEC_ID_MPEG1VIDEO,
4592     sizeof(MpegEncContext),
4593     MPV_encode_init,
4594     MPV_encode_picture,
4595     MPV_encode_end,
4596 };
4597
4598 #ifdef CONFIG_RISKY
4599
4600 AVCodec mpeg2video_encoder = {
4601     "mpeg2video",
4602     CODEC_TYPE_VIDEO,
4603     CODEC_ID_MPEG2VIDEO,
4604     sizeof(MpegEncContext),
4605     MPV_encode_init,
4606     MPV_encode_picture,
4607     MPV_encode_end,
4608 };
4609
4610 AVCodec h263_encoder = {
4611     "h263",
4612     CODEC_TYPE_VIDEO,
4613     CODEC_ID_H263,
4614     sizeof(MpegEncContext),
4615     MPV_encode_init,
4616     MPV_encode_picture,
4617     MPV_encode_end,
4618 };
4619
4620 AVCodec h263p_encoder = {
4621     "h263p",
4622     CODEC_TYPE_VIDEO,
4623     CODEC_ID_H263P,
4624     sizeof(MpegEncContext),
4625     MPV_encode_init,
4626     MPV_encode_picture,
4627     MPV_encode_end,
4628 };
4629
4630 AVCodec flv_encoder = {
4631     "flv",
4632     CODEC_TYPE_VIDEO,
4633     CODEC_ID_FLV1,
4634     sizeof(MpegEncContext),
4635     MPV_encode_init,
4636     MPV_encode_picture,
4637     MPV_encode_end,
4638 };
4639
4640 AVCodec rv10_encoder = {
4641     "rv10",
4642     CODEC_TYPE_VIDEO,
4643     CODEC_ID_RV10,
4644     sizeof(MpegEncContext),
4645     MPV_encode_init,
4646     MPV_encode_picture,
4647     MPV_encode_end,
4648 };
4649
4650 AVCodec mpeg4_encoder = {
4651     "mpeg4",
4652     CODEC_TYPE_VIDEO,
4653     CODEC_ID_MPEG4,
4654     sizeof(MpegEncContext),
4655     MPV_encode_init,
4656     MPV_encode_picture,
4657     MPV_encode_end,
4658     .options = mpeg4_options,
4659 };
4660
4661 AVCodec msmpeg4v1_encoder = {
4662     "msmpeg4v1",
4663     CODEC_TYPE_VIDEO,
4664     CODEC_ID_MSMPEG4V1,
4665     sizeof(MpegEncContext),
4666     MPV_encode_init,
4667     MPV_encode_picture,
4668     MPV_encode_end,
4669     .options = mpeg4_options,
4670 };
4671
4672 AVCodec msmpeg4v2_encoder = {
4673     "msmpeg4v2",
4674     CODEC_TYPE_VIDEO,
4675     CODEC_ID_MSMPEG4V2,
4676     sizeof(MpegEncContext),
4677     MPV_encode_init,
4678     MPV_encode_picture,
4679     MPV_encode_end,
4680     .options = mpeg4_options,
4681 };
4682
4683 AVCodec msmpeg4v3_encoder = {
4684     "msmpeg4",
4685     CODEC_TYPE_VIDEO,
4686     CODEC_ID_MSMPEG4V3,
4687     sizeof(MpegEncContext),
4688     MPV_encode_init,
4689     MPV_encode_picture,
4690     MPV_encode_end,
4691     .options = mpeg4_options,
4692 };
4693
4694 AVCodec wmv1_encoder = {
4695     "wmv1",
4696     CODEC_TYPE_VIDEO,
4697     CODEC_ID_WMV1,
4698     sizeof(MpegEncContext),
4699     MPV_encode_init,
4700     MPV_encode_picture,
4701     MPV_encode_end,
4702     .options = mpeg4_options,
4703 };
4704
4705 #endif
4706
4707 AVCodec mjpeg_encoder = {
4708     "mjpeg",
4709     CODEC_TYPE_VIDEO,
4710     CODEC_ID_MJPEG,
4711     sizeof(MpegEncContext),
4712     MPV_encode_init,
4713     MPV_encode_picture,
4714     MPV_encode_end,
4715 };
4716
4717 #endif //CONFIG_ENCODERS
4718