synfig-core/trunk/src/modules/mod_libavcodec/libavcodec/imgresample.c

   1 /*
   2  * High quality image resampling with polyphase filters
   3  * Copyright (c) 2001 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  */
  19
  20 /**
  21  * @file imgresample.c
  22  * High quality image resampling with polyphase filters .
  23  */
  24
  25 #include "avcodec.h"
  26 #include "dsputil.h"
  27
  28 #ifdef USE_FASTMEMCPY
  29 #include "fastmemcpy.h"
  30 #endif
  31
  32 #define NB_COMPONENTS 3
  33
  34 #define PHASE_BITS 4
  35 #define NB_PHASES  (1 << PHASE_BITS)
  36 #define NB_TAPS    4
  37 #define FCENTER    1  /* index of the center of the filter */
  38 //#define TEST    1  /* Test it */
  39
  40 #define POS_FRAC_BITS 16
  41 #define POS_FRAC      (1 << POS_FRAC_BITS)
  42 /* 6 bits precision is needed for MMX */
  43 #define FILTER_BITS   8
  44
  45 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
  46
  47 struct ImgReSampleContext {
  48     int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
  49     int h_incr, v_incr;
  50     int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
  51     int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
  52     uint8_t *line_buf;
  53 };
  54
  55 static inline int get_phase(int pos)
  56 {
  57     return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
  58 }
  59
  60 /* This function must be optimized */
  61 static void h_resample_fast(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
  62                             int src_start, int src_incr, int16_t *filters)
  63 {
  64     int src_pos, phase, sum, i;
  65     uint8_t *s;
  66     int16_t *filter;
  67
  68     src_pos = src_start;
  69     for(i=0;i<dst_width;i++) {
  70 #ifdef TEST
  71         /* test */
  72         if ((src_pos >> POS_FRAC_BITS) < 0 ||
  73             (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
  74             av_abort();
  75 #endif
  76         s = src + (src_pos >> POS_FRAC_BITS);
  77         phase = get_phase(src_pos);
  78         filter = filters + phase * NB_TAPS;
  79 #if NB_TAPS == 4
  80         sum = s[0] * filter[0] +
  81             s[1] * filter[1] +
  82             s[2] * filter[2] +
  83             s[3] * filter[3];
  84 #else
  85         {
  86             int j;
  87             sum = 0;
  88             for(j=0;j<NB_TAPS;j++)
  89                 sum += s[j] * filter[j];
  90         }
  91 #endif
  92         sum = sum >> FILTER_BITS;
  93         if (sum < 0)
  94             sum = 0;
  95         else if (sum > 255)
  96             sum = 255;
  97         dst[0] = sum;
  98         src_pos += src_incr;
  99         dst++;
 100     }
 101 }
 102
 103 /* This function must be optimized */
 104 static void v_resample(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
 105                        int16_t *filter)
 106 {
 107     int sum, i;
 108     uint8_t *s;
 109
 110     s = src;
 111     for(i=0;i<dst_width;i++) {
 112 #if NB_TAPS == 4
 113         sum = s[0 * wrap] * filter[0] +
 114             s[1 * wrap] * filter[1] +
 115             s[2 * wrap] * filter[2] +
 116             s[3 * wrap] * filter[3];
 117 #else
 118         {
 119             int j;
 120             uint8_t *s1 = s;
 121
 122             sum = 0;
 123             for(j=0;j<NB_TAPS;j++) {
 124                 sum += s1[0] * filter[j];
 125                 s1 += wrap;
 126             }
 127         }
 128 #endif
 129         sum = sum >> FILTER_BITS;
 130         if (sum < 0)
 131             sum = 0;
 132         else if (sum > 255)
 133             sum = 255;
 134         dst[0] = sum;
 135         dst++;
 136         s++;
 137     }
 138 }
 139
 140 #ifdef HAVE_MMX
 141
 142 #include "i386/mmx.h"
 143
 144 #define FILTER4(reg) \
 145 {\
 146         s = src + (src_pos >> POS_FRAC_BITS);\
 147         phase = get_phase(src_pos);\
 148         filter = filters + phase * NB_TAPS;\
 149         movq_m2r(*s, reg);\
 150         punpcklbw_r2r(mm7, reg);\
 151         movq_m2r(*filter, mm6);\
 152         pmaddwd_r2r(reg, mm6);\
 153         movq_r2r(mm6, reg);\
 154         psrlq_i2r(32, reg);\
 155         paddd_r2r(mm6, reg);\
 156         psrad_i2r(FILTER_BITS, reg);\
 157         src_pos += src_incr;\
 158 }
 159
 160 #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
 161
 162 /* XXX: do four pixels at a time */
 163 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
 164                                  int src_start, int src_incr, int16_t *filters)
 165 {
 166     int src_pos, phase;
 167     uint8_t *s;
 168     int16_t *filter;
 169     mmx_t tmp;
 170
 171     src_pos = src_start;
 172     pxor_r2r(mm7, mm7);
 173
 174     while (dst_width >= 4) {
 175
 176         FILTER4(mm0);
 177         FILTER4(mm1);
 178         FILTER4(mm2);
 179         FILTER4(mm3);
 180
 181         packuswb_r2r(mm7, mm0);
 182         packuswb_r2r(mm7, mm1);
 183         packuswb_r2r(mm7, mm3);
 184         packuswb_r2r(mm7, mm2);
 185         movq_r2m(mm0, tmp);
 186         dst[0] = tmp.ub[0];
 187         movq_r2m(mm1, tmp);
 188         dst[1] = tmp.ub[0];
 189         movq_r2m(mm2, tmp);
 190         dst[2] = tmp.ub[0];
 191         movq_r2m(mm3, tmp);
 192         dst[3] = tmp.ub[0];
 193         dst += 4;
 194         dst_width -= 4;
 195     }
 196     while (dst_width > 0) {
 197         FILTER4(mm0);
 198         packuswb_r2r(mm7, mm0);
 199         movq_r2m(mm0, tmp);
 200         dst[0] = tmp.ub[0];
 201         dst++;
 202         dst_width--;
 203     }
 204     emms();
 205 }
 206
 207 static void v_resample4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
 208                             int16_t *filter)
 209 {
 210     int sum, i, v;
 211     uint8_t *s;
 212     mmx_t tmp;
 213     mmx_t coefs[4];
 214
 215     for(i=0;i<4;i++) {
 216         v = filter[i];
 217         coefs[i].uw[0] = v;
 218         coefs[i].uw[1] = v;
 219         coefs[i].uw[2] = v;
 220         coefs[i].uw[3] = v;
 221     }
 222
 223     pxor_r2r(mm7, mm7);
 224     s = src;
 225     while (dst_width >= 4) {
 226         movq_m2r(s[0 * wrap], mm0);
 227         punpcklbw_r2r(mm7, mm0);
 228         movq_m2r(s[1 * wrap], mm1);
 229         punpcklbw_r2r(mm7, mm1);
 230         movq_m2r(s[2 * wrap], mm2);
 231         punpcklbw_r2r(mm7, mm2);
 232         movq_m2r(s[3 * wrap], mm3);
 233         punpcklbw_r2r(mm7, mm3);
 234
 235         pmullw_m2r(coefs[0], mm0);
 236         pmullw_m2r(coefs[1], mm1);
 237         pmullw_m2r(coefs[2], mm2);
 238         pmullw_m2r(coefs[3], mm3);
 239
 240         paddw_r2r(mm1, mm0);
 241         paddw_r2r(mm3, mm2);
 242         paddw_r2r(mm2, mm0);
 243         psraw_i2r(FILTER_BITS, mm0);
 244
 245         packuswb_r2r(mm7, mm0);
 246         movq_r2m(mm0, tmp);
 247
 248         *(uint32_t *)dst = tmp.ud[0];
 249         dst += 4;
 250         s += 4;
 251         dst_width -= 4;
 252     }
 253     while (dst_width > 0) {
 254         sum = s[0 * wrap] * filter[0] +
 255             s[1 * wrap] * filter[1] +
 256             s[2 * wrap] * filter[2] +
 257             s[3 * wrap] * filter[3];
 258         sum = sum >> FILTER_BITS;
 259         if (sum < 0)
 260             sum = 0;
 261         else if (sum > 255)
 262             sum = 255;
 263         dst[0] = sum;
 264         dst++;
 265         s++;
 266         dst_width--;
 267     }
 268     emms();
 269 }
 270 #endif
 271
 272 #ifdef HAVE_ALTIVEC
 273 typedef union {
 274     vector unsigned char v;
 275     unsigned char c[16];
 276 } vec_uc_t;
 277
 278 typedef union {
 279     vector signed short v;
 280     signed short s[8];
 281 } vec_ss_t;
 282
 283 void v_resample16_altivec(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
 284                             int16_t *filter)
 285 {
 286     int sum, i;
 287     uint8_t *s;
 288     vector unsigned char *tv, tmp, dstv, zero;
 289     vec_ss_t srchv[4], srclv[4], fv[4];
 290     vector signed short zeros, sumhv, sumlv;
 291     s = src;
 292
 293     for(i=0;i<4;i++)
 294     {
 295         /*
 296            The vec_madds later on does an implicit >>15 on the result.
 297            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
 298            a signed short, we have just enough bits to pre-shift our
 299            filter constants <<7 to compensate for vec_madds.
 300         */
 301         fv[i].s[0] = filter[i] << (15-FILTER_BITS);
 302         fv[i].v = vec_splat(fv[i].v, 0);
 303     }
 304
 305     zero = vec_splat_u8(0);
 306     zeros = vec_splat_s16(0);
 307
 308
 309     /*
 310        When we're resampling, we'd ideally like both our input buffers,
 311        and output buffers to be 16-byte aligned, so we can do both aligned
 312        reads and writes. Sadly we can't always have this at the moment, so
 313        we opt for aligned writes, as unaligned writes have a huge overhead.
 314        To do this, do enough scalar resamples to get dst 16-byte aligned.
 315     */
 316     i = (-(int)dst) & 0xf;
 317     while(i>0) {
 318         sum = s[0 * wrap] * filter[0] +
 319         s[1 * wrap] * filter[1] +
 320         s[2 * wrap] * filter[2] +
 321         s[3 * wrap] * filter[3];
 322         sum = sum >> FILTER_BITS;
 323         if (sum<0) sum = 0; else if (sum>255) sum=255;
 324         dst[0] = sum;
 325         dst++;
 326         s++;
 327         dst_width--;
 328         i--;
 329     }
 330
 331     /* Do our altivec resampling on 16 pixels at once. */
 332     while(dst_width>=16) {
 333         /*
 334            Read 16 (potentially unaligned) bytes from each of
 335            4 lines into 4 vectors, and split them into shorts.
 336            Interleave the multipy/accumulate for the resample
 337            filter with the loads to hide the 3 cycle latency
 338            the vec_madds have.
 339         */
 340         tv = (vector unsigned char *) &s[0 * wrap];
 341         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
 342         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
 343         srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
 344         sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
 345         sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
 346
 347         tv = (vector unsigned char *) &s[1 * wrap];
 348         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
 349         srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
 350         srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
 351         sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
 352         sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
 353
 354         tv = (vector unsigned char *) &s[2 * wrap];
 355         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
 356         srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
 357         srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
 358         sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
 359         sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
 360
 361         tv = (vector unsigned char *) &s[3 * wrap];
 362         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
 363         srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
 364         srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
 365         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
 366         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
 367
 368         /*
 369            Pack the results into our destination vector,
 370            and do an aligned write of that back to memory.
 371         */
 372         dstv = vec_packsu(sumhv, sumlv) ;
 373         vec_st(dstv, 0, (vector unsigned char *) dst);
 374
 375         dst+=16;
 376         s+=16;
 377         dst_width-=16;
 378     }
 379
 380     /*
 381        If there are any leftover pixels, resample them
 382        with the slow scalar method.
 383     */
 384     while(dst_width>0) {
 385         sum = s[0 * wrap] * filter[0] +
 386         s[1 * wrap] * filter[1] +
 387         s[2 * wrap] * filter[2] +
 388         s[3 * wrap] * filter[3];
 389         sum = sum >> FILTER_BITS;
 390         if (sum<0) sum = 0; else if (sum>255) sum=255;
 391         dst[0] = sum;
 392         dst++;
 393         s++;
 394         dst_width--;
 395     }
 396 }
 397 #endif
 398
 399 /* slow version to handle limit cases. Does not need optimisation */
 400 static void h_resample_slow(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
 401                             int src_start, int src_incr, int16_t *filters)
 402 {
 403     int src_pos, phase, sum, j, v, i;
 404     uint8_t *s, *src_end;
 405     int16_t *filter;
 406
 407     src_end = src + src_width;
 408     src_pos = src_start;
 409     for(i=0;i<dst_width;i++) {
 410         s = src + (src_pos >> POS_FRAC_BITS);
 411         phase = get_phase(src_pos);
 412         filter = filters + phase * NB_TAPS;
 413         sum = 0;
 414         for(j=0;j<NB_TAPS;j++) {
 415             if (s < src)
 416                 v = src[0];
 417             else if (s >= src_end)
 418                 v = src_end[-1];
 419             else
 420                 v = s[0];
 421             sum += v * filter[j];
 422             s++;
 423         }
 424         sum = sum >> FILTER_BITS;
 425         if (sum < 0)
 426             sum = 0;
 427         else if (sum > 255)
 428             sum = 255;
 429         dst[0] = sum;
 430         src_pos += src_incr;
 431         dst++;
 432     }
 433 }
 434
 435 static void h_resample(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
 436                        int src_start, int src_incr, int16_t *filters)
 437 {
 438     int n, src_end;
 439
 440     if (src_start < 0) {
 441         n = (0 - src_start + src_incr - 1) / src_incr;
 442         h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
 443         dst += n;
 444         dst_width -= n;
 445         src_start += n * src_incr;
 446     }
 447     src_end = src_start + dst_width * src_incr;
 448     if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
 449         n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
 450             src_incr;
 451     } else {
 452         n = dst_width;
 453     }
 454 #ifdef HAVE_MMX
 455     if ((mm_flags & MM_MMX) && NB_TAPS == 4)
 456         h_resample_fast4_mmx(dst, n,
 457                              src, src_width, src_start, src_incr, filters);
 458     else
 459 #endif
 460         h_resample_fast(dst, n,
 461                         src, src_width, src_start, src_incr, filters);
 462     if (n < dst_width) {
 463         dst += n;
 464         dst_width -= n;
 465         src_start += n * src_incr;
 466         h_resample_slow(dst, dst_width,
 467                         src, src_width, src_start, src_incr, filters);
 468     }
 469 }
 470
 471 static void component_resample(ImgReSampleContext *s,
 472                                uint8_t *output, int owrap, int owidth, int oheight,
 473                                uint8_t *input, int iwrap, int iwidth, int iheight)
 474 {
 475     int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
 476     uint8_t *new_line, *src_line;
 477
 478     last_src_y = - FCENTER - 1;
 479     /* position of the bottom of the filter in the source image */
 480     src_y = (last_src_y + NB_TAPS) * POS_FRAC;
 481     ring_y = NB_TAPS; /* position in ring buffer */
 482     for(y=0;y<oheight;y++) {
 483         /* apply horizontal filter on new lines from input if needed */
 484         src_y1 = src_y >> POS_FRAC_BITS;
 485         while (last_src_y < src_y1) {
 486             if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
 487                 ring_y = NB_TAPS;
 488             last_src_y++;
 489             /* handle limit conditions : replicate line (slightly
 490                inefficient because we filter multiple times) */
 491             y1 = last_src_y;
 492             if (y1 < 0) {
 493                 y1 = 0;
 494             } else if (y1 >= iheight) {
 495                 y1 = iheight - 1;
 496             }
 497             src_line = input + y1 * iwrap;
 498             new_line = s->line_buf + ring_y * owidth;
 499             /* apply filter and handle limit cases correctly */
 500             h_resample(new_line, owidth,
 501                        src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
 502                        &s->h_filters[0][0]);
 503             /* handle ring buffer wraping */
 504             if (ring_y >= LINE_BUF_HEIGHT) {
 505                 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
 506                        new_line, owidth);
 507             }
 508         }
 509         /* apply vertical filter */
 510         phase_y = get_phase(src_y);
 511 #ifdef HAVE_MMX
 512         /* desactivated MMX because loss of precision */
 513         if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
 514             v_resample4_mmx(output, owidth,
 515                             s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 516                             &s->v_filters[phase_y][0]);
 517         else
 518 #endif
 519 #ifdef HAVE_ALTIVEC
 520             if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
 521                 v_resample16_altivec(output, owidth,
 522                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 523                                 &s->v_filters[phase_y][0]);
 524         else
 525 #endif
 526             v_resample(output, owidth,
 527                        s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
 528                        &s->v_filters[phase_y][0]);
 529
 530         src_y += s->v_incr;
 531         output += owrap;
 532     }
 533 }
 534
 535 /* XXX: the following filter is quite naive, but it seems to suffice
 536    for 4 taps */
 537 static void build_filter(int16_t *filter, float factor)
 538 {
 539     int ph, i, v;
 540     float x, y, tab[NB_TAPS], norm, mult;
 541
 542     /* if upsampling, only need to interpolate, no filter */
 543     if (factor > 1.0)
 544         factor = 1.0;
 545
 546     for(ph=0;ph<NB_PHASES;ph++) {
 547         norm = 0;
 548         for(i=0;i<NB_TAPS;i++) {
 549
 550             x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
 551             if (x == 0)
 552                 y = 1.0;
 553             else
 554                 y = sin(x) / x;
 555             tab[i] = y;
 556             norm += y;
 557         }
 558
 559         /* normalize so that an uniform color remains the same */
 560         mult = (float)(1 << FILTER_BITS) / norm;
 561         for(i=0;i<NB_TAPS;i++) {
 562             v = (int)(tab[i] * mult);
 563             filter[ph * NB_TAPS + i] = v;
 564         }
 565     }
 566 }
 567
 568 ImgReSampleContext *img_resample_init(int owidth, int oheight,
 569                                       int iwidth, int iheight)
 570 {
 571         return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
 572 }
 573
 574 ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
 575                                       int iwidth, int iheight,
 576                                       int topBand, int bottomBand,
 577                                       int leftBand, int rightBand)
 578 {
 579     ImgReSampleContext *s;
 580
 581     s = av_mallocz(sizeof(ImgReSampleContext));
 582     if (!s)
 583         return NULL;
 584     s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
 585     if (!s->line_buf)
 586         goto fail;
 587
 588     s->owidth = owidth;
 589     s->oheight = oheight;
 590     s->iwidth = iwidth;
 591     s->iheight = iheight;
 592     s->topBand = topBand;
 593     s->bottomBand = bottomBand;
 594     s->leftBand = leftBand;
 595     s->rightBand = rightBand;
 596
 597     s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
 598     s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
 599
 600     build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
 601     build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
 602
 603     return s;
 604  fail:
 605     av_free(s);
 606     return NULL;
 607 }
 608
 609 void img_resample(ImgReSampleContext *s,
 610                   AVPicture *output, AVPicture *input)
 611 {
 612     int i, shift;
 613
 614     for(i=0;i<3;i++) {
 615         shift = (i == 0) ? 0 : 1;
 616         component_resample(s, output->data[i], output->linesize[i],
 617                            s->owidth >> shift, s->oheight >> shift,
 618                            input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
 619                            input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
 620                            (s->iheight - s->topBand - s->bottomBand) >> shift);
 621     }
 622 }
 623
 624 void img_resample_close(ImgReSampleContext *s)
 625 {
 626     av_free(s->line_buf);
 627     av_free(s);
 628 }
 629
 630 #ifdef TEST
 631
 632 void *av_mallocz(int size)
 633 {
 634     void *ptr;
 635     ptr = malloc(size);
 636     memset(ptr, 0, size);
 637     return ptr;
 638 }
 639
 640 void av_free(void *ptr)
 641 {
 642     /* XXX: this test should not be needed on most libcs */
 643     if (ptr)
 644         free(ptr);
 645 }
 646
 647 /* input */
 648 #define XSIZE 256
 649 #define YSIZE 256
 650 uint8_t img[XSIZE * YSIZE];
 651
 652 /* output */
 653 #define XSIZE1 512
 654 #define YSIZE1 512
 655 uint8_t img1[XSIZE1 * YSIZE1];
 656 uint8_t img2[XSIZE1 * YSIZE1];
 657
 658 void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
 659 {
 660     FILE *f;
 661     f=fopen(filename,"w");
 662     fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
 663     fwrite(img,1, xsize * ysize,f);
 664     fclose(f);
 665 }
 666
 667 static void dump_filter(int16_t *filter)
 668 {
 669     int i, ph;
 670
 671     for(ph=0;ph<NB_PHASES;ph++) {
 672         printf("%2d: ", ph);
 673         for(i=0;i<NB_TAPS;i++) {
 674             printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
 675         }
 676         printf("\n");
 677     }
 678 }
 679
 680 #ifdef HAVE_MMX
 681 int mm_flags;
 682 #endif
 683
 684 int main(int argc, char **argv)
 685 {
 686     int x, y, v, i, xsize, ysize;
 687     ImgReSampleContext *s;
 688     float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
 689     char buf[256];
 690
 691     /* build test image */
 692     for(y=0;y<YSIZE;y++) {
 693         for(x=0;x<XSIZE;x++) {
 694             if (x < XSIZE/2 && y < YSIZE/2) {
 695                 if (x < XSIZE/4 && y < YSIZE/4) {
 696                     if ((x % 10) <= 6 &&
 697                         (y % 10) <= 6)
 698                         v = 0xff;
 699                     else
 700                         v = 0x00;
 701                 } else if (x < XSIZE/4) {
 702                     if (x & 1)
 703                         v = 0xff;
 704                     else
 705                         v = 0;
 706                 } else if (y < XSIZE/4) {
 707                     if (y & 1)
 708                         v = 0xff;
 709                     else
 710                         v = 0;
 711                 } else {
 712                     if (y < YSIZE*3/8) {
 713                         if ((y+x) & 1)
 714                             v = 0xff;
 715                         else
 716                             v = 0;
 717                     } else {
 718                         if (((x+3) % 4) <= 1 &&
 719                             ((y+3) % 4) <= 1)
 720                             v = 0xff;
 721                         else
 722                             v = 0x00;
 723                     }
 724                 }
 725             } else if (x < XSIZE/2) {
 726                 v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
 727             } else if (y < XSIZE/2) {
 728                 v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
 729             } else {
 730                 v = ((x + y - XSIZE) * 255) / XSIZE;
 731             }
 732             img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
 733         }
 734     }
 735     save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
 736     for(i=0;i<sizeof(factors)/sizeof(float);i++) {
 737         fact = factors[i];
 738         xsize = (int)(XSIZE * fact);
 739         ysize = (int)((YSIZE - 100) * fact);
 740         s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
 741         printf("Factor=%0.2f\n", fact);
 742         dump_filter(&s->h_filters[0][0]);
 743         component_resample(s, img1, xsize, xsize, ysize,
 744                            img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
 745         img_resample_close(s);
 746
 747         sprintf(buf, "/tmp/out%d.pgm", i);
 748         save_pgm(buf, img1, xsize, ysize);
 749     }
 750
 751     /* mmx test */
 752 #ifdef HAVE_MMX
 753     printf("MMX test\n");
 754     fact = 0.72;
 755     xsize = (int)(XSIZE * fact);
 756     ysize = (int)(YSIZE * fact);
 757     mm_flags = MM_MMX;
 758     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 759     component_resample(s, img1, xsize, xsize, ysize,
 760                        img, XSIZE, XSIZE, YSIZE);
 761
 762     mm_flags = 0;
 763     s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
 764     component_resample(s, img2, xsize, xsize, ysize,
 765                        img, XSIZE, XSIZE, YSIZE);
 766     if (memcmp(img1, img2, xsize * ysize) != 0) {
 767         fprintf(stderr, "mmx error\n");
 768         exit(1);
 769     }
 770     printf("MMX OK\n");
 771 #endif
 772     return 0;
 773 }
 774
 775 #endif