Fix bugs in previous commit that caused FTBFS in synfig and ETL FTBFS with older...
[synfig.git] / synfig-core / tags / synfig_0_61_04 / synfig-core / src / modules / mod_libavcodec / libavcodec / libpostproc / postprocess.c
1 /*
2     Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 */
18
19 /**
20  * @file postprocess.c
21  * postprocessing.
22  */
23  
24 /*
25                         C       MMX     MMX2    3DNow
26 isVertDC                Ec      Ec
27 isVertMinMaxOk          Ec      Ec
28 doVertLowPass           E               e       e
29 doVertDefFilter         Ec      Ec      e       e
30 isHorizDC               Ec      Ec
31 isHorizMinMaxOk         a       E
32 doHorizLowPass          E               e       e
33 doHorizDefFilter        Ec      Ec      e       e
34 deRing                  E               e       e*
35 Vertical RKAlgo1        E               a       a
36 Horizontal RKAlgo1                      a       a
37 Vertical X1#            a               E       E
38 Horizontal X1#          a               E       E
39 LinIpolDeinterlace      e               E       E*
40 CubicIpolDeinterlace    a               e       e*
41 LinBlendDeinterlace     e               E       E*
42 MedianDeinterlace#      E       Ec      Ec
43 TempDeNoiser#           E               e       e
44
45 * I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
46 # more or less self-invented filters, so the exactness isn't too meaningful
47 E = Exact implementation
48 e = almost exact implementation (slightly different rounding,...)
49 a = alternative / approximate impl
50 c = checked against the other implementations (-vo md5)
51 */
52
53 /*
54 TODO:
55 reduce the time wasted on the mem transfer
56 unroll stuff if instructions depend too much on the prior one
57 move YScale thing to the end instead of fixing QP
58 write a faster and higher quality deblocking filter :)
59 make the mainloop more flexible (variable number of blocks at once
60         (the if/else stuff per block is slowing things down)
61 compare the quality & speed of all filters
62 split this huge file
63 optimize c versions
64 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
65 ...
66 */
67
68 //Changelog: use the CVS log
69
70 #include "config.h"
71 #include <inttypes.h>
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #ifdef HAVE_MALLOC_H
76 #include <malloc.h>
77 #endif
78 //#undef HAVE_MMX2
79 //#define HAVE_3DNOW
80 //#undef HAVE_MMX
81 //#undef ARCH_X86
82 //#define DEBUG_BRIGHTNESS
83 #ifdef USE_FASTMEMCPY
84 #include "../fastmemcpy.h"
85 #endif
86 #include "postprocess.h"
87 #include "postprocess_internal.h"
88
89 #include "mangle.h" //FIXME should be supressed
90
91 #ifndef HAVE_MEMALIGN
92 #define memalign(a,b) malloc(b)
93 #endif
94
95 #define MIN(a,b) ((a) > (b) ? (b) : (a))
96 #define MAX(a,b) ((a) < (b) ? (b) : (a))
97 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
98 #define SIGN(a) ((a) > 0 ? 1 : -1)
99
100 #define GET_MODE_BUFFER_SIZE 500
101 #define OPTIONS_ARRAY_SIZE 10
102 #define BLOCK_SIZE 8
103 #define TEMP_STRIDE 8
104 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
105
106 #ifdef ARCH_X86
/*
 * 64-bit constant patterns, each aligned to 8 bytes.
 * Naming: wNN = four 16-bit words holding 0x00NN, bNN = eight bytes holding 0xNN.
 * NOTE(review): presumably referenced by symbol name from the inline assembly
 * in postprocess_template.c -- confirm before renaming them or adding const.
 */
static uint64_t __attribute__((aligned(8))) w05 = 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20 = 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00 = 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01 = 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02 = 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08 = 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80 = 0x8080808080808080LL;
114 #endif
115
116
/* Backing storage for clip_tab: 3*256 bytes. */
static uint8_t clip_table[3*256];
/* Fixed pointer 256 bytes into clip_table, so clip_tab[i] aliases clip_table[256 + i]. */
static uint8_t * const clip_tab = clip_table + 256;

/* Diagnostic level; the mode parser in this file prints traces when it is > 1. */
static int verbose = 0;

/* NOTE(review): presumably consumed by the dering filter in the template file -- confirm. */
static const int deringThreshold = 20;
123
124
125 static struct PPFilter filters[]=
126 {
127         {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
128         {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
129 /*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
130         {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
131         {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
132         {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
133         {"dr", "dering",                1, 5, 6, DERING},
134         {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
135         {"lb", "linblenddeint",         1, 1, 4, INTERPOLATION_LINEAR_BLEND_DEINT_FILTER},
136         {"li", "linipoldeint",          1, 1, 4, INTERPOLATION_LINEAR_IPOL_DEINT_FILTER},
137         {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
138         {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
139         {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
140         {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
141         {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
142         {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
143         {NULL, NULL,0,0,0,0} //End Marker
144 };
145
/*
 * Preset aliases, stored as consecutive (name, expansion) pairs and terminated
 * by a single NULL in the name position. The mode parser replaces a filter
 * token equal to `name` with `expansion`.
 * The entries are string literals and are only ever read, hence const char *.
 */
static const char *replaceTable[]=
{
	"default",	"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"de",		"hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fast",		"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	"fa",		"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
	NULL //End Marker
};
154
155 #ifdef ARCH_X86
156 static inline void unusedVariableWarningFixer()
157 {
158         if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
159 }
160 #endif
161
162
163 #ifdef ARCH_X86
/**
 * Issue a PREFETCHNTA instruction for the address in p.
 */
static inline void prefetchnta(void *p)
{
	__asm__ volatile(
		"prefetchnta (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
170
/**
 * Issue a PREFETCHT0 instruction for the address in p.
 */
static inline void prefetcht0(void *p)
{
	__asm__ volatile(
		"prefetcht0 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
177
/**
 * Issue a PREFETCHT1 instruction for the address in p.
 */
static inline void prefetcht1(void *p)
{
	__asm__ volatile(
		"prefetcht1 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
184
/**
 * Issue a PREFETCHT2 instruction for the address in p.
 */
static inline void prefetcht2(void *p)
{
	__asm__ volatile(
		"prefetcht2 (%0)\n\t"
		: /* no outputs */
		: "r" (p)
	);
}
191 #endif
192
193 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
194
195 /**
196  * Check if the given 8x8 Block is mostly "flat"
197  */
198 static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
199 {
200         int numEq= 0;
201         int y;
202         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
203         const int dcThreshold= dcOffset*2 + 1;
204
205         for(y=0; y<BLOCK_SIZE; y++)
206         {
207                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
208                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
209                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
210                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
211                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
212                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
213                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
214                 src+= stride;
215         }
216         return numEq > c->ppMode.flatnessThreshold;
217 }
218
219 /**
220  * Check if the middle 8x8 Block in the given 8x16 block is flat
221  */
222 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
223         int numEq= 0;
224         int y;
225         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
226         const int dcThreshold= dcOffset*2 + 1;
227
228         src+= stride*4; // src points to begin of the 8x8 Block
229         for(y=0; y<BLOCK_SIZE-1; y++)
230         {
231                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
232                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
233                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
234                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
235                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
236                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
237                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
238                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
239                 src+= stride;
240         }
241         return numEq > c->ppMode.flatnessThreshold;
242 }
243
/**
 * Sparse horizontal range check over 8 rows.
 *
 * One pixel pair, 5 columns apart, is sampled per row, cycling through the
 * column pairs (0,5) (2,7) (4,1) (6,3). The check fails as soon as a sampled
 * difference falls outside [-2*QP, +2*QP].
 *
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
	static const int first[4]=  {0, 2, 4, 6};
	static const int second[4]= {5, 7, 1, 3};
	const uint8_t *row= src;
	int line;

	for(line=0; line<8; line++, row+= stride)
	{
		const int k= line & 3; // the pattern repeats every 4 rows
		if((unsigned)(row[first[k]] - row[second[k]] + 2*QP) > 4*QP) return 0;
	}
	return 1;
}
266
267 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
268 {
269 #if 1
270 #if 1
271         int x;
272         src+= stride*4;
273         for(x=0; x<BLOCK_SIZE; x+=4)
274         {
275                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
276                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279         }
280 #else
281         int x;
282         src+= stride*3;
283         for(x=0; x<BLOCK_SIZE; x++)
284         {
285                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
286         }
287 #endif
288         return 1;
289 #else
290         int x;
291         src+= stride*4;
292         for(x=0; x<BLOCK_SIZE; x++)
293         {
294                 int min=255;
295                 int max=0;
296                 int y;
297                 for(y=0; y<8; y++){
298                         int v= src[x + y*stride];
299                         if(v>max) max=v;
300                         if(v<min) min=v;
301                 }
302                 if(max-min > 2*QP) return 0;
303         }
304         return 1;
305 #endif
306 }
307
308 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
309         if( isVertDC_C(src, stride, c) ){
310                 if( isVertMinMaxOk_C(src, stride, c->QP) )
311                         return 1;
312                 else
313                         return 0;
314         }else{
315                 return 2;
316         }
317 }
318
319 static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
320 {
321         int y;
322         for(y=0; y<BLOCK_SIZE; y++)
323         {
324                 const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
325
326                 if(ABS(middleEnergy) < 8*QP)
327                 {
328                         const int q=(dst[3] - dst[4])/2;
329                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
330                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
331
332                         int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
333                         d= MAX(d, 0);
334
335                         d= (5*d + 32) >> 6;
336                         d*= SIGN(-middleEnergy);
337
338                         if(q>0)
339                         {
340                                 d= d<0 ? 0 : d;
341                                 d= d>q ? q : d;
342                         }
343                         else
344                         {
345                                 d= d>0 ? 0 : d;
346                                 d= d<q ? q : d;
347                         }
348
349                         dst[3]-= d;
350                         dst[4]+= d;
351                 }
352                 dst+= stride;
353         }
354 }
355
356 /**
357  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
358  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
359  */
360 static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
361 {
362
363         int y;
364         for(y=0; y<BLOCK_SIZE; y++)
365         {
366                 const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
367                 const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
368
369                 int sums[9];
370                 sums[0] = first + dst[0];
371                 sums[1] = dst[0] + dst[1];
372                 sums[2] = dst[1] + dst[2];
373                 sums[3] = dst[2] + dst[3];
374                 sums[4] = dst[3] + dst[4];
375                 sums[5] = dst[4] + dst[5];
376                 sums[6] = dst[5] + dst[6];
377                 sums[7] = dst[6] + dst[7];
378                 sums[8] = dst[7] + last;
379
380                 dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
381                 dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
382                 dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
383                 dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
384                 dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
385                 dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
386                 dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
387                 dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
388
389                 dst+= stride;
390         }
391 }
392
393 /**
394  * Experimental Filter 1 (Horizontal)
395  * will not damage linear gradients
396  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
397  * can only smooth blocks at the expected locations (it cant smooth them if they did move)
398  * MMX2 version does correct clipping C version doesnt
399  * not identical with the vertical one
400  */
401 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
402 {
403         int y;
404         static uint64_t *lut= NULL;
405         if(lut==NULL)
406         {
407                 int i;
408                 lut= (uint64_t*)memalign(8, 256*8);
409                 for(i=0; i<256; i++)
410                 {
411                         int v= i < 128 ? 2*i : 2*(i-256);
412 /*
413 //Simulate 112242211 9-Tap filter
414                         uint64_t a= (v/16) & 0xFF;
415                         uint64_t b= (v/8) & 0xFF;
416                         uint64_t c= (v/4) & 0xFF;
417                         uint64_t d= (3*v/8) & 0xFF;
418 */
419 //Simulate piecewise linear interpolation
420                         uint64_t a= (v/16) & 0xFF;
421                         uint64_t b= (v*3/16) & 0xFF;
422                         uint64_t c= (v*5/16) & 0xFF;
423                         uint64_t d= (7*v/16) & 0xFF;
424                         uint64_t A= (0x100 - a)&0xFF;
425                         uint64_t B= (0x100 - b)&0xFF;
426                         uint64_t C= (0x100 - c)&0xFF;
427                         uint64_t D= (0x100 - c)&0xFF;
428
429                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430                                 (D<<24) | (C<<16) | (B<<8) | (A);
431                         //lut[i] = (v<<32) | (v<<24);
432                 }
433         }
434
435         for(y=0; y<BLOCK_SIZE; y++)
436         {
437                 int a= src[1] - src[2];
438                 int b= src[3] - src[4];
439                 int c= src[5] - src[6];
440
441                 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
442
443                 if(d < QP)
444                 {
445                         int v = d * SIGN(-b);
446
447                         src[1] +=v/8;
448                         src[2] +=v/4;
449                         src[3] +=3*v/8;
450                         src[4] -=3*v/8;
451                         src[5] -=v/4;
452                         src[6] -=v/8;
453
454                 }
455                 src+=stride;
456         }
457 }
458
459
460 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
461 //Plain C versions
462 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
463 #define COMPILE_C
464 #endif
465
466 #ifdef ARCH_X86
467
468 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
469 #define COMPILE_MMX
470 #endif
471
472 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
473 #define COMPILE_MMX2
474 #endif
475
476 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
477 #define COMPILE_3DNOW
478 #endif
479 #endif //ARCH_X86
480
481 #undef HAVE_MMX
482 #undef HAVE_MMX2
483 #undef HAVE_3DNOW
484 #undef ARCH_X86
485
486 #ifdef COMPILE_C
487 #undef HAVE_MMX
488 #undef HAVE_MMX2
489 #undef HAVE_3DNOW
490 #undef ARCH_X86
491 #define RENAME(a) a ## _C
492 #include "postprocess_template.c"
493 #endif
494
495 //MMX versions
496 #ifdef COMPILE_MMX
497 #undef RENAME
498 #define HAVE_MMX
499 #undef HAVE_MMX2
500 #undef HAVE_3DNOW
501 #define ARCH_X86
502 #define RENAME(a) a ## _MMX
503 #include "postprocess_template.c"
504 #endif
505
506 //MMX2 versions
507 #ifdef COMPILE_MMX2
508 #undef RENAME
509 #define HAVE_MMX
510 #define HAVE_MMX2
511 #undef HAVE_3DNOW
512 #define ARCH_X86
513 #define RENAME(a) a ## _MMX2
514 #include "postprocess_template.c"
515 #endif
516
517 //3DNOW versions
518 #ifdef COMPILE_3DNOW
519 #undef RENAME
520 #define HAVE_MMX
521 #undef HAVE_MMX2
522 #define HAVE_3DNOW
523 #define ARCH_X86
524 #define RENAME(a) a ## _3DNow
525 #include "postprocess_template.c"
526 #endif
527
528 // minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
529
530 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
531         QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
532 {
533         PPContext *c= (PPContext *)vc;
534         PPMode *ppMode= (PPMode *)vm;
535         c->ppMode= *ppMode; //FIXME
536
537         // useing ifs here as they are faster than function pointers allthough the
538         // difference wouldnt be messureable here but its much better because
539         // someone might exchange the cpu whithout restarting mplayer ;)
540 #ifdef RUNTIME_CPUDETECT
541 #ifdef ARCH_X86
542         // ordered per speed fasterst first
543         if(c->cpuCaps & PP_CPU_CAPS_MMX2)
544                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
545         else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
546                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
547         else if(c->cpuCaps & PP_CPU_CAPS_MMX)
548                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
549         else
550                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
551 #else
552                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
553 #endif
554 #else //RUNTIME_CPUDETECT
555 #ifdef HAVE_MMX2
556                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
557 #elif defined (HAVE_3DNOW)
558                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
559 #elif defined (HAVE_MMX)
560                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
561 #else
562                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
563 #endif
564 #endif //!RUNTIME_CPUDETECT
565 }
566
567 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
568 //      QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
569
/*
 * -pp command line help.
 * Every filter and preset name printed here must be spelled exactly as in
 * the filter table and the preset replacement table, because the parser
 * compares names case-sensitively.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f       fullyrange      stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa     fast                                    h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                       1. <= 2. <= 3.          larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
;
607
608 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
609 {
610         char temp[GET_MODE_BUFFER_SIZE];
611         char *p= temp;
612         char *filterDelimiters= ",/";
613         char *optionDelimiters= ":";
614         struct PPMode *ppMode;
615         char *filterToken;
616
617         ppMode= memalign(8, sizeof(PPMode));
618         
619         ppMode->lumMode= 0;
620         ppMode->chromMode= 0;
621         ppMode->maxTmpNoise[0]= 700;
622         ppMode->maxTmpNoise[1]= 1500;
623         ppMode->maxTmpNoise[2]= 3000;
624         ppMode->maxAllowedY= 234;
625         ppMode->minAllowedY= 16;
626         ppMode->baseDcDiff= 256/8;
627         ppMode->flatnessThreshold= 56-16-1;
628         ppMode->maxClippedThreshold= 0.01;
629         ppMode->error=0;
630
631         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
632
633         if(verbose>1) printf("pp: %s\n", name);
634
635         for(;;){
636                 char *filterName;
637                 int q= 1000000; //PP_QUALITY_MAX;
638                 int chrom=-1;
639                 char *option;
640                 char *options[OPTIONS_ARRAY_SIZE];
641                 int i;
642                 int filterNameOk=0;
643                 int numOfUnknownOptions=0;
644                 int enable=1; //does the user want us to enabled or disabled the filter
645
646                 filterToken= strtok(p, filterDelimiters);
647                 if(filterToken == NULL) break;
648                 p+= strlen(filterToken) + 1; // p points to next filterToken
649                 filterName= strtok(filterToken, optionDelimiters);
650                 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
651
652                 if(*filterName == '-')
653                 {
654                         enable=0;
655                         filterName++;
656                 }
657
658                 for(;;){ //for all options
659                         option= strtok(NULL, optionDelimiters);
660                         if(option == NULL) break;
661
662                         if(verbose>1) printf("pp: option: %s\n", option);
663                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
664                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
665                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
666                         else
667                         {
668                                 options[numOfUnknownOptions] = option;
669                                 numOfUnknownOptions++;
670                         }
671                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
672                 }
673                 options[numOfUnknownOptions] = NULL;
674
675                 /* replace stuff from the replace Table */
676                 for(i=0; replaceTable[2*i]!=NULL; i++)
677                 {
678                         if(!strcmp(replaceTable[2*i], filterName))
679                         {
680                                 int newlen= strlen(replaceTable[2*i + 1]);
681                                 int plen;
682                                 int spaceLeft;
683
684                                 if(p==NULL) p= temp, *p=0;      //last filter
685                                 else p--, *p=',';               //not last filter
686
687                                 plen= strlen(p);
688                                 spaceLeft= p - temp + plen;
689                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
690                                 {
691                                         ppMode->error++;
692                                         break;
693                                 }
694                                 memmove(p + newlen, p, plen+1);
695                                 memcpy(p, replaceTable[2*i + 1], newlen);
696                                 filterNameOk=1;
697                         }
698                 }
699
700                 for(i=0; filters[i].shortName!=NULL; i++)
701                 {
702 //                      printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
703                         if(   !strcmp(filters[i].longName, filterName)
704                            || !strcmp(filters[i].shortName, filterName))
705                         {
706                                 ppMode->lumMode &= ~filters[i].mask;
707                                 ppMode->chromMode &= ~filters[i].mask;
708
709                                 filterNameOk=1;
710                                 if(!enable) break; // user wants to disable it
711
712                                 if(q >= filters[i].minLumQuality)
713                                         ppMode->lumMode|= filters[i].mask;
714                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
715                                         if(q >= filters[i].minChromQuality)
716                                                 ppMode->chromMode|= filters[i].mask;
717
718                                 if(filters[i].mask == LEVEL_FIX)
719                                 {
720                                         int o;
721                                         ppMode->minAllowedY= 16;
722                                         ppMode->maxAllowedY= 234;
723                                         for(o=0; options[o]!=NULL; o++)
724                                         {
725                                                 if(  !strcmp(options[o],"fullyrange")
726                                                    ||!strcmp(options[o],"f"))
727                                                 {
728                                                         ppMode->minAllowedY= 0;
729                                                         ppMode->maxAllowedY= 255;
730                                                         numOfUnknownOptions--;
731                                                 }
732                                         }
733                                 }
734                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
735                                 {
736                                         int o;
737                                         int numOfNoises=0;
738
739                                         for(o=0; options[o]!=NULL; o++)
740                                         {
741                                                 char *tail;
742                                                 ppMode->maxTmpNoise[numOfNoises]=
743                                                         strtol(options[o], &tail, 0);
744                                                 if(tail!=options[o])
745                                                 {
746                                                         numOfNoises++;
747                                                         numOfUnknownOptions--;
748                                                         if(numOfNoises >= 3) break;
749                                                 }
750                                         }
751                                 }
752                                 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
753                                 {
754                                         int o;
755
756                                         for(o=0; options[o]!=NULL && o<2; o++)
757                                         {
758                                                 char *tail;
759                                                 int val= strtol(options[o], &tail, 0);
760                                                 if(tail==options[o]) break;
761
762                                                 numOfUnknownOptions--;
763                                                 if(o==0) ppMode->baseDcDiff= val;
764                                                 else ppMode->flatnessThreshold= val;
765                                         }
766                                 }
767                                 else if(filters[i].mask == FORCE_QUANT)
768                                 {
769                                         int o;
770                                         ppMode->forcedQuant= 15;
771
772                                         for(o=0; options[o]!=NULL && o<1; o++)
773                                         {
774                                                 char *tail;
775                                                 int val= strtol(options[o], &tail, 0);
776                                                 if(tail==options[o]) break;
777
778                                                 numOfUnknownOptions--;
779                                                 ppMode->forcedQuant= val;
780                                         }
781                                 }
782                         }
783                 }
784                 if(!filterNameOk) ppMode->error++;
785                 ppMode->error += numOfUnknownOptions;
786         }
787
788         if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
789         if(ppMode->error)
790         {
791                 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
792                 free(ppMode);
793                 return NULL;
794         }
795         return ppMode;
796 }
797
798 void pp_free_mode(pp_mode_t *mode){
799     if(mode) free(mode);
800 }
801
/**
 * Replaces the buffer at *p with a newly allocated, zero-filled one.
 * The previous buffer (if any) is freed; its contents are NOT preserved.
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment handed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * If size is negative or the allocation fails, *p is left NULL instead of
 * being passed to memset().
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= NULL; /* never leave a dangling pointer behind */

        /* a negative int would turn into a huge size_t request */
        if(size < 0) return;

        *p= memalign(alignment, size);
        if(*p) memset(*p, 0, size);
}
807
808 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
809         int mbWidth = (width+15)>>4;
810         int mbHeight= (height+15)>>4;
811         int i;
812
813         c->stride= stride;
814         c->qpStride= qpStride;
815
816         reallocAlign((void **)&c->tempDst, 8, stride*24);
817         reallocAlign((void **)&c->tempSrc, 8, stride*24);
818         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
819         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
820         for(i=0; i<256; i++)
821                 c->yHistogram[i]= width*height/64*15/256;
822
823         for(i=0; i<3; i++)
824         {
825                 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
826                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
827                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
828         }
829
830         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
831         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
832         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
833         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
834 }
835
836 static void global_init(void){
837         int i;
838         memset(clip_table, 0, 256);
839         for(i=256; i<512; i++)
840                 clip_table[i]= i;
841         memset(clip_table+512, 0, 256);
842 }
843
844 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
845         PPContext *c= memalign(32, sizeof(PPContext));
846         int stride= (width+15)&(~15); //assumed / will realloc if needed
847         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
848         
849         global_init();
850
851         memset(c, 0, sizeof(PPContext));
852         c->cpuCaps= cpuCaps;
853         if(cpuCaps&PP_FORMAT){
854                 c->hChromaSubSample= cpuCaps&0x3;
855                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
856         }else{
857                 c->hChromaSubSample= 1;
858                 c->vChromaSubSample= 1;
859         }
860
861         reallocBuffers(c, width, height, stride, qpStride);
862         
863         c->frameNum=-1;
864
865         return c;
866 }
867
868 void pp_free_context(void *vc){
869         PPContext *c = (PPContext*)vc;
870         int i;
871         
872         for(i=0; i<3; i++) free(c->tempBlured[i]);
873         for(i=0; i<3; i++) free(c->tempBluredPast[i]);
874         
875         free(c->tempBlocks);
876         free(c->yHistogram);
877         free(c->tempDst);
878         free(c->tempSrc);
879         free(c->deintTemp);
880         free(c->stdQPTable);
881         free(c->nonBQPTable);
882         free(c->forcedQPTable);
883         
884         memset(c, 0, sizeof(PPContext));
885
886         free(c);
887 }
888
889 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
890                  uint8_t * dst[3], int dstStride[3],
891                  int width, int height,
892                  QP_STORE_T *QP_store,  int QPStride,
893                  pp_mode_t *vm,  void *vc, int pict_type)
894 {
895         int mbWidth = (width+15)>>4;
896         int mbHeight= (height+15)>>4;
897         PPMode *mode = (PPMode*)vm;
898         PPContext *c = (PPContext*)vc;
899         int minStride= MAX(srcStride[0], dstStride[0]);
900
901         if(c->stride < minStride || c->qpStride < QPStride)
902                 reallocBuffers(c, width, height, 
903                                 MAX(minStride, c->stride), 
904                                 MAX(c->qpStride, QPStride));
905
906         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
907         {
908                 int i;
909                 QP_store= c->forcedQPTable;
910                 QPStride= 0;
911                 if(mode->lumMode & FORCE_QUANT)
912                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
913                 else
914                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
915         }
916 //printf("pict_type:%d\n", pict_type);
917
918         if(pict_type & PP_PICT_TYPE_QP2){
919                 int i;
920                 const int count= mbHeight * QPStride;
921                 for(i=0; i<(count>>2); i++){
922                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
923                 }
924                 for(i<<=2; i<count; i++){
925                         c->stdQPTable[i] = QP_store[i]>>1;
926                 }
927                 QP_store= c->stdQPTable;
928         }
929
930 if(0){
931 int x,y;
932 for(y=0; y<mbHeight; y++){
933         for(x=0; x<mbWidth; x++){
934                 printf("%2d ", QP_store[x + y*QPStride]);
935         }
936         printf("\n");
937 }
938         printf("\n");
939 }
940
941         if((pict_type&7)!=3)
942         {
943                 int i;
944                 const int count= mbHeight * QPStride;
945                 for(i=0; i<(count>>2); i++){
946                         ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
947                 }
948                 for(i<<=2; i<count; i++){
949                         c->nonBQPTable[i] = QP_store[i] & 0x1F;
950                 }
951         }
952
953         if(verbose>2)
954         {
955                 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
956         }
957
958         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
959                 width, height, QP_store, QPStride, 0, mode, c);
960
961         width  = (width )>>c->hChromaSubSample;
962         height = (height)>>c->vChromaSubSample;
963
964         if(mode->chromMode)
965         {
966                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
967                         width, height, QP_store, QPStride, 1, mode, c);
968                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
969                         width, height, QP_store, QPStride, 2, mode, c);
970         }
971         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
972         {
973                 memcpy(dst[1], src[1], srcStride[1]*height);
974                 memcpy(dst[2], src[2], srcStride[2]*height);
975         }
976         else
977         {
978                 int y;
979                 for(y=0; y<height; y++)
980                 {
981                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
982                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
983                 }
984         }
985 }
986