yadif_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with Libav; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
/*
 * Instruction-set selection for the template.
 *
 * With COMPILE_TEMPLATE_SSE2 defined the code uses the %xmm registers and
 * handles STEP = 8 pixels per loop iteration; otherwise it uses the %mm
 * registers and handles STEP = 4 pixels per iteration.
 *
 *   MM     register name prefix; a digit is appended at the use site
 *   MOV    load/store of STEP bytes (movq for SSE2, movd for MMX)
 *   MOVQ   full-register move (movdqa for SSE2, so memory operands used
 *          with it must be 16-byte aligned)
 *   MOVQU  full-register load without the alignment requirement
 *   LOAD   loads STEP bytes from mem and interleaves them with register 7
 *          (punpcklbw); register 7 is cleared with pxor at the top of the
 *          asm body in FILTER, so this widens the bytes to 16-bit words
 *   PSRL1  shifts the whole register right by one byte
 *   PSRL2  shifts the whole register right by two bytes
 *   PSHUF  NOTE: despite the parameter names, the FIRST argument ("src")
 *          is the register that is written and the second ("dst") is the
 *          one that is read (AT&T operand order). The SSE2 variant copies
 *          and shifts right by two bytes; the MMX variant uses pshufw $9,
 *          whose two low words match that shift.
 */
21 #ifdef COMPILE_TEMPLATE_SSE2
22 #define MM "%%xmm"
23 #define MOV "movq"
24 #define MOVQ "movdqa"
25 #define MOVQU "movdqu"
26 #define STEP 8
27 #define LOAD(mem,dst) \
28  MOV" "mem", "dst" \n\t"\
29  "punpcklbw "MM"7, "dst" \n\t"
30 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
31 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
32 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
33  "psrldq $2, "src" \n\t"
34 #else
35 #define MM "%%mm"
36 #define MOV "movd"
37 #define MOVQ "movq"
38 #define MOVQU "movq"
39 #define STEP 4
40 #define LOAD(mem,dst) \
41  MOV" "mem", "dst" \n\t"\
42  "punpcklbw "MM"7, "dst" \n\t"
43 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
44 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
45 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
46 #endif
47 
/*
 * PABS(tmp, dst): replace each signed 16-bit word of dst by its absolute
 * value.
 *
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is not touched.
 * Otherwise it is emulated as dst = max(dst, 0 - dst), which overwrites tmp,
 * so callers must always pass a scratch register as tmp.
 */
48 #ifdef COMPILE_TEMPLATE_SSSE3
49 #define PABS(tmp,dst) \
50  "pabsw "dst", "dst" \n\t"
51 #else
52 #define PABS(tmp,dst) \
53  "pxor "tmp", "tmp" \n\t"\
54  "psubw "dst", "tmp" \n\t"\
55  "pmaxsw "tmp", "dst" \n\t"
56 #endif
57 
/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringified (#pj / #mj) and used as byte displacements on
 * the (%[cur],%[mrefs]) and (%[cur],%[prefs]) addresses respectively, e.g.
 * CHECK(-2,0) reads from -2(%[cur],%[mrefs]) and 0(%[cur],%[prefs]).
 *
 * Register contract, as visible in the instruction sequence:
 *   in : register 7 must be zero (used by punpcklbw to widen to words)
 *   out: register 5 = candidate prediction, the per-byte average of the two
 *                     rows taken one byte to the right of the loaded
 *                     position, widened to words
 *        register 2 = score, the sum of three neighbouring absolute
 *                     differences, as words
 *   clobbered: registers 3 and 4
 *
 * pavgb rounds up; the xor / pand / psubusb sequence subtracts the rounding
 * bit again so the average is rounded down (this assumes pb_1, defined
 * outside this file, holds 1 in every byte -- TODO confirm).
 */
58 #define CHECK(pj,mj) \
59  MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
60  MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
61  MOVQ" "MM"2, "MM"4 \n\t"\
62  MOVQ" "MM"2, "MM"5 \n\t"\
63  "pxor "MM"3, "MM"4 \n\t"\
64  "pavgb "MM"3, "MM"5 \n\t"\
65  "pand "MANGLE(pb_1)", "MM"4 \n\t"\
66  "psubusb "MM"4, "MM"5 \n\t"\
67  PSRL1(MM"5") \
68  "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
69  MOVQ" "MM"2, "MM"4 \n\t"\
70  "psubusb "MM"3, "MM"2 \n\t"\
71  "psubusb "MM"4, "MM"3 \n\t"\
72  "pmaxub "MM"3, "MM"2 \n\t"\
73  MOVQ" "MM"2, "MM"3 \n\t"\
74  MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
75  PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
76  PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
77  "punpcklbw "MM"7, "MM"2 \n\t"\
78  "punpcklbw "MM"7, "MM"3 \n\t"\
79  "punpcklbw "MM"7, "MM"4 \n\t"\
80  "paddw "MM"3, "MM"2 \n\t"\
81  "paddw "MM"4, "MM"2 \n\t" /* score */
82 
/*
 * CHECK1: accept the direction just evaluated by CHECK wherever it scores
 * better than the best direction so far.
 *
 *   in : register 0 = best score so far, register 1 = best prediction so far,
 *        register 2 = new score, register 5 = new prediction
 *   out: register 0 = per-word minimum of old and new score
 *        register 1 = new prediction where old score > new score, else the
 *                     old prediction
 *        register 6 = copy of the comparison mask, consumed by CHECK2
 *   clobbered: register 3 (mask / blend scratch); register 5 is masked
 *              in place
 */
83 #define CHECK1 \
84  MOVQ" "MM"0, "MM"3 \n\t"\
85  "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
86  "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
87  MOVQ" "MM"3, "MM"6 \n\t"\
88  "pand "MM"3, "MM"5 \n\t"\
89  "pandn "MM"1, "MM"3 \n\t"\
90  "por "MM"5, "MM"3 \n\t"\
91  MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
92 
/*
 * CHECK2: same selection as CHECK1, but first penalises the new score in
 * every lane where the preceding CHECK1 did not accept its direction.
 *
 * Register 6 holds the CHECK1 mask (all ones where accepted, zero
 * otherwise). Adding pw_1 and shifting left by 14 turns it into 0 where
 * accepted and 0x4000 otherwise (this assumes pw_1, defined outside this
 * file, holds 1 in every word -- TODO confirm). That value is added to the
 * score in register 2 with signed saturation before the comparison.
 *
 *   in : registers 0, 1, 2, 5 as for CHECK1; register 6 = mask from CHECK1
 *   out: registers 0 and 1 updated as in CHECK1
 *   clobbered: registers 2, 3, 5, 6
 */
93 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
94  hurts both quality and speed, but matches the C version. */\
95  "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
96  "psllw $14, "MM"6 \n\t"\
97  "paddsw "MM"6, "MM"2 \n\t"\
98  MOVQ" "MM"0, "MM"3 \n\t"\
99  "pcmpgtw "MM"2, "MM"3 \n\t"\
100  "pminsw "MM"2, "MM"0 \n\t"\
101  "pand "MM"3, "MM"5 \n\t"\
102  "pandn "MM"1, "MM"3 \n\t"\
103  "por "MM"5, "MM"3 \n\t"\
104  MOVQ" "MM"3, "MM"1 \n\t"
105 
/**
 * Compute one output line of the yadif deinterlacer with MMX/SSE2 inline
 * assembly, STEP pixels per loop iteration.
 *
 * @param dst    output line; STEP bytes are stored per iteration
 * @param prev   current position in the previous picture
 * @param cur    current position in the current picture
 * @param next   current position in the next picture
 * @param w      loop bound: iterations run for x = 0, STEP, 2*STEP, ... while
 *               x < w, and each iteration handles a full STEP-byte group, so
 *               the buffers must be valid up to w rounded up to a multiple of
 *               STEP
 * @param prefs  byte offset added to the pointers to reach the line labelled
 *               "x+refs" in the comments below
 * @param mrefs  byte offset added to the pointers to reach the line labelled
 *               "x-refs" in the comments below
 * @param parity non-zero: prev2/next2 are prev/cur; zero: they are cur/next
 * @param mode   the additional check against the lines at 2*mrefs / 2*prefs
 *               is done only when mode < 2
 *
 * The asm also reads cur at byte displacements from -3 to +1 around the
 * current position (plus a full register width), so the caller has to
 * provide readable padding on both sides of each line.
 *
 * NOTE(review): the main asm statement declares no clobbers although it
 * overwrites registers 0-7 of the selected register file and stores to tmp
 * through a plain register pointer (no memory operand or "memory" clobber).
 * It also hands its result to the following asm statement in register 1
 * rather than through an output operand. This appears to rely on the
 * compiler not using those registers in between -- confirm against the
 * compilers being supported.
 */
106 static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
107  uint8_t *next, int w, int prefs,
108  int mrefs, int parity, int mode)
109 {
/* Scratch area used by the asm: c at +0, d at +16, e at +32, diff at +48.
 * 16-byte alignment is needed because MOVQ is movdqa in the SSE2 build. */
110  DECLARE_ALIGNED(16, uint8_t, tmp)[16*4];
111  int x;
112 
/* FILTER expands to the complete per-line loop. It is instantiated twice
 * below with different string definitions of prev2/next2, which are pasted
 * into the asm operand names. */
113 #define FILTER\
114  for(x=0; x<w; x+=STEP){\
115  __asm__ volatile(\
 /* register 7 = 0, required by LOAD and CHECK for byte->word widening */\
116  "pxor "MM"7, "MM"7 \n\t"\
117  LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
118  LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
119  LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
120  LOAD("(%["next2"])", MM"3") /* next2[x] */\
121  MOVQ" "MM"3, "MM"4 \n\t"\
122  "paddw "MM"2, "MM"3 \n\t"\
123  "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
124  MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\
125  MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\
126  MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\
127  "psubw "MM"4, "MM"2 \n\t"\
128  PABS( MM"4", MM"2") /* temporal_diff0 */\
129  LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
130  LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
131  "psubw "MM"0, "MM"3 \n\t"\
132  "psubw "MM"1, "MM"4 \n\t"\
133  PABS( MM"5", MM"3")\
134  PABS( MM"5", MM"4")\
135  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
136  "psrlw $1, "MM"2 \n\t"\
137  "psrlw $1, "MM"3 \n\t"\
138  "pmaxsw "MM"3, "MM"2 \n\t"\
139  LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
140  LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
141  "psubw "MM"0, "MM"3 \n\t"\
142  "psubw "MM"1, "MM"4 \n\t"\
143  PABS( MM"5", MM"3")\
144  PABS( MM"5", MM"4")\
145  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
146  "psrlw $1, "MM"3 \n\t"\
147  "pmaxsw "MM"3, "MM"2 \n\t"\
148  MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\
149 \
 /* register 1 = (c+e)>>1, register 0 = |c-e| (computed as 2c-(c+e)) */\
150  "paddw "MM"0, "MM"1 \n\t"\
151  "paddw "MM"0, "MM"0 \n\t"\
152  "psubw "MM"1, "MM"0 \n\t"\
153  "psrlw $1, "MM"1 \n\t" /* spatial_pred */\
154  PABS( MM"2", MM"0") /* ABS(c-e) */\
155 \
156  MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
157  MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
158  MOVQ" "MM"2, "MM"4 \n\t"\
159  "psubusb "MM"3, "MM"2 \n\t"\
160  "psubusb "MM"4, "MM"3 \n\t"\
161  "pmaxub "MM"3, "MM"2 \n\t"\
 /* PSHUF writes its FIRST argument: register 3 = register 2 moved two bytes down */\
162  PSHUF(MM"3", MM"2") \
163  "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
164  "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
165  "paddw "MM"2, "MM"0 \n\t"\
166  "paddw "MM"3, "MM"0 \n\t"\
167  "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
168 \
 /* evaluate four diagonal directions; after each CHECK1/CHECK2 register 0 \
    holds the lowest score so far and register 1 the matching prediction */\
169  CHECK(-2,0)\
170  CHECK1\
171  CHECK(-3,1)\
172  CHECK2\
173  CHECK(0,-2)\
174  CHECK1\
175  CHECK(1,-3)\
176  CHECK2\
177 \
178  /* if(p->mode<2) ... */\
179  MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
 /* jump over the block up to label 1 when mode >= 2 */\
180  "cmpl $2, %[mode] \n\t"\
181  "jge 1f \n\t"\
182  LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
183  LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
184  LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
185  LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
186  "paddw "MM"4, "MM"2 \n\t"\
187  "paddw "MM"5, "MM"3 \n\t"\
188  "psrlw $1, "MM"2 \n\t" /* b */\
189  "psrlw $1, "MM"3 \n\t" /* f */\
190  MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\
191  MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
 /* register 7 stops being zero here; nothing below unpacks with it and \
    it is cleared again at the top of the next iteration */\
192  MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
193  "psubw "MM"4, "MM"2 \n\t" /* b-c */\
194  "psubw "MM"7, "MM"3 \n\t" /* f-e */\
195  MOVQ" "MM"5, "MM"0 \n\t"\
196  "psubw "MM"4, "MM"5 \n\t" /* d-c */\
197  "psubw "MM"7, "MM"0 \n\t" /* d-e */\
198  MOVQ" "MM"2, "MM"4 \n\t"\
199  "pminsw "MM"3, "MM"2 \n\t"\
200  "pmaxsw "MM"4, "MM"3 \n\t"\
201  "pmaxsw "MM"5, "MM"2 \n\t"\
202  "pminsw "MM"5, "MM"3 \n\t"\
203  "pmaxsw "MM"0, "MM"2 \n\t" /* max */\
204  "pminsw "MM"0, "MM"3 \n\t" /* min */\
205  "pxor "MM"4, "MM"4 \n\t"\
206  "pmaxsw "MM"3, "MM"6 \n\t"\
207  "psubw "MM"2, "MM"4 \n\t" /* -max */\
208  "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
209  "1: \n\t"\
210 \
211  MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
212  MOVQ" "MM"2, "MM"3 \n\t"\
213  "psubw "MM"6, "MM"2 \n\t" /* d-diff */\
214  "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
215  "pmaxsw "MM"2, "MM"1 \n\t"\
216  "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
 /* narrow the words back to unsigned bytes; the result stays in register 1 */\
217  "packuswb "MM"1, "MM"1 \n\t"\
218 \
 /* inputs only: no output operands and no clobber list are declared */\
219  ::[prev] "r"(prev),\
220  [cur] "r"(cur),\
221  [next] "r"(next),\
222  [prefs]"r"((x86_reg)prefs),\
223  [mrefs]"r"((x86_reg)mrefs),\
224  [mode] "g"(mode),\
225  [tmp] "r"(tmp)\
226  );\
 /* store STEP result bytes from register 1, which the previous asm \
    statement left there. NOTE(review): "=m"(*dst) names a single byte \
    while MOV writes STEP bytes -- confirm this is acceptable. */\
227  __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
228  dst += STEP;\
229  prev+= STEP;\
230  cur += STEP;\
231  next+= STEP;\
232  }
233 
/* prev2/next2 are string macros naming the asm operands that FILTER uses
 * for the two temporally adjacent pictures; the choice depends on parity. */
234  if (parity) {
235 #define prev2 "prev"
236 #define next2 "cur"
237  FILTER
238 #undef prev2
239 #undef next2
240  } else {
241 #define prev2 "cur"
242 #define next2 "next"
243  FILTER
244 #undef prev2
245 #undef next2
246  }
247 }
/* Remove every helper macro defined in this file -- presumably so the
 * template can be included again with a different COMPILE_TEMPLATE_* and
 * RENAME configuration (the including file is not visible here). */
248 #undef STEP
249 #undef MM
250 #undef MOV
251 #undef MOVQ
252 #undef MOVQU
253 #undef PSHUF
254 #undef PSRL1
255 #undef PSRL2
256 #undef LOAD
257 #undef PABS
258 #undef CHECK
259 #undef CHECK1
260 #undef CHECK2
261 #undef FILTER
uint8_t
#define FILTER
#define RENAME(a)
Definition: mpegaudiodec.c:107
static void RENAME() yadif_filter_line(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode)