1 /*
2 * chendct.c --
3 *
4 * A simple DCT algorithm that seems to have fairly nice arithmetic
5 * properties.
6 *
7 * W. H. Chen, C. H. Smith and S. C. Fralick "A fast computational
8 * algorithm for the discrete cosine transform," IEEE Trans. Commun.,
9 * vol. COM-25, pp. 1004-1009, Sept 1977.
10 */
11
12 /*************************************************************
13 Copyright (C) 1990, 1991, 1993 Andy C. Hung, all rights reserved.
14 PUBLIC DOMAIN LICENSE: Stanford University Portable Video Research
15 Group. If you use this software, you agree to the following: This
16 program package is purely experimental, and is licensed "as is".
17 Permission is granted to use, modify, and distribute this program
18 without charge for any purpose, provided this license/ disclaimer
19 notice appears in the copies. No warranty or maintenance is given,
20 either expressed or implied. In no event shall the author(s) be
21 liable to you or a third party for any special, incidental,
22 consequential, or other damages, arising out of the use or inability
23 to use the program for any purpose (or the loss of data), even if we
24 have been advised of such possibilities. Any public reference or
25 advertisement of this source code should refer to it as the Portable
26 Video Research Group (PVRG) code, and not by any author(s) (or
27 Stanford University) name.
28 *************************************************************/
29
30 /*LABEL chendct.c */
31
32 /*PUBLIC*/
33
34 extern void ChenDct();
35 extern void ChenIDct();
36
37 /*PRIVATE*/
38
39 /* Standard Macros */
40
41 #define NO_MULTIPLY
42
43 #ifdef NO_MULTIPLY
44 #define LS(r,s) ((r) << (s))
45 #define RS(r,s) ((r) >> (s)) /* Caution with rounding... */
46 #else
47 #define LS(r,s) ((r) * (1 << (s)))
48 #define RS(r,s) ((r) / (1 << (s))) /* Correct rounding */
49 #endif
50
51 #define MSCALE(expr) RS((expr),9)
52
53 /* Cos constants */
54
55 #define c1d4 362L
56
57 #define c1d8 473L
58 #define c3d8 196L
59
60 #define c1d16 502L
61 #define c3d16 426L
62 #define c5d16 284L
63 #define c7d16 100L
64
65 /*
66 VECTOR_DEFINITION makes the temporary variables vectors.
67 Useful for machines with small register spaces.
68
69 */
70
71 #ifdef VECTOR_DEFINITION
72 #define a0 a[0]
73 #define a1 a[1]
74 #define a2 a[2]
75 #define a3 a[3]
76 #define b0 b[0]
77 #define b1 b[1]
78 #define b2 b[2]
79 #define b3 b[3]
80 #define c0 c[0]
81 #define c1 c[1]
82 #define c2 c[2]
83 #define c3 c[3]
84 #endif
85
86 /*START*/
87 /*BFUNC
88
89 ChenDCT() implements the Chen forward dct. Note that there are two
90 input vectors that represent x=input, and y=output, and must be
91 defined (and storage allocated) before this routine is called.
92
93 EFUNC*/
94
95 void ChenDct(x,y)
96 int *x;
97 int *y;
98 {
99 register int i;
100 register int *aptr,*bptr;
101 #ifdef VECTOR_DEFINTION
102 register int a[4];
103 register int b[4];
104 register int c[4];
105 #else
106 register int a0,a1,a2,a3;
107 register int b0,b1,b2,b3;
108 register int c0,c1,c2,c3;
109 #endif
110
111 /* Loop over columns */
112
113 for(i=0;i<8;i++)
114 {
115 aptr = x+i;
116 bptr = aptr+56;
117
118 a0 = LS((*aptr+*bptr),2);
119 c3 = LS((*aptr-*bptr),2);
120 aptr += 8;
121 bptr -= 8;
122 a1 = LS((*aptr+*bptr),2);
123 c2 = LS((*aptr-*bptr),2);
124 aptr += 8;
125 bptr -= 8;
126 a2 = LS((*aptr+*bptr),2);
127 c1 = LS((*aptr-*bptr),2);
128 aptr += 8;
129 bptr -= 8;
130 a3 = LS((*aptr+*bptr),2);
131 c0 = LS((*aptr-*bptr),2);
132
133 b0 = a0+a3;
134 b1 = a1+a2;
135 b2 = a1-a2;
136 b3 = a0-a3;
137
138 aptr = y+i;
139
140 *aptr = MSCALE(c1d4*(b0+b1));
141 aptr[32] = MSCALE(c1d4*(b0-b1));
142
143 aptr[16] = MSCALE((c3d8*b2)+(c1d8*b3));
144 aptr[48] = MSCALE((c3d8*b3)-(c1d8*b2));
145
146 b0 = MSCALE(c1d4*(c2-c1));
147 b1 = MSCALE(c1d4*(c2+c1));
148
149 a0 = c0+b0;
150 a1 = c0-b0;
151 a2 = c3-b1;
152 a3 = c3+b1;
153
154 aptr[8] = MSCALE((c7d16*a0)+(c1d16*a3));
155 aptr[24] = MSCALE((c3d16*a2)-(c5d16*a1));
156 aptr[40] = MSCALE((c3d16*a1)+(c5d16*a2));
157 aptr[56] = MSCALE((c7d16*a3)-(c1d16*a0));
158 }
159
160 for(i=0;i<8;i++)
161 { /* Loop over rows */
162 aptr = y+LS(i,3);
163 bptr = aptr+7;
164
165 c3 = RS((*(aptr)-*(bptr)),1);
166 a0 = RS((*(aptr++)+*(bptr--)),1);
167 c2 = RS((*(aptr)-*(bptr)),1);
168 a1 = RS((*(aptr++)+*(bptr--)),1);
169 c1 = RS((*(aptr)-*(bptr)),1);
170 a2 = RS((*(aptr++)+*(bptr--)),1);
171 c0 = RS((*(aptr)-*(bptr)),1);
172 a3 = RS((*(aptr)+*(bptr)),1);
173
174 b0 = a0+a3;
175 b1 = a1+a2;
176 b2 = a1-a2;
177 b3 = a0-a3;
178
179 aptr = y+LS(i,3);
180
181 *aptr = MSCALE(c1d4*(b0+b1));
182 aptr[4] = MSCALE(c1d4*(b0-b1));
183 aptr[2] = MSCALE((c3d8*b2)+(c1d8*b3));
184 aptr[6] = MSCALE((c3d8*b3)-(c1d8*b2));
185
186 b0 = MSCALE(c1d4*(c2-c1));
187 b1 = MSCALE(c1d4*(c2+c1));
188
189 a0 = c0+b0;
190 a1 = c0-b0;
191 a2 = c3-b1;
192 a3 = c3+b1;
193
194 aptr[1] = MSCALE((c7d16*a0)+(c1d16*a3));
195 aptr[3] = MSCALE((c3d16*a2)-(c5d16*a1));
196 aptr[5] = MSCALE((c3d16*a1)+(c5d16*a2));
197 aptr[7] = MSCALE((c7d16*a3)-(c1d16*a0));
198 }
199
200 /* We have an additional factor of 8 in the Chen algorithm. */
201
202 for(i=0,aptr=y;i<64;i++,aptr++)
203 *aptr = (((*aptr<0) ? (*aptr-4) : (*aptr+4))/8);
204 }
205
206
207 /*BFUNC
208
209 ChenIDCT() implements the Chen inverse dct. Note that there are two
210 input vectors that represent x=input, and y=output, and must be
211 defined (and storage allocated) before this routine is called.
212
213 EFUNC*/
214
215 void ChenIDct(x,y)
216 int *x;
217 int *y;
218 {
219 register int i;
220 register int *aptr;
221 #ifdef VECTOR_DEFINTION
222 register int a[4];
223 register int b[4];
224 register int c[4];
225 #else
226 register int a0,a1,a2,a3;
227 register int b0,b1,b2,b3;
228 register int c0,c1,c2,c3;
229 #endif
230
231 /* Loop over columns */
232
233 for(i=0;i<8;i++)
234 {
235 aptr = x+i;
236 b0 = LS(*aptr,2);
237 aptr += 8;
238 a0 = LS(*aptr,2);
239 aptr += 8;
240 b2 = LS(*aptr,2);
241 aptr += 8;
242 a1 = LS(*aptr,2);
243 aptr += 8;
244 b1 = LS(*aptr,2);
245 aptr += 8;
246 a2 = LS(*aptr,2);
247 aptr += 8;
248 b3 = LS(*aptr,2);
249 aptr += 8;
250 a3 = LS(*aptr,2);
251
252 /* Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6.
253 And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
254 */
255
256 c0 = MSCALE((c7d16*a0)-(c1d16*a3));
257 c1 = MSCALE((c3d16*a2)-(c5d16*a1));
258 c2 = MSCALE((c3d16*a1)+(c5d16*a2));
259 c3 = MSCALE((c1d16*a0)+(c7d16*a3));
260
261 /* First Butterfly on even terms.*/
262
263 a0 = MSCALE(c1d4*(b0+b1));
264 a1 = MSCALE(c1d4*(b0-b1));
265
266 a2 = MSCALE((c3d8*b2)-(c1d8*b3));
267 a3 = MSCALE((c1d8*b2)+(c3d8*b3));
268
269 b0 = a0+a3;
270 b1 = a1+a2;
271 b2 = a1-a2;
272 b3 = a0-a3;
273
274 /* Second Butterfly */
275
276 a0 = c0+c1;
277 a1 = c0-c1;
278 a2 = c3-c2;
279 a3 = c3+c2;
280
281 c0 = a0;
282 c1 = MSCALE(c1d4*(a2-a1));
283 c2 = MSCALE(c1d4*(a2+a1));
284 c3 = a3;
285
286 aptr = y+i;
287 *aptr = b0+c3;
288 aptr += 8;
289 *aptr = b1+c2;
290 aptr += 8;
291 *aptr = b2+c1;
292 aptr += 8;
293 *aptr = b3+c0;
294 aptr += 8;
295 *aptr = b3-c0;
296 aptr += 8;
297 *aptr = b2-c1;
298 aptr += 8;
299 *aptr = b1-c2;
300 aptr += 8;
301 *aptr = b0-c3;
302 }
303
304 /* Loop over rows */
305
306 for(i=0;i<8;i++)
307 {
308 aptr = y+LS(i,3);
309 b0 = *(aptr++);
310 a0 = *(aptr++);
311 b2 = *(aptr++);
312 a1 = *(aptr++);
313 b1 = *(aptr++);
314 a2 = *(aptr++);
315 b3 = *(aptr++);
316 a3 = *(aptr);
317
318 /*
319 Split into even mode b0 = x0 b1 = x4 b2 = x2 b3 = x6.
320 And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
321 */
322
323 c0 = MSCALE((c7d16*a0)-(c1d16*a3));
324 c1 = MSCALE((c3d16*a2)-(c5d16*a1));
325 c2 = MSCALE((c3d16*a1)+(c5d16*a2));
326 c3 = MSCALE((c1d16*a0)+(c7d16*a3));
327
328 /* First Butterfly on even terms.*/
329
330 a0 = MSCALE(c1d4*(b0+b1));
331 a1 = MSCALE(c1d4*(b0-b1));
332
333 a2 = MSCALE((c3d8*b2)-(c1d8*b3));
334 a3 = MSCALE((c1d8*b2)+(c3d8*b3));
335
336 /* Calculate last set of b's */
337
338 b0 = a0+a3;
339 b1 = a1+a2;
340 b2 = a1-a2;
341 b3 = a0-a3;
342
343 /* Second Butterfly */
344
345 a0 = c0+c1;
346 a1 = c0-c1;
347 a2 = c3-c2;
348 a3 = c3+c2;
349
350 c0 = a0;
351 c1 = MSCALE(c1d4*(a2-a1));
352 c2 = MSCALE(c1d4*(a2+a1));
353 c3 = a3;
354
355 aptr = y+LS(i,3);
356 *(aptr++) = b0+c3;
357 *(aptr++) = b1+c2;
358 *(aptr++) = b2+c1;
359 *(aptr++) = b3+c0;
360 *(aptr++) = b3-c0;
361 *(aptr++) = b2-c1;
362 *(aptr++) = b1-c2;
363 *(aptr) = b0-c3;
364 }
365
366 /*
367 Retrieve correct accuracy. We have additional factor
368 of 16 that must be removed.
369 */
370
371 for(i=0,aptr=y;i<64;i++,aptr++)
372 *aptr = (((*aptr<0) ? (*aptr-8) : (*aptr+8)) /16);
373 }
374
375 /*END*/
376
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.