1 /*
2 * jpeg-odct.cc --
3 *
4 * FIXME: This file needs a description here.
5 *
6 * Copyright (c) 1996-2002 The Regents of the University of California.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 *
12 * A. Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * B. Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * C. Neither the names of the copyright holders nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
22 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 void
35 xidct(register short* bp, u_int* mask, u_char* p, int stride)
36 {
37 /* Pass 1: process rows. */
38 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
39 /* furthermore, we scale the results by 2**PASS1_BITS. */
40
41 u_int m0 = mask[0];
42 u_int m1 = mask[1];
43
44 /*FIXME*/
45 if (*bp)
46 m0 |= 1;
47
48 for (int rowctr = 0; rowctr < 8; ++rowctr) {
49 int tmp0, tmp1, tmp2, tmp3;
50 int tmp10, tmp11, tmp12, tmp13;
51 int z1, z2, z3, z4, z5;
52
53 /*
54 * Due to quantization, we will usually find that many of the input
55 * coefficients are zero, especially the AC terms. We can exploit this
56 * by short-circuiting the IDCT calculation for any row in which all
57 * the AC terms are zero. In that case each output is equal to the
58 * DC coefficient (with scale factor as needed).
59 * With typical images and quantization tables, half or more of the
60 * row DCT calculations can be simplified this way.
61 */
62 if ((m0 & 0xfe) == 0) {
63 /* AC terms all zero */
64 int v;
65 if (m0 & 1) {
66 v = (bp[0] << PASS1_BITS) & 0xffff;
67 v |= v << 16;
68 } else
69 v = 0;
70 ((u_int*)bp)[0] = v;
71 ((u_int*)bp)[1] = v;
72 ((u_int*)bp)[2] = v;
73 ((u_int*)bp)[3] = v;
74 goto nextrow;
75 }
76
77 /* Even part: reverse the even part of the forward DCT. */
78 /* The rotator is sqrt(2)*c(-6). */
79 if (m0 & 1 << 6) {
80 int d6 = bp[6];
81 if (m0 & 1 << 4) {
82 int d4 = bp[4];
83 if (m0 & 1 << 2) {
84 int d2 = bp[2];
85 if (m0 & 1) {
86 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
87 int d0 = bp[0];
88 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
89 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
90 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
91
92 tmp0 = (d0 + d4) << CONST_BITS;
93 tmp1 = (d0 - d4) << CONST_BITS;
94
95 tmp10 = tmp0 + tmp3;
96 tmp13 = tmp0 - tmp3;
97 tmp11 = tmp1 + tmp2;
98 tmp12 = tmp1 - tmp2;
99 } else {
100 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
101 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
102 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
103 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
104
105 tmp0 = d4 << CONST_BITS;
106
107 tmp10 = tmp0 + tmp3;
108 tmp13 = tmp0 - tmp3;
109 tmp11 = tmp2 - tmp0;
110 tmp12 = -(tmp0 + tmp2);
111 }
112 } else {
113 if (m0 & 1) {
114 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
115 int d0 = bp[0];
116 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
117 tmp3 = MULTIPLY(d6, FIX(0.541196100));
118
119 tmp0 = (d0 + d4) << CONST_BITS;
120 tmp1 = (d0 - d4) << CONST_BITS;
121
122 tmp10 = tmp0 + tmp3;
123 tmp13 = tmp0 - tmp3;
124 tmp11 = tmp1 + tmp2;
125 tmp12 = tmp1 - tmp2;
126 } else {
127 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
128 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
129 tmp3 = MULTIPLY(d6, FIX(0.541196100));
130
131 tmp0 = d4 << CONST_BITS;
132
133 tmp10 = tmp0 + tmp3;
134 tmp13 = tmp0 - tmp3;
135 tmp11 = tmp2 - tmp0;
136 tmp12 = -(tmp0 + tmp2);
137 }
138 }
139 } else {
140 if (m0 & 1 << 2) {
141 int d2 = bp[2];
142 if (m0 & 1) {
143 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
144 int d0 = bp[0];
145 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
146 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
147 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
148
149 tmp0 = d0 << CONST_BITS;
150
151 tmp10 = tmp0 + tmp3;
152 tmp13 = tmp0 - tmp3;
153 tmp11 = tmp0 + tmp2;
154 tmp12 = tmp0 - tmp2;
155 } else {
156 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
157 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
158 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
159 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
160
161 tmp10 = tmp3;
162 tmp13 = -tmp3;
163 tmp11 = tmp2;
164 tmp12 = -tmp2;
165 }
166 } else {
167 if (m0 & 1) {
168 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
169 int d0 = bp[0];
170 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
171 tmp3 = MULTIPLY(d6, FIX(0.541196100));
172
173 tmp0 = d0 << CONST_BITS;
174
175 tmp10 = tmp0 + tmp3;
176 tmp13 = tmp0 - tmp3;
177 tmp11 = tmp0 + tmp2;
178 tmp12 = tmp0 - tmp2;
179 } else {
180 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
181 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
182 tmp3 = MULTIPLY(d6, FIX(0.541196100));
183
184 tmp10 = tmp3;
185 tmp13 = -tmp3;
186 tmp11 = tmp2;
187 tmp12 = -tmp2;
188 }
189 }
190 }
191 } else {
192 if (m0 & 1 << 4) {
193 int d4 = bp[4];
194 if (m0 & 1 << 2) {
195 int d2 = bp[2];
196 if (m0 & 1) {
197 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
198 int d0 = bp[0];
199 tmp2 = MULTIPLY(d2, FIX(0.541196100));
200 tmp3 = MULTIPLY(d2, FIX(1.306562965));
201
202 tmp0 = (d0 + d4) << CONST_BITS;
203 tmp1 = (d0 - d4) << CONST_BITS;
204
205 tmp10 = tmp0 + tmp3;
206 tmp13 = tmp0 - tmp3;
207 tmp11 = tmp1 + tmp2;
208 tmp12 = tmp1 - tmp2;
209 } else {
210 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
211 tmp2 = MULTIPLY(d2, FIX(0.541196100));
212 tmp3 = MULTIPLY(d2, FIX(1.306562965));
213
214 tmp0 = d4 << CONST_BITS;
215
216 tmp10 = tmp0 + tmp3;
217 tmp13 = tmp0 - tmp3;
218 tmp11 = tmp2 - tmp0;
219 tmp12 = -(tmp0 + tmp2);
220 }
221 } else {
222 if (m0 & 1) {
223 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
224 int d0 = bp[0];
225 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
226 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
227 } else {
228 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
229 tmp10 = tmp13 = d4 << CONST_BITS;
230 tmp11 = tmp12 = -tmp10;
231 }
232 }
233 } else {
234 if (m0 & 1 << 2) {
235 int d2 = bp[2];
236 if (m0 & 1) {
237 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
238 int d0 = bp[0];
239 tmp2 = MULTIPLY(d2, FIX(0.541196100));
240 tmp3 = MULTIPLY(d2, FIX(1.306562965));
241
242 tmp0 = d0 << CONST_BITS;
243
244 tmp10 = tmp0 + tmp3;
245 tmp13 = tmp0 - tmp3;
246 tmp11 = tmp0 + tmp2;
247 tmp12 = tmp0 - tmp2;
248 } else {
249 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
250 tmp2 = MULTIPLY(d2, FIX(0.541196100));
251 tmp3 = MULTIPLY(d2, FIX(1.306562965));
252
253 tmp10 = tmp3;
254 tmp13 = -tmp3;
255 tmp11 = tmp2;
256 tmp12 = -tmp2;
257 }
258 } else {
259 if (m0 & 1) {
260 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
261 int d0 = bp[0];
262 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
263 } else {
264 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
265 tmp10 = tmp13 = tmp11 = tmp12 = 0;
266 }
267 }
268 }
269 }
270
271
272 /* Odd part per figure 8; the matrix is unitary and hence its
273 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
274 */
275
276 if (m0 & 1 << 7) {
277 int d7 = bp[7];
278 if (m0 & 1 << 5) {
279 int d5 = bp[5];
280 if (m0 & 1 << 3) {
281 int d3 = bp[3];
282 if (m0 & 1 << 1) {
283 int d1 = bp[1];
284 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
285 z1 = d7 + d1;
286 z2 = d5 + d3;
287 z3 = d7 + d3;
288 z4 = d5 + d1;
289 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
290
291 tmp0 = MULTIPLY(d7, FIX(0.298631336));
292 tmp1 = MULTIPLY(d5, FIX(2.053119869));
293 tmp2 = MULTIPLY(d3, FIX(3.072711026));
294 tmp3 = MULTIPLY(d1, FIX(1.501321110));
295 z1 = MULTIPLY(z1, - FIX(0.899976223));
296 z2 = MULTIPLY(z2, - FIX(2.562915447));
297 z3 = MULTIPLY(z3, - FIX(1.961570560));
298 z4 = MULTIPLY(z4, - FIX(0.390180644));
299
300 z3 += z5;
301 z4 += z5;
302
303 tmp0 += z1 + z3;
304 tmp1 += z2 + z4;
305 tmp2 += z2 + z3;
306 tmp3 += z1 + z4;
307 } else {
308 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
309 z1 = d7;
310 z2 = d5 + d3;
311 z3 = d7 + d3;
312 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
313
314 tmp0 = MULTIPLY(d7, FIX(0.298631336));
315 tmp1 = MULTIPLY(d5, FIX(2.053119869));
316 tmp2 = MULTIPLY(d3, FIX(3.072711026));
317 z1 = MULTIPLY(d7, - FIX(0.899976223));
318 z2 = MULTIPLY(z2, - FIX(2.562915447));
319 z3 = MULTIPLY(z3, - FIX(1.961570560));
320 z4 = MULTIPLY(d5, - FIX(0.390180644));
321
322 z3 += z5;
323 z4 += z5;
324
325 tmp0 += z1 + z3;
326 tmp1 += z2 + z4;
327 tmp2 += z2 + z3;
328 tmp3 = z1 + z4;
329 }
330 } else {
331 if (m0 & 1 << 1) {
332 int d1 = bp[1];
333 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
334 z1 = d7 + d1;
335 z2 = d5;
336 z3 = d7;
337 z4 = d5 + d1;
338 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
339
340 tmp0 = MULTIPLY(d7, FIX(0.298631336));
341 tmp1 = MULTIPLY(d5, FIX(2.053119869));
342 tmp3 = MULTIPLY(d1, FIX(1.501321110));
343 z1 = MULTIPLY(z1, - FIX(0.899976223));
344 z2 = MULTIPLY(d5, - FIX(2.562915447));
345 z3 = MULTIPLY(d7, - FIX(1.961570560));
346 z4 = MULTIPLY(z4, - FIX(0.390180644));
347
348 z3 += z5;
349 z4 += z5;
350
351 tmp0 += z1 + z3;
352 tmp1 += z2 + z4;
353 tmp2 = z2 + z3;
354 tmp3 += z1 + z4;
355 } else {
356 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
357 tmp0 = MULTIPLY(d7, - FIX(0.601344887));
358 z1 = MULTIPLY(d7, - FIX(0.899976223));
359 z3 = MULTIPLY(d7, - FIX(1.961570560));
360 tmp1 = MULTIPLY(d5, - FIX(0.509795578));
361 z2 = MULTIPLY(d5, - FIX(2.562915447));
362 z4 = MULTIPLY(d5, - FIX(0.390180644));
363 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
364
365 z3 += z5;
366 z4 += z5;
367
368 tmp0 += z3;
369 tmp1 += z4;
370 tmp2 = z2 + z3;
371 tmp3 = z1 + z4;
372 }
373 }
374 } else {
375 if (m0 & 1 << 3) {
376 int d3 = bp[3];
377 if (m0 & 1 << 1) {
378 int d1 = bp[1];
379 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
380 z1 = d7 + d1;
381 z3 = d7 + d3;
382 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
383
384 tmp0 = MULTIPLY(d7, FIX(0.298631336));
385 tmp2 = MULTIPLY(d3, FIX(3.072711026));
386 tmp3 = MULTIPLY(d1, FIX(1.501321110));
387 z1 = MULTIPLY(z1, - FIX(0.899976223));
388 z2 = MULTIPLY(d3, - FIX(2.562915447));
389 z3 = MULTIPLY(z3, - FIX(1.961570560));
390 z4 = MULTIPLY(d1, - FIX(0.390180644));
391
392 z3 += z5;
393 z4 += z5;
394
395 tmp0 += z1 + z3;
396 tmp1 = z2 + z4;
397 tmp2 += z2 + z3;
398 tmp3 += z1 + z4;
399 } else {
400 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
401 z3 = d7 + d3;
402
403 tmp0 = MULTIPLY(d7, - FIX(0.601344887));
404 z1 = MULTIPLY(d7, - FIX(0.899976223));
405 tmp2 = MULTIPLY(d3, FIX(0.509795579));
406 z2 = MULTIPLY(d3, - FIX(2.562915447));
407 z5 = MULTIPLY(z3, FIX(1.175875602));
408 z3 = MULTIPLY(z3, - FIX(0.785694958));
409
410 tmp0 += z3;
411 tmp1 = z2 + z5;
412 tmp2 += z3;
413 tmp3 = z1 + z5;
414 }
415 } else {
416 if (m0 & 1 << 1) {
417 int d1 = bp[1];
418 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
419 z1 = d7 + d1;
420 z5 = MULTIPLY(z1, FIX(1.175875602));
421
422 z1 = MULTIPLY(z1, FIX(0.275899379));
423 z3 = MULTIPLY(d7, - FIX(1.961570560));
424 tmp0 = MULTIPLY(d7, - FIX(1.662939224));
425 z4 = MULTIPLY(d1, - FIX(0.390180644));
426 tmp3 = MULTIPLY(d1, FIX(1.111140466));
427
428 tmp0 += z1;
429 tmp1 = z4 + z5;
430 tmp2 = z3 + z5;
431 tmp3 += z1;
432 } else {
433 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
434 tmp0 = MULTIPLY(d7, - FIX(1.387039845));
435 tmp1 = MULTIPLY(d7, FIX(1.175875602));
436 tmp2 = MULTIPLY(d7, - FIX(0.785694958));
437 tmp3 = MULTIPLY(d7, FIX(0.275899379));
438 }
439 }
440 }
441 } else {
442 if (m0 & 1 << 5) {
443 int d5 = bp[5];
444 if (m0 & 1 << 3) {
445 int d3 = bp[3];
446 if (m0 & 1 << 1) {
447 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
448 int d1 = bp[1];
449 z2 = d5 + d3;
450 z4 = d5 + d1;
451 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
452
453 tmp1 = MULTIPLY(d5, FIX(2.053119869));
454 tmp2 = MULTIPLY(d3, FIX(3.072711026));
455 tmp3 = MULTIPLY(d1, FIX(1.501321110));
456 z1 = MULTIPLY(d1, - FIX(0.899976223));
457 z2 = MULTIPLY(z2, - FIX(2.562915447));
458 z3 = MULTIPLY(d3, - FIX(1.961570560));
459 z4 = MULTIPLY(z4, - FIX(0.390180644));
460
461 z3 += z5;
462 z4 += z5;
463
464 tmp0 = z1 + z3;
465 tmp1 += z2 + z4;
466 tmp2 += z2 + z3;
467 tmp3 += z1 + z4;
468 } else {
469 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
470 z2 = d5 + d3;
471
472 z5 = MULTIPLY(z2, FIX(1.175875602));
473 tmp1 = MULTIPLY(d5, FIX(1.662939225));
474 z4 = MULTIPLY(d5, - FIX(0.390180644));
475 z2 = MULTIPLY(z2, - FIX(1.387039845));
476 tmp2 = MULTIPLY(d3, FIX(1.111140466));
477 z3 = MULTIPLY(d3, - FIX(1.961570560));
478
479 tmp0 = z3 + z5;
480 tmp1 += z2;
481 tmp2 += z2;
482 tmp3 = z4 + z5;
483 }
484 } else {
485 if (m0 & 1 << 1) {
486 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
487 int d1 = bp[1];
488 z4 = d5 + d1;
489
490 z5 = MULTIPLY(z4, FIX(1.175875602));
491 z1 = MULTIPLY(d1, - FIX(0.899976223));
492 tmp3 = MULTIPLY(d1, FIX(0.601344887));
493 tmp1 = MULTIPLY(d5, - FIX(0.509795578));
494 z2 = MULTIPLY(d5, - FIX(2.562915447));
495 z4 = MULTIPLY(z4, FIX(0.785694958));
496
497 tmp0 = z1 + z5;
498 tmp1 += z4;
499 tmp2 = z2 + z5;
500 tmp3 += z4;
501 } else {
502 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
503 tmp0 = MULTIPLY(d5, FIX(1.175875602));
504 tmp1 = MULTIPLY(d5, FIX(0.275899380));
505 tmp2 = MULTIPLY(d5, - FIX(1.387039845));
506 tmp3 = MULTIPLY(d5, FIX(0.785694958));
507 }
508 }
509 } else {
510 if (m0 & 1 << 3) {
511 int d3 = bp[3];
512 if (m0 & 1 << 1) {
513 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
514 int d1 = bp[1];
515 z5 = d1 + d3;
516 tmp3 = MULTIPLY(d1, FIX(0.211164243));
517 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
518 z1 = MULTIPLY(d1, FIX(1.061594337));
519 z2 = MULTIPLY(d3, - FIX(2.172734803));
520 z4 = MULTIPLY(z5, FIX(0.785694958));
521 z5 = MULTIPLY(z5, FIX(1.175875602));
522
523 tmp0 = z1 - z4;
524 tmp1 = z2 + z4;
525 tmp2 += z5;
526 tmp3 += z5;
527 } else {
528 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
529 tmp0 = MULTIPLY(d3, - FIX(0.785694958));
530 tmp1 = MULTIPLY(d3, - FIX(1.387039845));
531 tmp2 = MULTIPLY(d3, - FIX(0.275899379));
532 tmp3 = MULTIPLY(d3, FIX(1.175875602));
533 }
534 } else {
535 if (m0 & 1 << 1) {
536 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
537 int d1 = bp[1];
538 tmp0 = MULTIPLY(d1, FIX(0.275899379));
539 tmp1 = MULTIPLY(d1, FIX(0.785694958));
540 tmp2 = MULTIPLY(d1, FIX(1.175875602));
541 tmp3 = MULTIPLY(d1, FIX(1.387039845));
542 } else {
543 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
544 tmp0 = tmp1 = tmp2 = tmp3 = 0;
545 }
546 }
547 }
548 }
549
550 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
551
552 bp[0] = DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
553 bp[7] = DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
554 bp[1] = DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
555 bp[6] = DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
556 bp[2] = DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
557 bp[5] = DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
558 bp[3] = DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
559 bp[4] = DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
560
561 nextrow:
562 bp += 8; /* advance pointer to next row */
563 m0 >>= 8;
564 m0 |= m1 << 24;
565 m1 >>= 8;
566 }
567
568 /* Pass 2: process columns. */
569 /* Note that we must descale the results by a factor of 8 == 2**3, */
570 /* and also undo the PASS1_BITS scaling. */
571
572 bp -= 64;
573 for (rowctr = 8; --rowctr >= 0;) {
574 int tmp0, tmp1, tmp2, tmp3;
575 int tmp10, tmp11, tmp12, tmp13;
576 int z1, z2, z3, z4, z5;
577
578 /* Columns of zeroes can be exploited in the same way as we did with rows.
579 * However, the row calculation has created many nonzero AC terms, so the
580 * simplification applies less often (typically 5% to 10% of the time).
581 * On machines with very fast multiplication, it's possible that the
582 * test takes more time than it's worth. In that case this section
583 * may be commented out.
584 */
585
586 int d0 = bp[8*0];
587 int d1 = bp[8*1];
588 int d2 = bp[8*2];
589 int d3 = bp[8*3];
590 int d4 = bp[8*4];
591 int d5 = bp[8*5];
592 int d6 = bp[8*6];
593 int d7 = bp[8*7];
594
595 /* Even part: reverse the even part of the forward DCT. */
596 /* The rotator is sqrt(2)*c(-6). */
597 if (d6) {
598 if (d4) {
599 if (d2) {
600 if (d0) {
601 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
602 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
603 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
604 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
605
606 tmp0 = (d0 + d4) << CONST_BITS;
607 tmp1 = (d0 - d4) << CONST_BITS;
608
609 tmp10 = tmp0 + tmp3;
610 tmp13 = tmp0 - tmp3;
611 tmp11 = tmp1 + tmp2;
612 tmp12 = tmp1 - tmp2;
613 } else {
614 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
615 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
616 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
617 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
618
619 tmp0 = d4 << CONST_BITS;
620
621 tmp10 = tmp0 + tmp3;
622 tmp13 = tmp0 - tmp3;
623 tmp11 = tmp2 - tmp0;
624 tmp12 = -(tmp0 + tmp2);
625 }
626 } else {
627 if (d0) {
628 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
629 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
630 tmp3 = MULTIPLY(d6, FIX(0.541196100));
631
632 tmp0 = (d0 + d4) << CONST_BITS;
633 tmp1 = (d0 - d4) << CONST_BITS;
634
635 tmp10 = tmp0 + tmp3;
636 tmp13 = tmp0 - tmp3;
637 tmp11 = tmp1 + tmp2;
638 tmp12 = tmp1 - tmp2;
639 } else {
640 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
641 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
642 tmp3 = MULTIPLY(d6, FIX(0.541196100));
643
644 tmp0 = d4 << CONST_BITS;
645
646 tmp10 = tmp0 + tmp3;
647 tmp13 = tmp0 - tmp3;
648 tmp11 = tmp2 - tmp0;
649 tmp12 = -(tmp0 + tmp2);
650 }
651 }
652 } else {
653 if (d2) {
654 if (d0) {
655 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
656 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
657 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
658 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
659
660 tmp0 = d0 << CONST_BITS;
661
662 tmp10 = tmp0 + tmp3;
663 tmp13 = tmp0 - tmp3;
664 tmp11 = tmp0 + tmp2;
665 tmp12 = tmp0 - tmp2;
666 } else {
667 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
668 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
669 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
670 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
671
672 tmp10 = tmp3;
673 tmp13 = -tmp3;
674 tmp11 = tmp2;
675 tmp12 = -tmp2;
676 }
677 } else {
678 if (d0) {
679 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
680 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
681 tmp3 = MULTIPLY(d6, FIX(0.541196100));
682
683 tmp0 = d0 << CONST_BITS;
684
685 tmp10 = tmp0 + tmp3;
686 tmp13 = tmp0 - tmp3;
687 tmp11 = tmp0 + tmp2;
688 tmp12 = tmp0 - tmp2;
689 } else {
690 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
691 tmp2 = MULTIPLY(d6, - FIX(1.306562965));
692 tmp3 = MULTIPLY(d6, FIX(0.541196100));
693
694 tmp10 = tmp3;
695 tmp13 = -tmp3;
696 tmp11 = tmp2;
697 tmp12 = -tmp2;
698 }
699 }
700 }
701 } else {
702 if (d4) {
703 if (d2) {
704 if (d0) {
705 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
706 tmp2 = MULTIPLY(d2, FIX(0.541196100));
707 tmp3 = MULTIPLY(d2, FIX(1.306562965));
708
709 tmp0 = (d0 + d4) << CONST_BITS;
710 tmp1 = (d0 - d4) << CONST_BITS;
711
712 tmp10 = tmp0 + tmp3;
713 tmp13 = tmp0 - tmp3;
714 tmp11 = tmp1 + tmp2;
715 tmp12 = tmp1 - tmp2;
716 } else {
717 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
718 tmp2 = MULTIPLY(d2, FIX(0.541196100));
719 tmp3 = MULTIPLY(d2, FIX(1.306562965));
720
721 tmp0 = d4 << CONST_BITS;
722
723 tmp10 = tmp0 + tmp3;
724 tmp13 = tmp0 - tmp3;
725 tmp11 = tmp2 - tmp0;
726 tmp12 = -(tmp0 + tmp2);
727 }
728 } else {
729 if (d0) {
730 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
731 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
732 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
733 } else {
734 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
735 tmp10 = tmp13 = d4 << CONST_BITS;
736 tmp11 = tmp12 = -tmp10;
737 }
738 }
739 } else {
740 if (d2) {
741 if (d0) {
742 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
743 tmp2 = MULTIPLY(d2, FIX(0.541196100));
744 tmp3 = MULTIPLY(d2, FIX(1.306562965));
745
746 tmp0 = d0 << CONST_BITS;
747
748 tmp10 = tmp0 + tmp3;
749 tmp13 = tmp0 - tmp3;
750 tmp11 = tmp0 + tmp2;
751 tmp12 = tmp0 - tmp2;
752 } else {
753 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
754 tmp2 = MULTIPLY(d2, FIX(0.541196100));
755 tmp3 = MULTIPLY(d2, FIX(1.306562965));
756
757 tmp10 = tmp3;
758 tmp13 = -tmp3;
759 tmp11 = tmp2;
760 tmp12 = -tmp2;
761 }
762 } else {
763 if (d0) {
764 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
765 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
766 } else {
767 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
768 tmp10 = tmp13 = tmp11 = tmp12 = 0;
769 }
770 }
771 }
772 }
773
774 /* Odd part per figure 8; the matrix is unitary and hence its
775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
776 */
777 if (d7) {
778 if (d5) {
779 if (d3) {
780 if (d1) {
781 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
782 z1 = d7 + d1;
783 z2 = d5 + d3;
784 z3 = d7 + d3;
785 z4 = d5 + d1;
786 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
787
788 tmp0 = MULTIPLY(d7, FIX(0.298631336));
789 tmp1 = MULTIPLY(d5, FIX(2.053119869));
790 tmp2 = MULTIPLY(d3, FIX(3.072711026));
791 tmp3 = MULTIPLY(d1, FIX(1.501321110));
792 z1 = MULTIPLY(z1, - FIX(0.899976223));
793 z2 = MULTIPLY(z2, - FIX(2.562915447));
794 z3 = MULTIPLY(z3, - FIX(1.961570560));
795 z4 = MULTIPLY(z4, - FIX(0.390180644));
796
797 z3 += z5;
798 z4 += z5;
799
800 tmp0 += z1 + z3;
801 tmp1 += z2 + z4;
802 tmp2 += z2 + z3;
803 tmp3 += z1 + z4;
804 } else {
805 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
806 z1 = d7;
807 z2 = d5 + d3;
808 z3 = d7 + d3;
809 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
810
811 tmp0 = MULTIPLY(d7, FIX(0.298631336));
812 tmp1 = MULTIPLY(d5, FIX(2.053119869));
813 tmp2 = MULTIPLY(d3, FIX(3.072711026));
814 z1 = MULTIPLY(d7, - FIX(0.899976223));
815 z2 = MULTIPLY(z2, - FIX(2.562915447));
816 z3 = MULTIPLY(z3, - FIX(1.961570560));
817 z4 = MULTIPLY(d5, - FIX(0.390180644));
818
819 z3 += z5;
820 z4 += z5;
821
822 tmp0 += z1 + z3;
823 tmp1 += z2 + z4;
824 tmp2 += z2 + z3;
825 tmp3 = z1 + z4;
826 }
827 } else {
828 if (d1) {
829 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
830 z1 = d7 + d1;
831 z2 = d5;
832 z3 = d7;
833 z4 = d5 + d1;
834 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
835
836 tmp0 = MULTIPLY(d7, FIX(0.298631336));
837 tmp1 = MULTIPLY(d5, FIX(2.053119869));
838 tmp3 = MULTIPLY(d1, FIX(1.501321110));
839 z1 = MULTIPLY(z1, - FIX(0.899976223));
840 z2 = MULTIPLY(d5, - FIX(2.562915447));
841 z3 = MULTIPLY(d7, - FIX(1.961570560));
842 z4 = MULTIPLY(z4, - FIX(0.390180644));
843
844 z3 += z5;
845 z4 += z5;
846
847 tmp0 += z1 + z3;
848 tmp1 += z2 + z4;
849 tmp2 = z2 + z3;
850 tmp3 += z1 + z4;
851 } else {
852 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
853 tmp0 = MULTIPLY(d7, - FIX(0.601344887));
854 z1 = MULTIPLY(d7, - FIX(0.899976223));
855 z3 = MULTIPLY(d7, - FIX(1.961570560));
856 tmp1 = MULTIPLY(d5, - FIX(0.509795578));
857 z2 = MULTIPLY(d5, - FIX(2.562915447));
858 z4 = MULTIPLY(d5, - FIX(0.390180644));
859 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
860
861 z3 += z5;
862 z4 += z5;
863
864 tmp0 += z3;
865 tmp1 += z4;
866 tmp2 = z2 + z3;
867 tmp3 = z1 + z4;
868 }
869 }
870 } else {
871 if (d3) {
872 if (d1) {
873 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
874 z1 = d7 + d1;
875 z3 = d7 + d3;
876 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
877
878 tmp0 = MULTIPLY(d7, FIX(0.298631336));
879 tmp2 = MULTIPLY(d3, FIX(3.072711026));
880 tmp3 = MULTIPLY(d1, FIX(1.501321110));
881 z1 = MULTIPLY(z1, - FIX(0.899976223));
882 z2 = MULTIPLY(d3, - FIX(2.562915447));
883 z3 = MULTIPLY(z3, - FIX(1.961570560));
884 z4 = MULTIPLY(d1, - FIX(0.390180644));
885
886 z3 += z5;
887 z4 += z5;
888
889 tmp0 += z1 + z3;
890 tmp1 = z2 + z4;
891 tmp2 += z2 + z3;
892 tmp3 += z1 + z4;
893 } else {
894 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
895 z3 = d7 + d3;
896
897 tmp0 = MULTIPLY(d7, - FIX(0.601344887));
898 z1 = MULTIPLY(d7, - FIX(0.899976223));
899 tmp2 = MULTIPLY(d3, FIX(0.509795579));
900 z2 = MULTIPLY(d3, - FIX(2.562915447));
901 z5 = MULTIPLY(z3, FIX(1.175875602));
902 z3 = MULTIPLY(z3, - FIX(0.785694958));
903
904 tmp0 += z3;
905 tmp1 = z2 + z5;
906 tmp2 += z3;
907 tmp3 = z1 + z5;
908 }
909 } else {
910 if (d1) {
911 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
912 z1 = d7 + d1;
913 z5 = MULTIPLY(z1, FIX(1.175875602));
914
915 z1 = MULTIPLY(z1, FIX(0.275899379));
916 z3 = MULTIPLY(d7, - FIX(1.961570560));
917 tmp0 = MULTIPLY(d7, - FIX(1.662939224));
918 z4 = MULTIPLY(d1, - FIX(0.390180644));
919 tmp3 = MULTIPLY(d1, FIX(1.111140466));
920
921 tmp0 += z1;
922 tmp1 = z4 + z5;
923 tmp2 = z3 + z5;
924 tmp3 += z1;
925 } else {
926 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
927 tmp0 = MULTIPLY(d7, - FIX(1.387039845));
928 tmp1 = MULTIPLY(d7, FIX(1.175875602));
929 tmp2 = MULTIPLY(d7, - FIX(0.785694958));
930 tmp3 = MULTIPLY(d7, FIX(0.275899379));
931 }
932 }
933 }
934 } else {
935 if (d5) {
936 if (d3) {
937 if (d1) {
938 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
939 z2 = d5 + d3;
940 z4 = d5 + d1;
941 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
942
943 tmp1 = MULTIPLY(d5, FIX(2.053119869));
944 tmp2 = MULTIPLY(d3, FIX(3.072711026));
945 tmp3 = MULTIPLY(d1, FIX(1.501321110));
946 z1 = MULTIPLY(d1, - FIX(0.899976223));
947 z2 = MULTIPLY(z2, - FIX(2.562915447));
948 z3 = MULTIPLY(d3, - FIX(1.961570560));
949 z4 = MULTIPLY(z4, - FIX(0.390180644));
950
951 z3 += z5;
952 z4 += z5;
953
954 tmp0 = z1 + z3;
955 tmp1 += z2 + z4;
956 tmp2 += z2 + z3;
957 tmp3 += z1 + z4;
958 } else {
959 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
960 z2 = d5 + d3;
961
962 z5 = MULTIPLY(z2, FIX(1.175875602));
963 tmp1 = MULTIPLY(d5, FIX(1.662939225));
964 z4 = MULTIPLY(d5, - FIX(0.390180644));
965 z2 = MULTIPLY(z2, - FIX(1.387039845));
966 tmp2 = MULTIPLY(d3, FIX(1.111140466));
967 z3 = MULTIPLY(d3, - FIX(1.961570560));
968
969 tmp0 = z3 + z5;
970 tmp1 += z2;
971 tmp2 += z2;
972 tmp3 = z4 + z5;
973 }
974 } else {
975 if (d1) {
976 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
977 z4 = d5 + d1;
978
979 z5 = MULTIPLY(z4, FIX(1.175875602));
980 z1 = MULTIPLY(d1, - FIX(0.899976223));
981 tmp3 = MULTIPLY(d1, FIX(0.601344887));
982 tmp1 = MULTIPLY(d5, - FIX(0.509795578));
983 z2 = MULTIPLY(d5, - FIX(2.562915447));
984 z4 = MULTIPLY(z4, FIX(0.785694958));
985
986 tmp0 = z1 + z5;
987 tmp1 += z4;
988 tmp2 = z2 + z5;
989 tmp3 += z4;
990 } else {
991 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
992 tmp0 = MULTIPLY(d5, FIX(1.175875602));
993 tmp1 = MULTIPLY(d5, FIX(0.275899380));
994 tmp2 = MULTIPLY(d5, - FIX(1.387039845));
995 tmp3 = MULTIPLY(d5, FIX(0.785694958));
996 }
997 }
998 } else {
999 if (d3) {
1000 if (d1) {
1001 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1002 z5 = d1 + d3;
1003 tmp3 = MULTIPLY(d1, FIX(0.211164243));
1004 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1005 z1 = MULTIPLY(d1, FIX(1.061594337));
1006 z2 = MULTIPLY(d3, - FIX(2.172734803));
1007 z4 = MULTIPLY(z5, FIX(0.785694958));
1008 z5 = MULTIPLY(z5, FIX(1.175875602));
1009
1010 tmp0 = z1 - z4;
1011 tmp1 = z2 + z4;
1012 tmp2 += z5;
1013 tmp3 += z5;
1014 } else {
1015 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1016 tmp0 = MULTIPLY(d3, - FIX(0.785694958));
1017 tmp1 = MULTIPLY(d3, - FIX(1.387039845));
1018 tmp2 = MULTIPLY(d3, - FIX(0.275899379));
1019 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1020 }
1021 } else {
1022 if (d1) {
1023 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1024 tmp0 = MULTIPLY(d1, FIX(0.275899379));
1025 tmp1 = MULTIPLY(d1, FIX(0.785694958));
1026 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1027 tmp3 = MULTIPLY(d1, FIX(1.387039845));
1028 } else {
1029 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1030 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1031 }
1032 }
1033 }
1034 }
1035
1036 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1037
1038 d0 = DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3);
1039 d0 = UCLIMIT(d0 + 128, d1);
1040 *p = d0;
1041 p += stride;
1042
1043 d0 = DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3);
1044 d0 = UCLIMIT(d0 + 128, d1);
1045 *p = d0;
1046 p += stride;
1047
1048 d0 = DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3);
1049 d0 = UCLIMIT(d0 + 128, d1);
1050 *p = d0;
1051 p += stride;
1052
1053 d0 = DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3);
1054 d0 = UCLIMIT(d0 + 128, d1);
1055 *p = d0;
1056 p += stride;
1057
1058 d0 = DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3);
1059 d0 = UCLIMIT(d0 + 128, d1);
1060 *p = d0;
1061 p += stride;
1062
1063 d0 = DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3);
1064 d0 = UCLIMIT(d0 + 128, d1);
1065 *p = d0;
1066 p += stride;
1067
1068 d0 = DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3);
1069 d0 = UCLIMIT(d0 + 128, d1);
1070 *p = d0;
1071 p += stride;
1072
1073 d0 = DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3);
1074 d0 = UCLIMIT(d0 + 128, d1);
1075 *p = d0;
1076 p += stride;
1077
1078 p -= stride <&