~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/codec/jpeg/jpeg-odct.cc

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 /*
  2  * jpeg-odct.cc --
  3  *
  4  *      FIXME: This file needs a description here.
  5  *
  6  * Copyright (c) 1996-2002 The Regents of the University of California.
  7  * All rights reserved.
  8  *
  9  * Redistribution and use in source and binary forms, with or without
 10  * modification, are permitted provided that the following conditions are met:
 11  *
 12  * A. Redistributions of source code must retain the above copyright notice,
 13  *    this list of conditions and the following disclaimer.
 14  * B. Redistributions in binary form must reproduce the above copyright notice,
 15  *    this list of conditions and the following disclaimer in the documentation
 16  *    and/or other materials provided with the distribution.
 17  * C. Neither the names of the copyright holders nor the names of its
 18  *    contributors may be used to endorse or promote products derived from this
 19  *    software without specific prior written permission.
 20  *
 21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 22  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 31  * POSSIBILITY OF SUCH DAMAGE.
 32  */
 33 
 34 void
 35 xidct(register short* bp, u_int* mask, u_char* p, int stride)
 36 {
 37   /* Pass 1: process rows. */
 38   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 39   /* furthermore, we scale the results by 2**PASS1_BITS. */
 40 
 41   u_int m0 = mask[0];
 42   u_int m1 = mask[1];
 43 
 44 /*FIXME*/
 45   if (*bp)
 46           m0 |= 1;
 47 
 48   for (int rowctr = 0; rowctr < 8; ++rowctr) {
 49     int tmp0, tmp1, tmp2, tmp3;
 50     int tmp10, tmp11, tmp12, tmp13;
 51     int z1, z2, z3, z4, z5;
 52 
 53     /*
 54      * Due to quantization, we will usually find that many of the input
 55      * coefficients are zero, especially the AC terms.  We can exploit this
 56      * by short-circuiting the IDCT calculation for any row in which all
 57      * the AC terms are zero.  In that case each output is equal to the
 58      * DC coefficient (with scale factor as needed).
 59      * With typical images and quantization tables, half or more of the
 60      * row DCT calculations can be simplified this way.
 61      */
 62     if ((m0 & 0xfe) == 0) {
 63       /* AC terms all zero */
 64       int v;
 65      if (m0 & 1) {
 66           v = (bp[0] << PASS1_BITS) & 0xffff;
 67           v |= v << 16;
 68       } else
 69           v = 0;
 70       ((u_int*)bp)[0] = v;
 71       ((u_int*)bp)[1] = v;
 72       ((u_int*)bp)[2] = v;
 73       ((u_int*)bp)[3] = v;
 74       goto nextrow;
 75     }
 76 
 77     /* Even part: reverse the even part of the forward DCT. */
 78     /* The rotator is sqrt(2)*c(-6). */
 79     if (m0 & 1 << 6) {
 80         int d6 = bp[6];
 81         if (m0 & 1 << 4) {
 82             int d4 = bp[4];
 83             if (m0 & 1 << 2) {
 84                 int d2 = bp[2];
 85                 if (m0 & 1) {
 86                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
 87                     int d0 = bp[0];
 88                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
 89                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
 90                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
 91 
 92                     tmp0 = (d0 + d4) << CONST_BITS;
 93                     tmp1 = (d0 - d4) << CONST_BITS;
 94 
 95                     tmp10 = tmp0 + tmp3;
 96                     tmp13 = tmp0 - tmp3;
 97                     tmp11 = tmp1 + tmp2;
 98                     tmp12 = tmp1 - tmp2;
 99                 } else {
100                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
101                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
102                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
103                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
104 
105                     tmp0 = d4 << CONST_BITS;
106 
107                     tmp10 = tmp0 + tmp3;
108                     tmp13 = tmp0 - tmp3;
109                     tmp11 = tmp2 - tmp0;
110                     tmp12 = -(tmp0 + tmp2);
111                 }
112             } else {
113                 if (m0 & 1) {
114                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
115                     int d0 = bp[0];
116                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
117                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
118 
119                     tmp0 = (d0 + d4) << CONST_BITS;
120                     tmp1 = (d0 - d4) << CONST_BITS;
121 
122                     tmp10 = tmp0 + tmp3;
123                     tmp13 = tmp0 - tmp3;
124                     tmp11 = tmp1 + tmp2;
125                     tmp12 = tmp1 - tmp2;
126                 } else {
127                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
128                     tmp2 = MULTIPLY(d6, -FIX(1.306562965));
129                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
130 
131                     tmp0 = d4 << CONST_BITS;
132 
133                     tmp10 = tmp0 + tmp3;
134                     tmp13 = tmp0 - tmp3;
135                     tmp11 = tmp2 - tmp0;
136                     tmp12 = -(tmp0 + tmp2);
137                 }
138             }
139         } else {
140             if (m0 & 1 << 2) {
141                 int d2 = bp[2];
142                 if (m0 & 1) {
143                     /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
144                     int d0 = bp[0];
145                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
146                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
147                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
148 
149                     tmp0 = d0 << CONST_BITS;
150 
151                     tmp10 = tmp0 + tmp3;
152                     tmp13 = tmp0 - tmp3;
153                     tmp11 = tmp0 + tmp2;
154                     tmp12 = tmp0 - tmp2;
155                 } else {
156                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
157                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
158                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
159                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
160 
161                     tmp10 = tmp3;
162                     tmp13 = -tmp3;
163                     tmp11 = tmp2;
164                     tmp12 = -tmp2;
165                 }
166             } else {
167                 if (m0 & 1) {
168                     /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
169                     int d0 = bp[0];
170                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
171                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
172 
173                     tmp0 = d0 << CONST_BITS;
174 
175                     tmp10 = tmp0 + tmp3;
176                     tmp13 = tmp0 - tmp3;
177                     tmp11 = tmp0 + tmp2;
178                     tmp12 = tmp0 - tmp2;
179                 } else {
180                     /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
181                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
182                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
183 
184                     tmp10 = tmp3;
185                     tmp13 = -tmp3;
186                     tmp11 = tmp2;
187                     tmp12 = -tmp2;
188                 }
189             }
190         }
191     } else {
192         if (m0 & 1 << 4) {
193             int d4 = bp[4];
194             if (m0 & 1 << 2) {
195                 int d2 = bp[2];
196                 if (m0 & 1) {
197                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
198                     int d0 = bp[0];
199                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
200                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
201 
202                     tmp0 = (d0 + d4) << CONST_BITS;
203                     tmp1 = (d0 - d4) << CONST_BITS;
204 
205                     tmp10 = tmp0 + tmp3;
206                     tmp13 = tmp0 - tmp3;
207                     tmp11 = tmp1 + tmp2;
208                     tmp12 = tmp1 - tmp2;
209                 } else {
210                     /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
211                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
212                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
213 
214                     tmp0 = d4 << CONST_BITS;
215 
216                     tmp10 = tmp0 + tmp3;
217                     tmp13 = tmp0 - tmp3;
218                     tmp11 = tmp2 - tmp0;
219                     tmp12 = -(tmp0 + tmp2);
220                 }
221             } else {
222                 if (m0 & 1) {
223                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
224                     int d0 = bp[0];
225                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
226                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
227                 } else {
228                     /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
229                     tmp10 = tmp13 = d4 << CONST_BITS;
230                     tmp11 = tmp12 = -tmp10;
231                 }
232             }
233         } else {
234             if (m0 & 1 << 2) {
235                 int d2 = bp[2];
236                 if (m0 & 1) {
237                     /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
238                     int d0 = bp[0];
239                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
240                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
241 
242                     tmp0 = d0 << CONST_BITS;
243 
244                     tmp10 = tmp0 + tmp3;
245                     tmp13 = tmp0 - tmp3;
246                     tmp11 = tmp0 + tmp2;
247                     tmp12 = tmp0 - tmp2;
248                 } else {
249                     /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
250                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
251                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
252 
253                     tmp10 = tmp3;
254                     tmp13 = -tmp3;
255                     tmp11 = tmp2;
256                     tmp12 = -tmp2;
257                 }
258             } else {
259                 if (m0 & 1) {
260                     /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
261                     int d0 = bp[0];
262                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
263                 } else {
264                     /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
265                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
266                 }
267             }
268         }
269     }
270 
271 
272     /* Odd part per figure 8; the matrix is unitary and hence its
273      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
274      */
275 
276     if (m0 & 1 << 7) {
277         int d7 = bp[7];
278         if (m0 & 1 << 5) {
279             int d5 = bp[5];
280             if (m0 & 1 << 3) {
281                 int d3 = bp[3];
282                 if (m0 & 1 << 1) {
283                     int d1 = bp[1];
284                     /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
285                     z1 = d7 + d1;
286                     z2 = d5 + d3;
287                     z3 = d7 + d3;
288                     z4 = d5 + d1;
289                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
290 
291                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
292                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
293                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
294                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
295                     z1 = MULTIPLY(z1, - FIX(0.899976223));
296                     z2 = MULTIPLY(z2, - FIX(2.562915447));
297                     z3 = MULTIPLY(z3, - FIX(1.961570560));
298                     z4 = MULTIPLY(z4, - FIX(0.390180644));
299 
300                     z3 += z5;
301                     z4 += z5;
302 
303                     tmp0 += z1 + z3;
304                     tmp1 += z2 + z4;
305                     tmp2 += z2 + z3;
306                     tmp3 += z1 + z4;
307                 } else {
308                     /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
309                     z1 = d7;
310                     z2 = d5 + d3;
311                     z3 = d7 + d3;
312                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
313 
314                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
315                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
316                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
317                     z1 = MULTIPLY(d7, - FIX(0.899976223));
318                     z2 = MULTIPLY(z2, - FIX(2.562915447));
319                     z3 = MULTIPLY(z3, - FIX(1.961570560));
320                     z4 = MULTIPLY(d5, - FIX(0.390180644));
321 
322                     z3 += z5;
323                     z4 += z5;
324 
325                     tmp0 += z1 + z3;
326                     tmp1 += z2 + z4;
327                     tmp2 += z2 + z3;
328                     tmp3 = z1 + z4;
329                 }
330             } else {
331                 if (m0 & 1 << 1) {
332                     int d1 = bp[1];
333                     /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
334                     z1 = d7 + d1;
335                     z2 = d5;
336                     z3 = d7;
337                     z4 = d5 + d1;
338                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
339 
340                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
341                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
342                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
343                     z1 = MULTIPLY(z1, - FIX(0.899976223));
344                     z2 = MULTIPLY(d5, - FIX(2.562915447));
345                     z3 = MULTIPLY(d7, - FIX(1.961570560));
346                     z4 = MULTIPLY(z4, - FIX(0.390180644));
347 
348                     z3 += z5;
349                     z4 += z5;
350 
351                     tmp0 += z1 + z3;
352                     tmp1 += z2 + z4;
353                     tmp2 = z2 + z3;
354                     tmp3 += z1 + z4;
355                 } else {
356                     /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
357                     tmp0 = MULTIPLY(d7, - FIX(0.601344887));
358                     z1 = MULTIPLY(d7, - FIX(0.899976223));
359                     z3 = MULTIPLY(d7, - FIX(1.961570560));
360                     tmp1 = MULTIPLY(d5, - FIX(0.509795578));
361                     z2 = MULTIPLY(d5, - FIX(2.562915447));
362                     z4 = MULTIPLY(d5, - FIX(0.390180644));
363                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
364 
365                     z3 += z5;
366                     z4 += z5;
367 
368                     tmp0 += z3;
369                     tmp1 += z4;
370                     tmp2 = z2 + z3;
371                     tmp3 = z1 + z4;
372                 }
373             }
374         } else {
375             if (m0 & 1 << 3) {
376                 int d3 = bp[3];
377                 if (m0 & 1 << 1) {
378                     int d1 = bp[1];
379                     /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
380                     z1 = d7 + d1;
381                     z3 = d7 + d3;
382                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
383 
384                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
385                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
386                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
387                     z1 = MULTIPLY(z1, - FIX(0.899976223));
388                     z2 = MULTIPLY(d3, - FIX(2.562915447));
389                     z3 = MULTIPLY(z3, - FIX(1.961570560));
390                     z4 = MULTIPLY(d1, - FIX(0.390180644));
391 
392                     z3 += z5;
393                     z4 += z5;
394 
395                     tmp0 += z1 + z3;
396                     tmp1 = z2 + z4;
397                     tmp2 += z2 + z3;
398                     tmp3 += z1 + z4;
399                 } else {
400                     /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
401                     z3 = d7 + d3;
402 
403                     tmp0 = MULTIPLY(d7, - FIX(0.601344887));
404                     z1 = MULTIPLY(d7, - FIX(0.899976223));
405                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
406                     z2 = MULTIPLY(d3, - FIX(2.562915447));
407                     z5 = MULTIPLY(z3, FIX(1.175875602));
408                     z3 = MULTIPLY(z3, - FIX(0.785694958));
409 
410                     tmp0 += z3;
411                     tmp1 = z2 + z5;
412                     tmp2 += z3;
413                     tmp3 = z1 + z5;
414                 }
415             } else {
416                 if (m0 & 1 << 1) {
417                     int d1 = bp[1];
418                     /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
419                     z1 = d7 + d1;
420                     z5 = MULTIPLY(z1, FIX(1.175875602));
421 
422                     z1 = MULTIPLY(z1, FIX(0.275899379));
423                     z3 = MULTIPLY(d7, - FIX(1.961570560));
424                     tmp0 = MULTIPLY(d7, - FIX(1.662939224));
425                     z4 = MULTIPLY(d1, - FIX(0.390180644));
426                     tmp3 = MULTIPLY(d1, FIX(1.111140466));
427 
428                     tmp0 += z1;
429                     tmp1 = z4 + z5;
430                     tmp2 = z3 + z5;
431                     tmp3 += z1;
432                 } else {
433                     /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
434                     tmp0 = MULTIPLY(d7, - FIX(1.387039845));
435                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
436                     tmp2 = MULTIPLY(d7, - FIX(0.785694958));
437                     tmp3 = MULTIPLY(d7, FIX(0.275899379));
438                 }
439             }
440         }
441     } else {
442         if (m0 & 1 << 5) {
443             int d5 = bp[5];
444             if (m0 & 1 << 3) {
445                 int d3 = bp[3];
446                 if (m0 & 1 << 1) {
447                     /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
448                     int d1 = bp[1];
449                     z2 = d5 + d3;
450                     z4 = d5 + d1;
451                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
452 
453                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
454                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
455                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
456                     z1 = MULTIPLY(d1, - FIX(0.899976223));
457                     z2 = MULTIPLY(z2, - FIX(2.562915447));
458                     z3 = MULTIPLY(d3, - FIX(1.961570560));
459                     z4 = MULTIPLY(z4, - FIX(0.390180644));
460 
461                     z3 += z5;
462                     z4 += z5;
463 
464                     tmp0 = z1 + z3;
465                     tmp1 += z2 + z4;
466                     tmp2 += z2 + z3;
467                     tmp3 += z1 + z4;
468                 } else {
469                     /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
470                     z2 = d5 + d3;
471 
472                     z5 = MULTIPLY(z2, FIX(1.175875602));
473                     tmp1 = MULTIPLY(d5, FIX(1.662939225));
474                     z4 = MULTIPLY(d5, - FIX(0.390180644));
475                     z2 = MULTIPLY(z2, - FIX(1.387039845));
476                     tmp2 = MULTIPLY(d3, FIX(1.111140466));
477                     z3 = MULTIPLY(d3, - FIX(1.961570560));
478 
479                     tmp0 = z3 + z5;
480                     tmp1 += z2;
481                     tmp2 += z2;
482                     tmp3 = z4 + z5;
483                 }
484             } else {
485                 if (m0 & 1 << 1) {
486                     /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
487                     int d1 = bp[1];
488                     z4 = d5 + d1;
489 
490                     z5 = MULTIPLY(z4, FIX(1.175875602));
491                     z1 = MULTIPLY(d1, - FIX(0.899976223));
492                     tmp3 = MULTIPLY(d1, FIX(0.601344887));
493                     tmp1 = MULTIPLY(d5, - FIX(0.509795578));
494                     z2 = MULTIPLY(d5, - FIX(2.562915447));
495                     z4 = MULTIPLY(z4, FIX(0.785694958));
496 
497                     tmp0 = z1 + z5;
498                     tmp1 += z4;
499                     tmp2 = z2 + z5;
500                     tmp3 += z4;
501                 } else {
502                     /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
503                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
504                     tmp1 = MULTIPLY(d5, FIX(0.275899380));
505                     tmp2 = MULTIPLY(d5, - FIX(1.387039845));
506                     tmp3 = MULTIPLY(d5, FIX(0.785694958));
507                 }
508             }
509         } else {
510             if (m0 & 1 << 3) {
511                 int d3 = bp[3];
512                 if (m0 & 1 << 1) {
513                     /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
514                     int d1 = bp[1];
515                     z5 = d1 + d3;
516                     tmp3 = MULTIPLY(d1, FIX(0.211164243));
517                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
518                     z1 = MULTIPLY(d1, FIX(1.061594337));
519                     z2 = MULTIPLY(d3, - FIX(2.172734803));
520                     z4 = MULTIPLY(z5, FIX(0.785694958));
521                     z5 = MULTIPLY(z5, FIX(1.175875602));
522 
523                     tmp0 = z1 - z4;
524                     tmp1 = z2 + z4;
525                     tmp2 += z5;
526                     tmp3 += z5;
527                 } else {
528                     /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
529                     tmp0 = MULTIPLY(d3, - FIX(0.785694958));
530                     tmp1 = MULTIPLY(d3, - FIX(1.387039845));
531                     tmp2 = MULTIPLY(d3, - FIX(0.275899379));
532                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
533                 }
534             } else {
535                 if (m0 & 1 << 1) {
536                     /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
537                     int d1 = bp[1];
538                     tmp0 = MULTIPLY(d1, FIX(0.275899379));
539                     tmp1 = MULTIPLY(d1, FIX(0.785694958));
540                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
541                     tmp3 = MULTIPLY(d1, FIX(1.387039845));
542                 } else {
543                     /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
544                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
545                 }
546             }
547         }
548     }
549 
550     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
551 
552     bp[0] =  DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
553     bp[7] =  DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
554     bp[1] =  DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
555     bp[6] =  DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
556     bp[2] =  DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
557     bp[5] =  DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
558     bp[3] =  DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
559     bp[4] =  DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
560 
561  nextrow:
562     bp += 8;            /* advance pointer to next row */
563     m0 >>= 8;
564     m0 |= m1 << 24;
565     m1 >>= 8;
566   }
567 
568   /* Pass 2: process columns. */
569   /* Note that we must descale the results by a factor of 8 == 2**3, */
570   /* and also undo the PASS1_BITS scaling. */
571 
572   bp -= 64;
573   for (rowctr = 8; --rowctr >= 0;) {
574     int tmp0, tmp1, tmp2, tmp3;
575     int tmp10, tmp11, tmp12, tmp13;
576     int z1, z2, z3, z4, z5;
577 
578     /* Columns of zeroes can be exploited in the same way as we did with rows.
579      * However, the row calculation has created many nonzero AC terms, so the
580      * simplification applies less often (typically 5% to 10% of the time).
581      * On machines with very fast multiplication, it's possible that the
582      * test takes more time than it's worth.  In that case this section
583      * may be commented out.
584      */
585 
586     int d0 = bp[8*0];
587     int d1 = bp[8*1];
588     int d2 = bp[8*2];
589     int d3 = bp[8*3];
590     int d4 = bp[8*4];
591     int d5 = bp[8*5];
592     int d6 = bp[8*6];
593     int d7 = bp[8*7];
594 
595     /* Even part: reverse the even part of the forward DCT. */
596     /* The rotator is sqrt(2)*c(-6). */
597     if (d6) {
598         if (d4) {
599             if (d2) {
600                 if (d0) {
601                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
602                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
603                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
604                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
605 
606                     tmp0 = (d0 + d4) << CONST_BITS;
607                     tmp1 = (d0 - d4) << CONST_BITS;
608 
609                     tmp10 = tmp0 + tmp3;
610                     tmp13 = tmp0 - tmp3;
611                     tmp11 = tmp1 + tmp2;
612                     tmp12 = tmp1 - tmp2;
613                 } else {
614                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
615                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
616                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
617                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
618 
619                     tmp0 = d4 << CONST_BITS;
620 
621                     tmp10 = tmp0 + tmp3;
622                     tmp13 = tmp0 - tmp3;
623                     tmp11 = tmp2 - tmp0;
624                     tmp12 = -(tmp0 + tmp2);
625                 }
626             } else {
627                 if (d0) {
628                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
629                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
630                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
631 
632                     tmp0 = (d0 + d4) << CONST_BITS;
633                     tmp1 = (d0 - d4) << CONST_BITS;
634 
635                     tmp10 = tmp0 + tmp3;
636                     tmp13 = tmp0 - tmp3;
637                     tmp11 = tmp1 + tmp2;
638                     tmp12 = tmp1 - tmp2;
639                 } else {
640                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
641                     tmp2 = MULTIPLY(d6, -FIX(1.306562965));
642                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
643 
644                     tmp0 = d4 << CONST_BITS;
645 
646                     tmp10 = tmp0 + tmp3;
647                     tmp13 = tmp0 - tmp3;
648                     tmp11 = tmp2 - tmp0;
649                     tmp12 = -(tmp0 + tmp2);
650                 }
651             }
652         } else {
653             if (d2) {
654                 if (d0) {
655                     /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
656                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
657                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
658                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
659 
660                     tmp0 = d0 << CONST_BITS;
661 
662                     tmp10 = tmp0 + tmp3;
663                     tmp13 = tmp0 - tmp3;
664                     tmp11 = tmp0 + tmp2;
665                     tmp12 = tmp0 - tmp2;
666                 } else {
667                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
668                     z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
669                     tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
670                     tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
671 
672                     tmp10 = tmp3;
673                     tmp13 = -tmp3;
674                     tmp11 = tmp2;
675                     tmp12 = -tmp2;
676                 }
677             } else {
678                 if (d0) {
679                     /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
680                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
681                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
682 
683                     tmp0 = d0 << CONST_BITS;
684 
685                     tmp10 = tmp0 + tmp3;
686                     tmp13 = tmp0 - tmp3;
687                     tmp11 = tmp0 + tmp2;
688                     tmp12 = tmp0 - tmp2;
689                 } else {
690                     /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
691                     tmp2 = MULTIPLY(d6, - FIX(1.306562965));
692                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
693 
694                     tmp10 = tmp3;
695                     tmp13 = -tmp3;
696                     tmp11 = tmp2;
697                     tmp12 = -tmp2;
698                 }
699             }
700         }
701     } else {
702         if (d4) {
703             if (d2) {
704                 if (d0) {
705                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
706                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
707                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
708 
709                     tmp0 = (d0 + d4) << CONST_BITS;
710                     tmp1 = (d0 - d4) << CONST_BITS;
711 
712                     tmp10 = tmp0 + tmp3;
713                     tmp13 = tmp0 - tmp3;
714                     tmp11 = tmp1 + tmp2;
715                     tmp12 = tmp1 - tmp2;
716                 } else {
717                     /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
718                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
719                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
720 
721                     tmp0 = d4 << CONST_BITS;
722 
723                     tmp10 = tmp0 + tmp3;
724                     tmp13 = tmp0 - tmp3;
725                     tmp11 = tmp2 - tmp0;
726                     tmp12 = -(tmp0 + tmp2);
727                 }
728             } else {
729                 if (d0) {
730                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
731                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
732                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
733                 } else {
734                     /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
735                     tmp10 = tmp13 = d4 << CONST_BITS;
736                     tmp11 = tmp12 = -tmp10;
737                 }
738             }
739         } else {
740             if (d2) {
741                 if (d0) {
742                     /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
743                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
744                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
745 
746                     tmp0 = d0 << CONST_BITS;
747 
748                     tmp10 = tmp0 + tmp3;
749                     tmp13 = tmp0 - tmp3;
750                     tmp11 = tmp0 + tmp2;
751                     tmp12 = tmp0 - tmp2;
752                 } else {
753                     /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
754                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
755                     tmp3 = MULTIPLY(d2, FIX(1.306562965));
756 
757                     tmp10 = tmp3;
758                     tmp13 = -tmp3;
759                     tmp11 = tmp2;
760                     tmp12 = -tmp2;
761                 }
762             } else {
763                 if (d0) {
764                     /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
765                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
766                 } else {
767                     /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
768                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
769                 }
770             }
771         }
772     }
773 
774     /* Odd part per figure 8; the matrix is unitary and hence its
775      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
776      */
777     if (d7) {
778         if (d5) {
779             if (d3) {
780                 if (d1) {
781                     /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
782                     z1 = d7 + d1;
783                     z2 = d5 + d3;
784                     z3 = d7 + d3;
785                     z4 = d5 + d1;
786                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
787 
788                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
789                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
790                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
791                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
792                     z1 = MULTIPLY(z1, - FIX(0.899976223));
793                     z2 = MULTIPLY(z2, - FIX(2.562915447));
794                     z3 = MULTIPLY(z3, - FIX(1.961570560));
795                     z4 = MULTIPLY(z4, - FIX(0.390180644));
796 
797                     z3 += z5;
798                     z4 += z5;
799 
800                     tmp0 += z1 + z3;
801                     tmp1 += z2 + z4;
802                     tmp2 += z2 + z3;
803                     tmp3 += z1 + z4;
804                 } else {
805                     /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
806                     z1 = d7;
807                     z2 = d5 + d3;
808                     z3 = d7 + d3;
809                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
810 
811                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
812                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
813                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
814                     z1 = MULTIPLY(d7, - FIX(0.899976223));
815                     z2 = MULTIPLY(z2, - FIX(2.562915447));
816                     z3 = MULTIPLY(z3, - FIX(1.961570560));
817                     z4 = MULTIPLY(d5, - FIX(0.390180644));
818 
819                     z3 += z5;
820                     z4 += z5;
821 
822                     tmp0 += z1 + z3;
823                     tmp1 += z2 + z4;
824                     tmp2 += z2 + z3;
825                     tmp3 = z1 + z4;
826                 }
827             } else {
828                 if (d1) {
829                     /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
830                     z1 = d7 + d1;
831                     z2 = d5;
832                     z3 = d7;
833                     z4 = d5 + d1;
834                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
835 
836                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
837                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
838                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
839                     z1 = MULTIPLY(z1, - FIX(0.899976223));
840                     z2 = MULTIPLY(d5, - FIX(2.562915447));
841                     z3 = MULTIPLY(d7, - FIX(1.961570560));
842                     z4 = MULTIPLY(z4, - FIX(0.390180644));
843 
844                     z3 += z5;
845                     z4 += z5;
846 
847                     tmp0 += z1 + z3;
848                     tmp1 += z2 + z4;
849                     tmp2 = z2 + z3;
850                     tmp3 += z1 + z4;
851                 } else {
852                     /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
853                     tmp0 = MULTIPLY(d7, - FIX(0.601344887));
854                     z1 = MULTIPLY(d7, - FIX(0.899976223));
855                     z3 = MULTIPLY(d7, - FIX(1.961570560));
856                     tmp1 = MULTIPLY(d5, - FIX(0.509795578));
857                     z2 = MULTIPLY(d5, - FIX(2.562915447));
858                     z4 = MULTIPLY(d5, - FIX(0.390180644));
859                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
860 
861                     z3 += z5;
862                     z4 += z5;
863 
864                     tmp0 += z3;
865                     tmp1 += z4;
866                     tmp2 = z2 + z3;
867                     tmp3 = z1 + z4;
868                 }
869             }
870         } else {
871             if (d3) {
872                 if (d1) {
873                     /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
874                     z1 = d7 + d1;
875                     z3 = d7 + d3;
876                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
877 
878                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
879                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
880                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
881                     z1 = MULTIPLY(z1, - FIX(0.899976223));
882                     z2 = MULTIPLY(d3, - FIX(2.562915447));
883                     z3 = MULTIPLY(z3, - FIX(1.961570560));
884                     z4 = MULTIPLY(d1, - FIX(0.390180644));
885 
886                     z3 += z5;
887                     z4 += z5;
888 
889                     tmp0 += z1 + z3;
890                     tmp1 = z2 + z4;
891                     tmp2 += z2 + z3;
892                     tmp3 += z1 + z4;
893                 } else {
894                     /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
895                     z3 = d7 + d3;
896 
897                     tmp0 = MULTIPLY(d7, - FIX(0.601344887));
898                     z1 = MULTIPLY(d7, - FIX(0.899976223));
899                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
900                     z2 = MULTIPLY(d3, - FIX(2.562915447));
901                     z5 = MULTIPLY(z3, FIX(1.175875602));
902                     z3 = MULTIPLY(z3, - FIX(0.785694958));
903 
904                     tmp0 += z3;
905                     tmp1 = z2 + z5;
906                     tmp2 += z3;
907                     tmp3 = z1 + z5;
908                 }
909             } else {
910                 if (d1) {
911                     /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
912                     z1 = d7 + d1;
913                     z5 = MULTIPLY(z1, FIX(1.175875602));
914 
915                     z1 = MULTIPLY(z1, FIX(0.275899379));
916                     z3 = MULTIPLY(d7, - FIX(1.961570560));
917                     tmp0 = MULTIPLY(d7, - FIX(1.662939224));
918                     z4 = MULTIPLY(d1, - FIX(0.390180644));
919                     tmp3 = MULTIPLY(d1, FIX(1.111140466));
920 
921                     tmp0 += z1;
922                     tmp1 = z4 + z5;
923                     tmp2 = z3 + z5;
924                     tmp3 += z1;
925                 } else {
926                     /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
927                     tmp0 = MULTIPLY(d7, - FIX(1.387039845));
928                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
929                     tmp2 = MULTIPLY(d7, - FIX(0.785694958));
930                     tmp3 = MULTIPLY(d7, FIX(0.275899379));
931                 }
932             }
933         }
934     } else {
935         if (d5) {
936             if (d3) {
937                 if (d1) {
938                     /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
939                     z2 = d5 + d3;
940                     z4 = d5 + d1;
941                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
942 
943                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
944                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
945                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
946                     z1 = MULTIPLY(d1, - FIX(0.899976223));
947                     z2 = MULTIPLY(z2, - FIX(2.562915447));
948                     z3 = MULTIPLY(d3, - FIX(1.961570560));
949                     z4 = MULTIPLY(z4, - FIX(0.390180644));
950 
951                     z3 += z5;
952                     z4 += z5;
953 
954                     tmp0 = z1 + z3;
955                     tmp1 += z2 + z4;
956                     tmp2 += z2 + z3;
957                     tmp3 += z1 + z4;
958                 } else {
959                     /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
960                     z2 = d5 + d3;
961 
962                     z5 = MULTIPLY(z2, FIX(1.175875602));
963                     tmp1 = MULTIPLY(d5, FIX(1.662939225));
964                     z4 = MULTIPLY(d5, - FIX(0.390180644));
965                     z2 = MULTIPLY(z2, - FIX(1.387039845));
966                     tmp2 = MULTIPLY(d3, FIX(1.111140466));
967                     z3 = MULTIPLY(d3, - FIX(1.961570560));
968 
969                     tmp0 = z3 + z5;
970                     tmp1 += z2;
971                     tmp2 += z2;
972                     tmp3 = z4 + z5;
973                 }
974             } else {
975                 if (d1) {
976                     /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
977                     z4 = d5 + d1;
978 
979                     z5 = MULTIPLY(z4, FIX(1.175875602));
980                     z1 = MULTIPLY(d1, - FIX(0.899976223));
981                     tmp3 = MULTIPLY(d1, FIX(0.601344887));
982                     tmp1 = MULTIPLY(d5, - FIX(0.509795578));
983                     z2 = MULTIPLY(d5, - FIX(2.562915447));
984                     z4 = MULTIPLY(z4, FIX(0.785694958));
985 
986                     tmp0 = z1 + z5;
987                     tmp1 += z4;
988                     tmp2 = z2 + z5;
989                     tmp3 += z4;
990                 } else {
991                     /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
992                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
993                     tmp1 = MULTIPLY(d5, FIX(0.275899380));
994                     tmp2 = MULTIPLY(d5, - FIX(1.387039845));
995                     tmp3 = MULTIPLY(d5, FIX(0.785694958));
996                 }
997             }
998         } else {
999             if (d3) {
1000                 if (d1) {
1001                     /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1002                     z5 = d1 + d3;
1003                     tmp3 = MULTIPLY(d1, FIX(0.211164243));
1004                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1005                     z1 = MULTIPLY(d1, FIX(1.061594337));
1006                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1007                     z4 = MULTIPLY(z5, FIX(0.785694958));
1008                     z5 = MULTIPLY(z5, FIX(1.175875602));
1009 
1010                     tmp0 = z1 - z4;
1011                     tmp1 = z2 + z4;
1012                     tmp2 += z5;
1013                     tmp3 += z5;
1014                 } else {
1015                     /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1016                     tmp0 = MULTIPLY(d3, - FIX(0.785694958));
1017                     tmp1 = MULTIPLY(d3, - FIX(1.387039845));
1018                     tmp2 = MULTIPLY(d3, - FIX(0.275899379));
1019                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1020                 }
1021             } else {
1022                 if (d1) {
1023                     /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1024                     tmp0 = MULTIPLY(d1, FIX(0.275899379));
1025                     tmp1 = MULTIPLY(d1, FIX(0.785694958));
1026                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1027                     tmp3 = MULTIPLY(d1, FIX(1.387039845));
1028                 } else {
1029                     /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1030                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1031                 }
1032             }
1033         }
1034     }
1035 
1036     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1037 
1038     d0 = DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3);
1039     d0 = UCLIMIT(d0 + 128, d1);
1040     *p = d0;
1041     p += stride;
1042 
1043     d0 = DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3);
1044     d0 = UCLIMIT(d0 + 128, d1);
1045     *p = d0;
1046     p += stride;
1047 
1048     d0 = DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3);
1049     d0 = UCLIMIT(d0 + 128, d1);
1050     *p = d0;
1051     p += stride;
1052 
1053     d0 = DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3);
1054     d0 = UCLIMIT(d0 + 128, d1);
1055     *p = d0;
1056     p += stride;
1057 
1058     d0 = DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3);
1059     d0 = UCLIMIT(d0 + 128, d1);
1060     *p = d0;
1061     p += stride;
1062 
1063     d0 = DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3);
1064     d0 = UCLIMIT(d0 + 128, d1);
1065     *p = d0;
1066     p += stride;
1067 
1068     d0 = DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3);
1069     d0 = UCLIMIT(d0 + 128, d1);
1070     *p = d0;
1071     p += stride;
1072 
1073     d0 = DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3);
1074     d0 = UCLIMIT(d0 + 128, d1);
1075     *p = d0;
1076     p += stride;
1077 
1078     p -= stride <&