~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/codec/jpeg/jpeg-hyst.cc

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 /*
  2  * jpeg-hyst.cc --
  3  *
  4  *      JPEG decoder: Huffman parsing, a hash cache of inverse-DCT results, and optional per-block hysteresis.
  5  */
  6 
  7 /*
  8  * This code is derived from the Independent JPEG Group's JPEG software:
  9  *
 10  * Copyright (C) 1991, 1992, Thomas G. Lane.
 11  * This file is part of the Independent JPEG Group's software.
 12  * For conditions of distribution and use, see the accompanying
 13  * README.IJPG file.
 14  */
 15 
 16 #include "jpeg.h"
 17 
 18 #include <stdlib.h>
 19 #include <stdio.h>
 20 #include <sys/param.h>
 21 #include <netinet/in.h>
 22 
 23 extern "C" {
 24 void j_rev_dct(short*);
 25 void init_pre_idct();
 26 }
 27 
 28 #include <bstring.h>
 29 
 30 #ifdef __alpha
 31 #include <machine/endian.h>
 32 #endif
 33 
 34 
 35 /*
 36  * These two macros stolen from nv.
 37  */
 38 /* Sick little macro which will limit x to [0..255] with logical ops */
 39 #define UCLIMIT(x) ((t = (x)), (t &= ~(t>>31)), (t | ~((t-256) >> 31)))
 40 /* A variant of above which will limit x to [-128..127] */
 41 #define SCLIMIT(x) (UCLIMIT((x)+128)-128)
 42 
 43 /*
 44  * ZAG[i] is the natural-order position of the i'th element of zigzag order.
 45  * If the incoming data is corrupted, huff_decode_mcu could attempt to
 46  * reference values beyond the end of the array.  To avoid a wild store,
 47  * we put some extra zeroes after the real entries.
 48  */
 49 static const int ZAG[] = {
 50         0,  1,  8, 16,  9,  2,  3, 10,
 51         17, 24, 32, 25, 18, 11,  4,  5,
 52         12, 19, 26, 33, 40, 48, 41, 34,
 53         27, 20, 13,  6,  7, 14, 21, 28,
 54         35, 42, 49, 56, 57, 50, 43, 36,
 55         29, 22, 15, 23, 30, 37, 44, 51,
 56         58, 59, 52, 45, 38, 31, 39, 46,
 57         53, 60, 61, 54, 47, 55, 62, 63,
 58         /* extra entries in case k>63 below */
 59         0,  0,  0,  0,  0,  0,  0,  0,
 60         0,  0,  0,  0,  0,  0,  0,  0
 61 };
 62 
 63 JpegDecoder::JpegDecoder(const config& c)
 64 {
 65         reconfig(c);
 66 }
 67 
 68 void JpegDecoder::reconfig(const config& c)
 69 {
 70         width = c.width;
 71         height = c.height;
 72         jpeg_color_space = c.cs;
 73         bcopy(c.qtab, qtab, sizeof(qtab));
 74         ncomp = c.ncomp;
 75 
 76         max_h_samp_factor = 1;
 77         max_v_samp_factor = 1;
 78         for (int i = ncomp; --i >= 0; ) {
 79                 comp[i].id = c.comp[i].id;
 80                 int v = comp[i].h_samp_factor = c.comp[i].h_samp_factor;
 81                 if (max_h_samp_factor < v)
 82                         max_h_samp_factor = v;
 83                 v = comp[i].v_samp_factor = c.comp[i].v_samp_factor;
 84                 if (max_v_samp_factor < v)
 85                         max_v_samp_factor = v;
 86                 comp[i].quant_tbl_no = c.comp[i].quant_tbl_no;
 87                 comp[i].dc_tbl_no = c.comp[i].dc_tbl_no;
 88                 comp[i].ac_tbl_no = c.comp[i].ac_tbl_no;
 89         }
 90         for (i = 0; i < 4; ++i) {
 91                 if (c.dc_huffval[i] != 0)
 92                         dc_hufftab[i] = huffbuild(c.dc_huffbits[i],
 93                                                   c.dc_huffval[i]);
 94                 if (c.ac_huffval[i] != 0)
 95                         ac_hufftab[i] = huffbuild(c.ac_huffbits[i],
 96                                                   c.ac_huffval[i]);
 97         }
 98         interleaved_scan_setup();
 99 }
100 
101 void JpegDecoder::init()
102 {
103 #ifdef ROWE
104         init_pre_idct();
105 #endif
106         huffreset();
107 }
108 
/*
 * Round a up to the next multiple of b.
 * Callers must pass a >= 0 and b > 0.
 */
static inline int jround_up(int a, int b)
{
        const int bumped = a + (b - 1);
        const int excess = bumped % b;

        return (bumped - excess);
}
115 
116 void JpegDecoder::interleaved_scan_setup()
117 {
118         short ci, mcublks;
119 
120         MCUs_per_row = (width + 8 * max_h_samp_factor - 1)
121                 / (8 * max_h_samp_factor);
122 
123         MCU_rows_in_scan = (height + 8 * max_v_samp_factor - 1)
124                 / (8 * max_v_samp_factor);
125 
126         blocks_in_MCU = 0;
127 
128         for (ci = 0; ci < ncomp; ci++) {
129                 /*
130                  * for interleaved scan, sampling factors give # of blocks
131                  * per component
132                  */
133                 component& p = comp[ci];
134                 p.MCU_width = p.h_samp_factor;
135                 p.MCU_height = p.v_samp_factor;
136                 p.MCU_blocks = p.MCU_width * p.MCU_height;
137 
138                 /* Prepare array describing MCU composition */
139                 mcublks = p.MCU_blocks;
140                 if (blocks_in_MCU + mcublks > MAX_BLOCKS_IN_MCU)
141                         abort();
142                 while (mcublks-- > 0)
143                         MCU_membership[blocks_in_MCU++] = ci;
144         }
145 }
146 
147 void JpegDecoder::fill(int dc, u_char* out, const int stride) const
148 {
149         register int t;
150 
151         dc = UCLIMIT(dc) & 0xff;
152         dc |= dc << 8;
153         dc |= dc << 16;
154         *(u_word*)out = dc;
155         *(u_word*)(out + 4) = dc;
156         out += stride;
157         *(u_word*)out = dc;
158         *(u_word*)(out + 4) = dc;
159         out += stride;
160         *(u_word*)out = dc;
161         *(u_word*)(out + 4) = dc;
162         out += stride;
163         *(u_word*)out = dc;
164         *(u_word*)(out + 4) = dc;
165         out += stride;
166         *(u_word*)out = dc;
167         *(u_word*)(out + 4) = dc;
168         out += stride;
169         *(u_word*)out = dc;
170         *(u_word*)(out + 4) = dc;
171         out += stride;
172         *(u_word*)out = dc;
173         *(u_word*)(out + 4) = dc;
174         out += stride;
175         *(u_word*)out = dc;
176         *(u_word*)(out + 4) = dc;
177 }
178 
179 void JpegDecoder::mix(const int dc, const short* bp, u_char* out,
180                       const int stride) const
181 {
182         for (register int k = 8; --k >= 0; ) {
183                 register u_word o;
184                 register int t;
185 #if BYTE_ORDER == LITTLE_ENDIAN
186                 o = UCLIMIT(bp[0] + dc) & 0xff;
187                 o |= (UCLIMIT(bp[1] + dc) & 0xff) << 8;
188                 o |= (UCLIMIT(bp[2] + dc) & 0xff) << 16;
189                 o |= (UCLIMIT(bp[3] + dc) & 0xff) << 24;
190                 *(u_word *)out = o;
191                 o = UCLIMIT(bp[4] + dc) & 0xff;
192                 o |= (UCLIMIT(bp[5] + dc) & 0xff) << 8;
193                 o |= (UCLIMIT(bp[6] + dc) & 0xff) << 16;
194                 o |= (UCLIMIT(bp[7] + dc) & 0xff) << 24;
195                 *(u_word *)(out + 4) = o;
196 #else
197                 o = (UCLIMIT(bp[0] + dc) & 0xff) << 24;
198                 o |= (UCLIMIT(bp[1] + dc) & 0xff) << 16;
199                 o |= (UCLIMIT(bp[2] + dc) & 0xff) << 8;
200                 o |= UCLIMIT(bp[3] + dc) & 0xff;
201                 *(u_word *)out = o;
202                 o = (UCLIMIT(bp[4] + dc) & 0xff) << 24;
203                 o |= (UCLIMIT(bp[5] + dc) & 0xff) << 16;
204                 o |= (UCLIMIT(bp[6] + dc) & 0xff) << 8;
205                 o |= UCLIMIT(bp[7] + dc) & 0xff;
206                 *(u_word *)(out + 4) = o;
207 #endif
208                 bp += 8;
209                 out += stride;
210         }
211 }
212 
213 #define MAXCODE 24
214 struct hcnode {
215         struct hcnode *next;
216         u_word n;
217         u_word code[MAXCODE];
218         short block[64];
219 };
220 
221 #define HASHSIZE (2*4096)
222 struct hcnode *hashtab[HASHSIZE];
223 /*FIXME*/
224 #define NHC (2*HASHSIZE)
225 struct hcnode hcpool[NHC];
226 static int nhc;
227 
228 static struct hcnode *
229 scavenge()
230 {
231         register struct hcnode *p;
232 
233         int i = nhc;
234         if (i >= NHC) {
235                 static int rover = 0;
236 
237                 i = rover;
238                 do
239                         i = (i + 1) & (HASHSIZE - 1);
240                 while ((p = hashtab[i]) == 0);
241                 rover = i;
242                 hashtab[i] = p->next;
243         } else {
244                 p  = &hcpool[i];
245                 nhc = i + 1;
246         }
247         return (p);
248 }
249 
250 static inline int
251 hchash(int n, u_word *code)
252 {
253         int v = 0;
254 
255         while (--n >= 0)
256                 v += code[n] + 37;
257         return ((v ^ v >> 16) & (HASHSIZE - 1));
258 }
259 
260 static inline struct hcnode *
261 hclookup(int h, int n, u_word *code)
262 {
263         struct hcnode *p;
264 
265         for (p = hashtab[h]; p != 0; p = p->next)
266                 if (p->n == n &&
267                     bcmp(p->code, code, n * sizeof(*code)) == 0)
268                         break;
269 
270         return (p);
271 }
272 
273 static void
274 hcenter(struct hcnode *p, int h, int n, u_word *code)
275 {
276         bcopy(code, p->code, n * sizeof(*code));
277         p->n = n;
278 
279         p->block[0] = 0;
280         j_rev_dct(p->block);
281 
282         p->next = hashtab[h];
283         hashtab[h] = p;
284 }
285 
/* Decoder statistics; printed by pstats(). */
int hcmiss = 0;         /* block-cache lookups that found no entry */
int hchit = 0;          /* block-cache lookups that found an entry */
int dcblk = 0;          /* blocks parsed with no code words at all */
int bblk = 0;           /* blocks with MAXCODE or more code words */
int sblk = 0;           /* blocks with fewer than MAXCODE code words */
291 
292 static int
293 length(struct hcnode *p)
294 {
295         int n = 0;
296         for (; p != 0; p = p->next)
297                 ++n;
298         return (n);
299 }
300 
301 void
302 pstats()
303 {
304         int i;
305         int len;
306         int bucket = 0;
307         int maxlen = 0;
308 
309         for (i = 0; i < HASHSIZE; ++i) {
310                 if (hashtab[i] == 0)
311                         continue;
312                 ++bucket;
313                 len = length(hashtab[i]);
314                 if (len > maxlen)
315                         maxlen = len;
316         }
317         printf("hit\t%d\n", hchit);
318         printf("miss\t%d\n", hcmiss);
319         printf("bucket\t%d\n", bucket);
320         printf("maxlen\t%d\n", maxlen);
321         printf("dcblk\t%d\n", dcblk);
322         printf("bblk\t%d\n", bblk);
323         printf("sblk\t%d\n", sblk);
324 }
325 
/*
 * Per-block memory used by the hysteresis decoder: the coefficients of
 * the block most recently rendered at this position.
 */
struct blkcache {
        int init;               /* non-zero once block[] has been filled */
        short block[8 * 8];     /* coefficients of the last rendered block */
};
330 
331 int
332 blkthresh(short* b0, short* b1, int thresh, short *qt)
333 {
334         int n = 0;
335         for (int i = 64; --i >= 0; ) {
336                 int k = ZAG[i];
337                 int d = b0[k] - b1[k];
338                 if (d < 0)
339                         d = -d;
340                 if (d > hlevel * qt[i]) {
341                         if (i == 0)
342                                 n += 5;
343                         else
344                                 n += 1;
345                         if (n > thresh)
346                                 return (-1);
347                 }
348         }
349         return (0);
350 }
351 
#ifdef notdef
/*
 * Debugging aid (compiled out): print every coefficient of b that
 * differs from a, with its zigzag index, value, difference and
 * quantiser step.
 */
void
pblk(short *b, short *a, short *qt)
{
        for (int zz = 0; zz < 64; ++zz) {
                const int k = ZAG[zz];
                const int diff = b[k] - a[k];

                if (diff == 0)
                        continue;
                printf("%d:\t%d\td %d\tq %d\n", zz, b[k], diff, qt[zz]);
        }
}
#endif
364 
365 
366 int JpegDecoder::decode_block_with_hysteresis(int ci, int blkno,
367                                                int off, int stride)
368 {
369         int n, h;
370         struct hcnode *p;
371         short *qt;
372         int dc;
373         struct blkcache *bc;
374         short block[64];
375         u_word code[128];
376 
377         /*FIXME*/
378         component& c = comp[ci];
379         qt = qtab[c.quant_tbl_no];
380 
381         bc = &dctcache[ci][blkno];
382 
383         n = huffparse(ci, code);
384         if (n < 0 || huffblock(ci, n, code, block) < 0)
385                 return (-1);
386 
387         if (!bc->init || blkthresh(block, bc->block, hysteresis, qt)) {
388                 bcopy(block, bc->block, sizeof(block));
389                 j_rev_dct(block);
390                 mix(128, block, image[ci] + off, stride);
391                 bc->init = 1;
392         }
393         return (0);
394 }
395 
396 int JpegDecoder::decode_block(int ci, int blkno, int off, int stride)
397 {
398         int n, h;
399         struct hcnode *p;
400         short *qt;
401         int dc;
402         short block[64];
403         u_word code[128];
404 
405         /*FIXME*/
406         if (hysteresis)
407                 return (decode_block_with_hysteresis(ci, blkno, off, stride));
408 
409         /*FIXME*/
410         component& c = comp[ci];
411         qt = qtab[c.quant_tbl_no];
412 
413         n = huffparse(ci, code);
414         if (n == 0) {
415                 dc = c.dc * qt[0];
416                 fill((dc >> 3) + 128, image[ci] + off, stride);
417                 ++dcblk;
418         } else if (n < MAXCODE) {
419                 ++sblk;
420                 h = hchash(n, code);
421                 p = hclookup(h, n, code);
422                 if (p == 0) {
423                         ++hcmiss;
424                         p = scavenge();
425                         if (huffblock(ci, n, code, p->block) < 0)
426                                 return (-1);
427                         hcenter(p, h, n, code);
428                 } else
429                         ++hchit;
430                 dc = c.dc * qt[0];
431                 mix((dc >> 3) + 128, p->block, image[ci] + off, stride);
432         } else {
433                 ++bblk;
434                 if (huffblock(ci, n, code, block) < 0)
435                         return (-1);
436                 j_rev_dct(block);
437                 mix(128, block, image[ci] + off, stride);
438         }
439         return (0);
440 }
441 
442 int JpegDecoder::decode(u_char* in, int len)
443 {
444 #ifdef notdef
445         nbits = 8 * len;
446 #endif
447         inb = in;
448         nbb = 0;
449 
450         const int stride0 = 7 * width;/*FIXME*/
451         const int stride1 = 7 * width / 2;/*FIXME*/
452 
453         int off0 = 0;
454         int off1 = 0;
455         int blkno = 0;
456         huffreset();
457         for (int row = MCU_rows_in_scan; --row >= 0; ) {
458                 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
459                         /* FIXME this works only for hsamp = 2, vsamp = 1 */
460                         if (decode_block(0, blkno, off0, width) < 0)
461                                 return (-1);
462                         off0 += 8;
463                         if (decode_block(0, blkno + 1, off0, width) < 0)
464                                 return (-1);
465                         off0 += 8;
466                         if (decode_block(1, blkno >> 1, off1, width / 2) < 0)
467                                 return (-1);
468                         if (decode_block(2, blkno >> 1, off1, width / 2) < 0)
469                                 return (-1);
470                         off1 += 8;
471                         blkno += 2;
472                 }
473                 off0 += stride0;
474                 off1 += stride1;
475         }
476         return (0);
477 }
478 
/* Figure F.12: extend sign bit */

#ifdef notdef
/* Table-driven variant, kept for reference. */
#define huff_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))

static const int extend_test[16] =   /* entry n is 2**(n-1) */
  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };

static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
#else
/*
 * Arithmetic variant (is this really faster?).
 *
 * x holds an s-bit field.  If bit s-1 of x is set the value is returned
 * unchanged; if it is clear the value stands for a negative number and
 * x - 2**s + 1 is returned.
 */
inline int
huff_EXTEND(int x, int s)
{
        const int top = (x >> (s - 1)) & 1;

        if (top)
                return (x);
        /* fill every bit from position s upward with ones, then add one */
        return ((x | ((0 - 1) << s)) + 1);
}
#endif
503 
/*
 * HUFFRQ(bb): shift bb left by 16 and load the next two data bytes from
 * the input pointer `inb' into its low 16 bits.  After a data byte equal
 * to 0xff the byte that follows it is skipped.  `inb' is advanced past
 * everything consumed.  No end-of-buffer check is made.
 */
#define HUFFRQ(bb) \
 { \
        u_char *cp_ = inb; \
        int c_; \
 \
        (bb) <<= 16; \
        c_ = *cp_++; \
        if (c_ == 0xff) \
                ++cp_; \
        (bb) |= c_ << 8; \
        c_ = *cp_++; \
        if (c_ == 0xff) \
                ++cp_; \
        (bb) |= c_; \
        inb = cp_; \
 \
}

/* Mask covering the low s bits. */
#define MASK(s) ((1 << (s)) - 1)

/*
 * HUFF_DECODE(ht, nbb, bb, result): decode one Huffman symbol.
 * nbb counts the unread bits held in bb.  The next 16 bits index the
 * table ht; an entry carries the code length in its high byte and the
 * symbol in its low byte.  The code length is consumed from nbb and the
 * symbol is stored in result.
 */
#define HUFF_DECODE(ht, nbb, bb, result) { \
        int peek_, entry_; \
 \
        if ((nbb) < 16) { \
                HUFFRQ(bb); \
                (nbb) += 16; \
        } \
        peek_ = ((bb) >> ((nbb) - 16)) & 0xffff; \
        entry_ = (ht)[peek_]; \
        (nbb) -= (entry_ >> 8); \
        (result) = entry_ & 0xff; \
 }

/* GET_BITS(n, nbb, bb, result): consume n bits and store them in result. */
#define GET_BITS(n, nbb, bb, result) \
{ \
        (nbb) -= (n); \
        if ((nbb) < 0)  { \
                HUFFRQ(bb); \
                (nbb) += 16; \
        } \
        (result) = (((bb) >> (nbb)) & MASK(n)); \
}

/* SKIP_BITS(n, nbb, bb): consume n bits and discard them. */
#define SKIP_BITS(n, nbb, bb) \
{ \
        (nbb) -= (n); \
        if ((nbb) < 0)  { \
                HUFFRQ(bb); \
                (nbb) += 16; \
        } \
}
553 
554 int JpegDecoder::huffparse(int ci, u_word *code)
555 {
556         component& p = comp[ci];
557 
558         /* Decode a single block's worth of coefficients */
559 
560         /* Section F.2.2.1: decode the DC coefficient difference */
561         register int bb_ = bb;
562         register int nbb_ = nbb;
563         u_short* ht = dc_hufftab[p.dc_tbl_no];
564         register int s, r;
565         HUFF_DECODE(ht, nbb_, bb_, s);
566         if (s != 0) {
567                 GET_BITS(s, nbb_, bb_, r);
568                 s = huff_EXTEND(r, s);
569         }
570         /* Convert DC difference to actual value, update last_dc_val */
571         s += p.dc;
572         p.dc = s;
573 
574         /* Section F.2.2.2: decode the AC coefficients */
575         ht = ac_hufftab[p.ac_tbl_no];
576         register int n = 0;
577         for (register int k = 1; k < 64; ) {
578                 /* Symbol-1 */
579                 register int v;
580                 HUFF_DECODE(ht, nbb_, bb_, v);
581                 s = v & 15;
582                 r = v >> 4;
583                 if (s != 0) {
584                         k += r;
585                         /* Symbol-2 */
586                         GET_BITS(s, nbb_, bb_, s);
587                         *code++ = (s << 8) | v;
588                         ++n;
589                         ++k;
590                 } else {
591                         if (r != 15)
592                                 /* end of block */
593                                 break;
594                         *code++ = v;
595                         ++n;
596                         k += 16;
597                 }
598         }
599         nbb = nbb_;
600         bb = bb_;
601 
602         return (n);
603 }
604 
605 int JpegDecoder::huffskip(int ci)
606 {
607         /* Decode a single block's worth of coefficients */
608 
609         /* Section F.2.2.1: decode the DC coefficient difference */
610         register int bb_ = bb;
611         register int nbb_ = nbb;
612         component& p = comp[ci];
613         u_short* ht = dc_hufftab[p.dc_tbl_no];
614         register int s;
615         HUFF_DECODE(ht, nbb_, bb_, s);
616         if (s != 0) {
617                 SKIP_BITS(s, nbb_, bb_);
618         }
619         /* Section F.2.2.2: decode the AC coefficients */
620         ht = ac_hufftab[p.ac_tbl_no];
621         for (register int k = 1; k < 64; ) {
622                 /* Symbol-1 */
623                 register int v;
624                 HUFF_DECODE(ht, nbb_, bb_, v);
625                 s = v & 15;
626                 register int r = v >> 4;
627                 if (s != 0) {
628                         k += r;
629                         /* Symbol-2 */
630                         SKIP_BITS(s, nbb_, bb_);
631                         ++k;
632                 } else {
633                         if (r != 15)
634                                 /* end of block */
635                                 break;
636                         k += 16;
637                 }
638         }
639         nbb = nbb_;
640         bb = bb_;
641 
642         return (0);
643 }
644 
645 void JpegDecoder::huffreset()
646 {
647         nbb = 0;
648         comp[0].dc = 0;
649         comp[1].dc = 0;
650         comp[2].dc = 0;
651         comp[3].dc = 0;
652 }
653 
654 u_short* JpegDecoder::huffbuild(const u_char* bits, const u_char* vals) const
655 {
656         /* Figure C.1: make table of Huffman code length for each symbol */
657         /* Note that this is in code-length order. */
658 
659         int nsym = 0;
660         int huffsize[257];
661         for (int codelen = 1; codelen <= 16; ++codelen) {
662                 for (int i = 1; i <= bits[codelen]; ++i)
663                         /*
664                          * FIXME should sanity check that nsym stays
665                          * below 256.
666                          */
667                         huffsize[nsym++] = codelen;
668         }
669         huffsize[nsym] = 0;
670 
671         /* Figure C.2: generate the codes themselves */
672         /* Note that this is in code-length order. */
673 
674         int code = 0;
675         int si = huffsize[0];
676         u_short huffcode[256];
677         int p = 0;
678         while (p < nsym) {
679                 while (huffsize[p] == si)
680                         huffcode[p++] = code++;
681 
682                 code <<= 1;
683                 ++si;
684         }
685         /*
686          * Build the direct-map lookup table.
687          */
688         u_short *ht = new u_short[65536];
689         bzero(ht, 65536 * sizeof(u_short));
690         for (int sym = 0; sym < nsym; ++sym) {
691                 int codelen = huffsize[sym];
692                 int nbit = 16 - codelen;
693                 int code = huffcode[sym] << nbit;
694                 int map = (codelen << 8) | vals[sym];
695                 /*
696                  * The low nbit bits are don't cares.
697                  * Spin through all possible combos.
698                  */
699                 for (int n = 1 << nbit; --n >= 0; )
700                         ht[code | n] = map;
701         }
702         return (ht);
703 }
704 
705 int JpegDecoder::huffblock(int ci, int n, u_word *code, short *blk) const
706 {
707         register int k;
708         register const short *qt;
709 
710         const component& p = comp[ci];
711         qt = qtab[p.quant_tbl_no];
712         bzero(blk, 64 * sizeof(*blk));
713         /* Descale and output the DC coefficient (assumes ZAG[0] = 0) */
714         blk[0] = p.dc * qt[0];
715 
716         /* Section F.2.2.2: decode the AC coefficients */
717         /* Since zero values are skipped, output area must be zeroed
718            beforehand */
719         for (k = 1; k < 64; ) {
720                 register int s, r;
721                 register int v;
722 
723                 /*FIXME*/
724                 if (--n < 0)
725                         return (0);
726 
727                 v = *code++;
728                 s = v & 0x0f;
729                 r = (v >> 4) & 0x0f;
730                 if (s != 0) {
731                         k += r;
732                         r = v >> 8;
733                         s = huff_EXTEND(r, s);
734                         /*
735                          * Descale coefficient and output in natural
736                          * (dezigzagged) order
737                          */
738                         if (k >= 64)
739                                 return (-1);
740                         blk[ZAG[k]] = s * qt[k];
741                         ++k;
742                 } else {
743                         if (r != 15)
744                                 /* end of block */
745                                 break;
746                         k += 16;
747                 }
748         }
749         return (0);
750 }
751 
752 YUVJpegDecoder::YUVJpegDecoder(const config& c) : JpegDecoder(c)
753 {
754         allocimage();
755 
756         if (hysteresis) {
757                 int n = imagesize / 64;
758                 dctcache[0] = new blkcache[n];
759                 bzero(dctcache[0], sizeof(*dctcache) * n);
760                 dctcache[1] = new blkcache[n];
761                 bzero(dctcache[1], sizeof(*dctcache) * n);
762                 dctcache[2] = new blkcache[n];
763                 bzero(dctcache[2], sizeof(*dctcache) * n);
764         }
765 }
766 
767 void YUVJpegDecoder::allocimage()
768 {
769         int imagesize = width * height;
770         image[0] = new u_char[imagesize];
771         image[1] = new u_char[imagesize];
772         image[2] = new u_char[imagesize];
773         image[3] = 0;
774 }
775 
776 YUVJpegDecoder::reconfig(const config& c)
777 {
778         JpegDecoder::reconfig(c);
779         delete image[0];
780         delete image[1];
781         delete image[2];
782         allocimage();
783 }
784 
785 GrayJpegDecoder::GrayJpegDecoder(const config& c, u_char* out, int* clut,
786                                  int s)
787         : JpegDecoder(c), scale(s)
788 {
789         image[0] = out;
790         bcopy(clut, clut_, sizeof(clut_));
791 
792         if (hysteresis) {
793                 int n = width * height / 64;
794                 dctcache[0] = new blkcache[n];
795                 bzero(dctcache[0], sizeof(dctcache[0]) * n);
796         }
797 }
798 
799 int GrayJpegDecoder::decode(u_char* in, int len)
800 {
801         inb = in;
802         nbb = 0;
803 
804         int delta = (scale < 0) ? 16 : 8;
805         int stride0 = (delta - 1) * width;/*FIXME*/
806         int stride1 = stride1 / 2;
807         /*FIXME*/
808         if (scale < 0) {
809                 stride0 *= 2;
810                 stride1 *= 2;
811         }
812         int off0 = 0;
813         int off1 = 0;
814         int blkno = 0;
815         huffreset();
816         for (int row = MCU_rows_in_scan; --row >= 0; ) {
817                 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
818                         /* FIXME this works only for hsamp = 2, vsamp = 1 */
819                         if (decode_block(0, blkno, off0, width) < 0)
820                                 return (-1);
821                         off0 += delta;
822                         if (decode_block(0, blkno + 1, off0, width) < 0)
823                                 return (-1);
824                         off0 += delta;
825                         if (huffskip(1) < 0)
826                                 return (-1);
827                         if (huffskip(2) < 0)
828                                 return (-1);
829                         off1 += delta;
830                         blkno += 2;
831                 }
832                 off0 += stride0;
833                 off1 += stride1;
834         }
835         return (0);
836 }
837 
838 void GrayJpegDecoder::dmix(const int dc, const short* bp, u_char* out,
839                            const int stride) const
840 {
841         register const int *clut = clut_;
842 
843         for (register int k = 8; --k >= 0; ) {
844                 register u_word o;
845                 register int t;
846 #if BYTE_ORDER == LITTLE_ENDIAN
847                 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)];
848                 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 16;
849                 o |= o << 8;
850                 *(u_word *)out = o;
851                 *(u_word *)(out + stride) = o;
852 
853                 o = clut[(UCLIMIT(bp[2] + dc) & 0xff)];
854                 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 16;
855                 o |= o << 8;
856                 *(u_word *)(out + 4) = o;
857                 *(u_word *)(out + stride + 4) = o;
858 
859                 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)];
860                 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 16;
861                 o |= o << 8;
862                 *(u_word *)(out + 8) = o;
863                 *(u_word *)(out + stride + 8) = o;
864 
865                 o = clut[(UCLIMIT(bp[6] + dc) & 0xff)];
866                 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 16;
867                 o |= o << 8;
868                 *(u_word *)(out + 12) = o;
869                 *(u_word *)(out + stride + 12) = o;
870 #else
871                 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)] << 16;
872                 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)];
873                 o |= o << 8;
874                 *(u_word *)out = o;
875                 *(u_word *)(out + stride) = o;
876 
877                 o = clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
878                 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)];
879                 o |= o << 8;
880                 *(u_word *)(out + 4) = o;
881                 *(u_word *)(out + stride + 4) = o;
882 
883                 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 16;
884                 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)];
885                 o |= o << 8;
886                 *(u_word *)(out + 8) = o;
887                 *(u_word *)(out + stride + 8) = o;
888 
889                 o = clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 16;
890                 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)];
891                 o |= o << 8;
892                 *(u_word *)(out + 12) = o;
893                 *(u_word *)(out + stride + 12) = o;
894 #endif
895                 bp += 8;
896                 out += stride << 1;
897         }
898 }
899 
900 void GrayJpegDecoder::mix(const int dc, const short* bp, u_char* out,
901                           const int stride) const
902 {
903         if (scale < 0) {
904                 dmix(dc, bp, out, stride << 1);
905                 return;
906         }
907 
908         register const int *clut = clut_;
909 
910         for (register int k = 8; --k >= 0; ) {
911                 register int t;
912 #ifdef __alpha
913                 register u_long o;
914                 o = (u_long)clut[(UCLIMIT(bp[0] + dc) & 0xff)];
915                 o |= (u_long)clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 8;
916                 o |= (u_long)clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
917                 o |= (u_long)clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 24;
918                 o |= (u_long)clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 32;
919                 o |= (u_long)clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 40;
920                 o |= (u_long)clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 48;
921                 o |= (u_long)clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 56;
922                 *(u_long *)out = o;
923 #elif BYTE_ORDER == LITTLE_ENDIAN
924                 register u_word o;
925                 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)];
926                 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 8;
927                 o |= clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
928                 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 24;
929                 *(u_word *)out = o;
930                 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)];
931                 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 8;
932                 o |= clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 16;
933                 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 24;
934                 *(u_word *)(out + 4) = o;
935 #else
936                 register u_word o;
937                 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)] << 24;
938                 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 16;
939                 o |= clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 8;
940                 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)];
941                 *(u_word *)out = o;
942                 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 24;
943                 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 16;
944                 o |= clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 8;
945                 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)];
946                 *(u_word *)(out + 4) = o;
947 #endif
948                 bp += 8;
949                 out += stride;
950         }
951 }
952 
953 void GrayJpegDecoder::dfill(int dc, u_char* out, const int stride) const
954 {
955         register int t;
956         dc = UCLIMIT(dc) & 0xff;
957         dc = clut_[dc];
958         dc |= dc << 8;
959         dc |= dc << 16;
960         int i = 16;
961         do {
962                 *(u_word*)out = dc;
963                 *(u_word*)(out + 4) = dc;
964                 *(u_word*)(out + 8) = dc;
965                 *(u_word*)(out + 12) = dc;
966                 out += stride;
967         } while (--i > 0);
968 }
969 
970 void GrayJpegDecoder::fill(int xdc, u_char* out, const int stride) const
971 {
972         if (scale < 0) {
973                 dfill(xdc, out, stride << 1);
974                 return;
975         }
976         register int t;
977         xdc = UCLIMIT(xdc) & 0xff;
978         register u_long dc = clut_[xdc];
979         dc |= dc << 8;
980         dc |= dc << 16;
981 #ifdef __alpha
982         dc |= dc << 32;
983 #endif
984         *(u_long*)out = dc;
985 #ifndef __alpha
986         *(u_long*)(out + 4) = dc;
987 #endif
988         out += stride;
989         *(u_long*)out = dc;
990 #ifndef __alpha
991         *(u_long*)(out + 4) = dc;
992 #endif
993         out += stride;
994         *(u_long*)out = dc;
995 #ifndef __alpha
996         *(u_long*)(out + 4) = dc;
997 #endif
998         out += stride;
999         *(u_long*)out = dc;
1000 #ifndef __alpha
1001         *(u_long*)(out + 4) = dc;
1002 #endif
1003         out += stride;
1004         *(u_long*)out = dc;
1005 #ifndef __alpha
1006         *(u_long*)(out + 4) = dc;
1007 #endif
1008         out += stride;
1009         *(u_long*)out = dc;
1010 #ifndef __alpha
1011         *(u_long*)(out + 4) = dc;
1012 #endif
1013         out += stride;
1014         *(u_long*)out = dc;
1015 #ifndef __alpha
1016         *(u_long*)(out + 4) = dc;
1017 #endif
1018         out += stride;
1019         *(u_long*)out = dc;
1020 #ifndef __alpha
1021         *(u_long*)(out + 4) = dc;
1022 #endif
1023 }
1024 
1025 DiffJpegDecoder::DiffJpegDecoder(const config& c) : JpegDecoder(c)
1026 {
1027         int n = width * height / 64;
1028         dcts = new blkcache[n];
1029         bzero(dcts, sizeof(*dcts) * n);
1030 
1031         /*
1032          * Don't use any quantization.
1033          * It just complicates the differencing heuristic.
1034          */
1035         short* qt = qtab[comp[0].quant_tbl_no];
1036         for (int i = 0; i < 64; ++i)
1037                 qt[i] = 1;
1038 }
1039 
/*
 * Per-coefficient table consulted by diff().  It is indexed by the
 * natural-order position of a coefficient within a 64-entry block
 * (diff() looks entries up as cost[ZAG[i]]) and is laid out below as
 * eight rows of eight.  diff() shifts each absolute difference left
 * by (5 - cost[k]), so an entry of 5 gives a weight of 1 and an entry
 * of 0 gives a weight of 32.
 */
int cost[64] = {
        /* row 0 */  5, 4, 4, 3, 3, 2, 1, 0,
        /* row 1 */  4, 4, 3, 3, 2, 1, 1, 0,
        /* row 2 */  4, 3, 3, 2, 1, 1, 0, 0,
        /* row 3 */  3, 3, 2, 1, 1, 0, 0, 0,
        /* row 4 */  3, 2, 1, 1, 0, 0, 0, 0,
        /* row 5 */  2, 1, 1, 0, 0, 0, 0, 0,
        /* row 6 */  1, 1, 0, 0, 0, 0, 0, 0,
        /* row 7 */  0, 0, 0, 0, 0, 0, 0, 0,
};
1050 
1051 static int
1052 diff(short* b0, short* b1)
1053 {
1054         int n = 0;
1055         /*
1056          * FIXME ignore high frequencies for differencing decision
1057          */
1058         for (int i = 64; --i >= 0; ) {
1059                 int k = ZAG[i];
1060                 int d = b0[k] - b1[k];
1061                 if (d < 0)
1062                         d = -d;
1063                 n += d << (5 - cost[k]);
1064 #ifdef notdef
1065                 n += d;
1066 #endif
1067         }
1068         return (n);
1069 }
1070 
1071 int DiffJpegDecoder::decode_block_diff(blkcache* bc, blkcache* dctcache,
1072                                         u_char* dv)
1073 {
1074         u_word code[128];
1075 
1076         int n = huffparse(0, code);
1077         if (n < 0)
1078                 return (-1);
1079         if (huffblock(0, n, code, bc->block) < 0)
1080                 return (-1);
1081         *dv = diff(bc->block, dctcache->block);
1082         return (0);
1083 }
1084 
1085 int DiffJpegDecoder::decode(u_char* in, int len, blkcache* dctcache,
1086                                   u_char* diffvector)
1087 {
1088         inb = in;
1089         nbb = 0;
1090 
1091         huffreset();
1092         int blkno = 0;
1093         struct blkcache* bc = dcts;
1094         for (int row = MCU_rows_in_scan; --row >= 0; ) {
1095                 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
1096                         /* FIXME this works only for hsamp = 2, vsamp = 1 */
1097                         if (decode_block_diff(bc++, dctcache++,
1098                                               diffvector++) < 0)
1099                                 return (-1);
1100 
1101                         if (decode_block_diff(bc++, dctcache++,
1102                                               diffv