1 /*
2 * jpeg-hyst.cc --
3 *
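 * JPEG block decoders (YUV, gray-scale and differencing variants) built
 * on table-driven Huffman decoding, with a hash cache of inverse-DCT
 * results and an optional per-block hysteresis filter.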
5 */
6
7 /*
8 * This code is derived from the Independent JPEG Group's JPEG software:
9 *
10 * Copyright (C) 1991, 1992, Thomas G. Lane.
11 * This file is part of the Independent JPEG Group's software.
12 * For conditions of distribution and use, see the accompanying
13 * README.IJPG file.
14 */
15
16 #include "jpeg.h"
17
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <sys/param.h>
21 #include <netinet/in.h>
22
23 extern "C" {
24 void j_rev_dct(short*);
25 void init_pre_idct();
26 }
27
28 #include <bstring.h>
29
30 #ifdef __alpha
31 #include <machine/endian.h>
32 #endif
33
34
35 /*
36 * These two macros stolen from nv.
37 */
/*
 * UCLIMIT(x) limits x to [0..255] using only logical ops.  It needs an
 * `int t' scratch variable in scope at the point of use and relies on a
 * 32-bit int with an arithmetic right shift.  Only the low 8 bits of the
 * result are meaningful: an input above 255 evaluates to -1 (all ones),
 * so callers must mask the result with 0xff.
 */
#define UCLIMIT(x) ((t = (x)), (t &= ~(t>>31)), (t | ~((t-256) >> 31)))
/*
 * A variant of the above which limits x to [-128..127].  The 0xff mask is
 * applied before removing the bias; without it an input above 127 would
 * evaluate to -129 rather than 127.
 */
#define SCLIMIT(x) ((UCLIMIT((x)+128) & 0xff) - 128)
42
43 /*
44 * ZAG[i] is the natural-order position of the i'th element of zigzag order.
45 * If the incoming data is corrupted, huff_decode_mcu could attempt to
46 * reference values beyond the end of the array. To avoid a wild store,
47 * we put some extra zeroes after the real entries.
48 */
49 static const int ZAG[] = {
50 0, 1, 8, 16, 9, 2, 3, 10,
51 17, 24, 32, 25, 18, 11, 4, 5,
52 12, 19, 26, 33, 40, 48, 41, 34,
53 27, 20, 13, 6, 7, 14, 21, 28,
54 35, 42, 49, 56, 57, 50, 43, 36,
55 29, 22, 15, 23, 30, 37, 44, 51,
56 58, 59, 52, 45, 38, 31, 39, 46,
57 53, 60, 61, 54, 47, 55, 62, 63,
58 /* extra entries in case k>63 below */
59 0, 0, 0, 0, 0, 0, 0, 0,
60 0, 0, 0, 0, 0, 0, 0, 0
61 };
62
63 JpegDecoder::JpegDecoder(const config& c)
64 {
65 reconfig(c);
66 }
67
68 void JpegDecoder::reconfig(const config& c)
69 {
70 width = c.width;
71 height = c.height;
72 jpeg_color_space = c.cs;
73 bcopy(c.qtab, qtab, sizeof(qtab));
74 ncomp = c.ncomp;
75
76 max_h_samp_factor = 1;
77 max_v_samp_factor = 1;
78 for (int i = ncomp; --i >= 0; ) {
79 comp[i].id = c.comp[i].id;
80 int v = comp[i].h_samp_factor = c.comp[i].h_samp_factor;
81 if (max_h_samp_factor < v)
82 max_h_samp_factor = v;
83 v = comp[i].v_samp_factor = c.comp[i].v_samp_factor;
84 if (max_v_samp_factor < v)
85 max_v_samp_factor = v;
86 comp[i].quant_tbl_no = c.comp[i].quant_tbl_no;
87 comp[i].dc_tbl_no = c.comp[i].dc_tbl_no;
88 comp[i].ac_tbl_no = c.comp[i].ac_tbl_no;
89 }
90 for (i = 0; i < 4; ++i) {
91 if (c.dc_huffval[i] != 0)
92 dc_hufftab[i] = huffbuild(c.dc_huffbits[i],
93 c.dc_huffval[i]);
94 if (c.ac_huffval[i] != 0)
95 ac_hufftab[i] = huffbuild(c.ac_huffbits[i],
96 c.ac_huffval[i]);
97 }
98 interleaved_scan_setup();
99 }
100
101 void JpegDecoder::init()
102 {
103 #ifdef ROWE
104 init_pre_idct();
105 #endif
106 huffreset();
107 }
108
/*
 * Round a up to the next multiple of b.
 * Callers must pass a >= 0 and b > 0.
 */
static inline int jround_up(int a, int b)
{
	const int bumped = a + (b - 1);

	return ((bumped / b) * b);
}
115
116 void JpegDecoder::interleaved_scan_setup()
117 {
118 short ci, mcublks;
119
120 MCUs_per_row = (width + 8 * max_h_samp_factor - 1)
121 / (8 * max_h_samp_factor);
122
123 MCU_rows_in_scan = (height + 8 * max_v_samp_factor - 1)
124 / (8 * max_v_samp_factor);
125
126 blocks_in_MCU = 0;
127
128 for (ci = 0; ci < ncomp; ci++) {
129 /*
130 * for interleaved scan, sampling factors give # of blocks
131 * per component
132 */
133 component& p = comp[ci];
134 p.MCU_width = p.h_samp_factor;
135 p.MCU_height = p.v_samp_factor;
136 p.MCU_blocks = p.MCU_width * p.MCU_height;
137
138 /* Prepare array describing MCU composition */
139 mcublks = p.MCU_blocks;
140 if (blocks_in_MCU + mcublks > MAX_BLOCKS_IN_MCU)
141 abort();
142 while (mcublks-- > 0)
143 MCU_membership[blocks_in_MCU++] = ci;
144 }
145 }
146
147 void JpegDecoder::fill(int dc, u_char* out, const int stride) const
148 {
149 register int t;
150
151 dc = UCLIMIT(dc) & 0xff;
152 dc |= dc << 8;
153 dc |= dc << 16;
154 *(u_word*)out = dc;
155 *(u_word*)(out + 4) = dc;
156 out += stride;
157 *(u_word*)out = dc;
158 *(u_word*)(out + 4) = dc;
159 out += stride;
160 *(u_word*)out = dc;
161 *(u_word*)(out + 4) = dc;
162 out += stride;
163 *(u_word*)out = dc;
164 *(u_word*)(out + 4) = dc;
165 out += stride;
166 *(u_word*)out = dc;
167 *(u_word*)(out + 4) = dc;
168 out += stride;
169 *(u_word*)out = dc;
170 *(u_word*)(out + 4) = dc;
171 out += stride;
172 *(u_word*)out = dc;
173 *(u_word*)(out + 4) = dc;
174 out += stride;
175 *(u_word*)out = dc;
176 *(u_word*)(out + 4) = dc;
177 }
178
179 void JpegDecoder::mix(const int dc, const short* bp, u_char* out,
180 const int stride) const
181 {
182 for (register int k = 8; --k >= 0; ) {
183 register u_word o;
184 register int t;
185 #if BYTE_ORDER == LITTLE_ENDIAN
186 o = UCLIMIT(bp[0] + dc) & 0xff;
187 o |= (UCLIMIT(bp[1] + dc) & 0xff) << 8;
188 o |= (UCLIMIT(bp[2] + dc) & 0xff) << 16;
189 o |= (UCLIMIT(bp[3] + dc) & 0xff) << 24;
190 *(u_word *)out = o;
191 o = UCLIMIT(bp[4] + dc) & 0xff;
192 o |= (UCLIMIT(bp[5] + dc) & 0xff) << 8;
193 o |= (UCLIMIT(bp[6] + dc) & 0xff) << 16;
194 o |= (UCLIMIT(bp[7] + dc) & 0xff) << 24;
195 *(u_word *)(out + 4) = o;
196 #else
197 o = (UCLIMIT(bp[0] + dc) & 0xff) << 24;
198 o |= (UCLIMIT(bp[1] + dc) & 0xff) << 16;
199 o |= (UCLIMIT(bp[2] + dc) & 0xff) << 8;
200 o |= UCLIMIT(bp[3] + dc) & 0xff;
201 *(u_word *)out = o;
202 o = (UCLIMIT(bp[4] + dc) & 0xff) << 24;
203 o |= (UCLIMIT(bp[5] + dc) & 0xff) << 16;
204 o |= (UCLIMIT(bp[6] + dc) & 0xff) << 8;
205 o |= UCLIMIT(bp[7] + dc) & 0xff;
206 *(u_word *)(out + 4) = o;
207 #endif
208 bp += 8;
209 out += stride;
210 }
211 }
212
/* Longest code sequence (in entries) that is eligible for caching. */
#define MAXCODE 24
/*
 * One entry of the inverse-DCT cache: the parsed code sequence that
 * identifies a block and the 64 samples stored for it.
 */
struct hcnode {
	struct hcnode *next;	/* next node in the same hash chain */
	u_word n;		/* number of valid entries in code[] */
	u_word code[MAXCODE];	/* the code sequence used as the key */
	short block[64];	/* cached block data for this key */
};

/* Number of hash buckets; a power of two because indices are masked. */
#define HASHSIZE (2*4096)
/* Bucket heads, one singly linked chain per hash value. */
struct hcnode *hashtab[HASHSIZE];
/*FIXME*/
/* Size of the statically allocated node pool. */
#define NHC (2*HASHSIZE)
struct hcnode hcpool[NHC];
/* Number of pool nodes handed out so far. */
static int nhc;
227
228 static struct hcnode *
229 scavenge()
230 {
231 register struct hcnode *p;
232
233 int i = nhc;
234 if (i >= NHC) {
235 static int rover = 0;
236
237 i = rover;
238 do
239 i = (i + 1) & (HASHSIZE - 1);
240 while ((p = hashtab[i]) == 0);
241 rover = i;
242 hashtab[i] = p->next;
243 } else {
244 p = &hcpool[i];
245 nhc = i + 1;
246 }
247 return (p);
248 }
249
250 static inline int
251 hchash(int n, u_word *code)
252 {
253 int v = 0;
254
255 while (--n >= 0)
256 v += code[n] + 37;
257 return ((v ^ v >> 16) & (HASHSIZE - 1));
258 }
259
260 static inline struct hcnode *
261 hclookup(int h, int n, u_word *code)
262 {
263 struct hcnode *p;
264
265 for (p = hashtab[h]; p != 0; p = p->next)
266 if (p->n == n &&
267 bcmp(p->code, code, n * sizeof(*code)) == 0)
268 break;
269
270 return (p);
271 }
272
273 static void
274 hcenter(struct hcnode *p, int h, int n, u_word *code)
275 {
276 bcopy(code, p->code, n * sizeof(*code));
277 p->n = n;
278
279 p->block[0] = 0;
280 j_rev_dct(p->block);
281
282 p->next = hashtab[h];
283 hashtab[h] = p;
284 }
285
286 int hcmiss;
287 int hchit;
288 int dcblk;
289 int bblk;
290 int sblk;
291
292 static int
293 length(struct hcnode *p)
294 {
295 int n = 0;
296 for (; p != 0; p = p->next)
297 ++n;
298 return (n);
299 }
300
301 void
302 pstats()
303 {
304 int i;
305 int len;
306 int bucket = 0;
307 int maxlen = 0;
308
309 for (i = 0; i < HASHSIZE; ++i) {
310 if (hashtab[i] == 0)
311 continue;
312 ++bucket;
313 len = length(hashtab[i]);
314 if (len > maxlen)
315 maxlen = len;
316 }
317 printf("hit\t%d\n", hchit);
318 printf("miss\t%d\n", hcmiss);
319 printf("bucket\t%d\n", bucket);
320 printf("maxlen\t%d\n", maxlen);
321 printf("dcblk\t%d\n", dcblk);
322 printf("bblk\t%d\n", bblk);
323 printf("sblk\t%d\n", sblk);
324 }
325
/*
 * Per-block record of the last coefficients that were accepted for a
 * block position.
 */
struct blkcache {
	int init;		/* non-zero once block[] holds valid data */
	short block[64];	/* the stored coefficients */
};
330
331 int
332 blkthresh(short* b0, short* b1, int thresh, short *qt)
333 {
334 int n = 0;
335 for (int i = 64; --i >= 0; ) {
336 int k = ZAG[i];
337 int d = b0[k] - b1[k];
338 if (d < 0)
339 d = -d;
340 if (d > hlevel * qt[i]) {
341 if (i == 0)
342 n += 5;
343 else
344 n += 1;
345 if (n > thresh)
346 return (-1);
347 }
348 }
349 return (0);
350 }
351
352 #ifdef notdef
353 void
354 pblk(short *b, short *a, short *qt)
355 {
356 for (int i = 0; i < 64; ++i) {
357 int k = ZAG[i];
358 int d = b[k] - a[k];
359 if (d != 0)
360 printf("%d:\t%d\td %d\tq %d\n", i, b[k], d, qt[i]);
361 }
362 }
363 #endif
364
365
366 int JpegDecoder::decode_block_with_hysteresis(int ci, int blkno,
367 int off, int stride)
368 {
369 int n, h;
370 struct hcnode *p;
371 short *qt;
372 int dc;
373 struct blkcache *bc;
374 short block[64];
375 u_word code[128];
376
377 /*FIXME*/
378 component& c = comp[ci];
379 qt = qtab[c.quant_tbl_no];
380
381 bc = &dctcache[ci][blkno];
382
383 n = huffparse(ci, code);
384 if (n < 0 || huffblock(ci, n, code, block) < 0)
385 return (-1);
386
387 if (!bc->init || blkthresh(block, bc->block, hysteresis, qt)) {
388 bcopy(block, bc->block, sizeof(block));
389 j_rev_dct(block);
390 mix(128, block, image[ci] + off, stride);
391 bc->init = 1;
392 }
393 return (0);
394 }
395
396 int JpegDecoder::decode_block(int ci, int blkno, int off, int stride)
397 {
398 int n, h;
399 struct hcnode *p;
400 short *qt;
401 int dc;
402 short block[64];
403 u_word code[128];
404
405 /*FIXME*/
406 if (hysteresis)
407 return (decode_block_with_hysteresis(ci, blkno, off, stride));
408
409 /*FIXME*/
410 component& c = comp[ci];
411 qt = qtab[c.quant_tbl_no];
412
413 n = huffparse(ci, code);
414 if (n == 0) {
415 dc = c.dc * qt[0];
416 fill((dc >> 3) + 128, image[ci] + off, stride);
417 ++dcblk;
418 } else if (n < MAXCODE) {
419 ++sblk;
420 h = hchash(n, code);
421 p = hclookup(h, n, code);
422 if (p == 0) {
423 ++hcmiss;
424 p = scavenge();
425 if (huffblock(ci, n, code, p->block) < 0)
426 return (-1);
427 hcenter(p, h, n, code);
428 } else
429 ++hchit;
430 dc = c.dc * qt[0];
431 mix((dc >> 3) + 128, p->block, image[ci] + off, stride);
432 } else {
433 ++bblk;
434 if (huffblock(ci, n, code, block) < 0)
435 return (-1);
436 j_rev_dct(block);
437 mix(128, block, image[ci] + off, stride);
438 }
439 return (0);
440 }
441
442 int JpegDecoder::decode(u_char* in, int len)
443 {
444 #ifdef notdef
445 nbits = 8 * len;
446 #endif
447 inb = in;
448 nbb = 0;
449
450 const int stride0 = 7 * width;/*FIXME*/
451 const int stride1 = 7 * width / 2;/*FIXME*/
452
453 int off0 = 0;
454 int off1 = 0;
455 int blkno = 0;
456 huffreset();
457 for (int row = MCU_rows_in_scan; --row >= 0; ) {
458 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
459 /* FIXME this works only for hsamp = 2, vsamp = 1 */
460 if (decode_block(0, blkno, off0, width) < 0)
461 return (-1);
462 off0 += 8;
463 if (decode_block(0, blkno + 1, off0, width) < 0)
464 return (-1);
465 off0 += 8;
466 if (decode_block(1, blkno >> 1, off1, width / 2) < 0)
467 return (-1);
468 if (decode_block(2, blkno >> 1, off1, width / 2) < 0)
469 return (-1);
470 off1 += 8;
471 blkno += 2;
472 }
473 off0 += stride0;
474 off1 += stride1;
475 }
476 return (0);
477 }
478
479 /* Figure F.12: extend sign bit */
480
481 #ifdef notdef
482 #define huff_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
483
484 static const int extend_test[16] = /* entry n is 2**(n-1) */
485 { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
486 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
487
488 static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
489 { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
490 ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
491 ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
492 ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
493 #else
494 /* is this really faster? */
/*
 * Figure F.12: turn the s-bit magnitude field x into a signed value.
 *
 * When the top bit (bit s-1) of x is set the value is positive and is
 * returned unchanged; otherwise the result is x - 2^s + 1.  A size of
 * zero carries no bits, so x is returned as is rather than shifting by a
 * negative amount.
 */
inline int
huff_EXTEND(int x, int s)
{
	if (s <= 0)
		return (x);
	if ((x >> (s - 1)) & 1)
		return (x);
	/* set every bit from position s upward, then add one */
	return ((x | ~((1 << s) - 1)) + 1);
}
502 #endif
503
/*
 * HUFFRQ(bb): refill the bit buffer.  Shifts bb left by 16 and appends
 * the next two bytes read through the `inb' pointer, which is advanced.
 * Whenever a byte read equals 0xff, the byte following it is skipped.
 * No check is made against the end of the input buffer.
 */
#define HUFFRQ(bb) \
{ \
	register int v; \
	register u_char *cp = inb; \
	\
	bb <<= 16; \
	v = *cp++; \
	if (v == 0xff) ++cp; \
	bb |= v << 8; \
	v = *cp++; \
	if (v == 0xff) ++cp; \
	bb |= v; \
	inb = cp; \
	\
}

/* MASK(s): a value with the low s bits set. */
#define MASK(s) ((1 << (s)) - 1)

/*
 * HUFF_DECODE(ht, nbb, bb, result): decode one Huffman symbol.
 * nbb is the number of unread bits held in bb.  The buffer is refilled
 * when fewer than 16 bits are available, the next 16 bits index the
 * table ht, and the entry found supplies the code length (high byte,
 * subtracted from nbb) and the symbol (low byte, stored in result).
 */
#define HUFF_DECODE(ht, nbb, bb, result) { \
	register int s_, v_; \
	\
	if (nbb < 16) { \
		HUFFRQ(bb); \
		nbb += 16; \
	} \
	v_ = (bb >> (nbb - 16)) & 0xffff; \
	s_ = (ht)[v_]; \
	nbb -= (s_ >> 8); \
	result = s_ & 0xff; \
}

/*
 * GET_BITS(n, nbb, bb, result): consume n bits and store them in result,
 * refilling the buffer once if it runs short.  n is evaluated twice, and
 * result may name the same variable as n because n is read before the
 * assignment.
 */
#define GET_BITS(n, nbb, bb, result) \
{ \
	nbb -= n; \
	if (nbb < 0) { \
		HUFFRQ(bb); \
		nbb += 16; \
	} \
	(result) = ((bb >> nbb) & MASK(n)); \
}

/* SKIP_BITS(n, nbb, bb): like GET_BITS, but the bits are discarded. */
#define SKIP_BITS(n, nbb, bb) \
{ \
	nbb -= n; \
	if (nbb < 0) { \
		HUFFRQ(bb); \
		nbb += 16; \
	} \
}
553
554 int JpegDecoder::huffparse(int ci, u_word *code)
555 {
556 component& p = comp[ci];
557
558 /* Decode a single block's worth of coefficients */
559
560 /* Section F.2.2.1: decode the DC coefficient difference */
561 register int bb_ = bb;
562 register int nbb_ = nbb;
563 u_short* ht = dc_hufftab[p.dc_tbl_no];
564 register int s, r;
565 HUFF_DECODE(ht, nbb_, bb_, s);
566 if (s != 0) {
567 GET_BITS(s, nbb_, bb_, r);
568 s = huff_EXTEND(r, s);
569 }
570 /* Convert DC difference to actual value, update last_dc_val */
571 s += p.dc;
572 p.dc = s;
573
574 /* Section F.2.2.2: decode the AC coefficients */
575 ht = ac_hufftab[p.ac_tbl_no];
576 register int n = 0;
577 for (register int k = 1; k < 64; ) {
578 /* Symbol-1 */
579 register int v;
580 HUFF_DECODE(ht, nbb_, bb_, v);
581 s = v & 15;
582 r = v >> 4;
583 if (s != 0) {
584 k += r;
585 /* Symbol-2 */
586 GET_BITS(s, nbb_, bb_, s);
587 *code++ = (s << 8) | v;
588 ++n;
589 ++k;
590 } else {
591 if (r != 15)
592 /* end of block */
593 break;
594 *code++ = v;
595 ++n;
596 k += 16;
597 }
598 }
599 nbb = nbb_;
600 bb = bb_;
601
602 return (n);
603 }
604
605 int JpegDecoder::huffskip(int ci)
606 {
607 /* Decode a single block's worth of coefficients */
608
609 /* Section F.2.2.1: decode the DC coefficient difference */
610 register int bb_ = bb;
611 register int nbb_ = nbb;
612 component& p = comp[ci];
613 u_short* ht = dc_hufftab[p.dc_tbl_no];
614 register int s;
615 HUFF_DECODE(ht, nbb_, bb_, s);
616 if (s != 0) {
617 SKIP_BITS(s, nbb_, bb_);
618 }
619 /* Section F.2.2.2: decode the AC coefficients */
620 ht = ac_hufftab[p.ac_tbl_no];
621 for (register int k = 1; k < 64; ) {
622 /* Symbol-1 */
623 register int v;
624 HUFF_DECODE(ht, nbb_, bb_, v);
625 s = v & 15;
626 register int r = v >> 4;
627 if (s != 0) {
628 k += r;
629 /* Symbol-2 */
630 SKIP_BITS(s, nbb_, bb_);
631 ++k;
632 } else {
633 if (r != 15)
634 /* end of block */
635 break;
636 k += 16;
637 }
638 }
639 nbb = nbb_;
640 bb = bb_;
641
642 return (0);
643 }
644
645 void JpegDecoder::huffreset()
646 {
647 nbb = 0;
648 comp[0].dc = 0;
649 comp[1].dc = 0;
650 comp[2].dc = 0;
651 comp[3].dc = 0;
652 }
653
654 u_short* JpegDecoder::huffbuild(const u_char* bits, const u_char* vals) const
655 {
656 /* Figure C.1: make table of Huffman code length for each symbol */
657 /* Note that this is in code-length order. */
658
659 int nsym = 0;
660 int huffsize[257];
661 for (int codelen = 1; codelen <= 16; ++codelen) {
662 for (int i = 1; i <= bits[codelen]; ++i)
663 /*
664 * FIXME should sanity check that nsym stays
665 * below 256.
666 */
667 huffsize[nsym++] = codelen;
668 }
669 huffsize[nsym] = 0;
670
671 /* Figure C.2: generate the codes themselves */
672 /* Note that this is in code-length order. */
673
674 int code = 0;
675 int si = huffsize[0];
676 u_short huffcode[256];
677 int p = 0;
678 while (p < nsym) {
679 while (huffsize[p] == si)
680 huffcode[p++] = code++;
681
682 code <<= 1;
683 ++si;
684 }
685 /*
686 * Build the direct-map lookup table.
687 */
688 u_short *ht = new u_short[65536];
689 bzero(ht, 65536 * sizeof(u_short));
690 for (int sym = 0; sym < nsym; ++sym) {
691 int codelen = huffsize[sym];
692 int nbit = 16 - codelen;
693 int code = huffcode[sym] << nbit;
694 int map = (codelen << 8) | vals[sym];
695 /*
696 * The low nbit bits are don't cares.
697 * Spin through all possible combos.
698 */
699 for (int n = 1 << nbit; --n >= 0; )
700 ht[code | n] = map;
701 }
702 return (ht);
703 }
704
705 int JpegDecoder::huffblock(int ci, int n, u_word *code, short *blk) const
706 {
707 register int k;
708 register const short *qt;
709
710 const component& p = comp[ci];
711 qt = qtab[p.quant_tbl_no];
712 bzero(blk, 64 * sizeof(*blk));
713 /* Descale and output the DC coefficient (assumes ZAG[0] = 0) */
714 blk[0] = p.dc * qt[0];
715
716 /* Section F.2.2.2: decode the AC coefficients */
717 /* Since zero values are skipped, output area must be zeroed
718 beforehand */
719 for (k = 1; k < 64; ) {
720 register int s, r;
721 register int v;
722
723 /*FIXME*/
724 if (--n < 0)
725 return (0);
726
727 v = *code++;
728 s = v & 0x0f;
729 r = (v >> 4) & 0x0f;
730 if (s != 0) {
731 k += r;
732 r = v >> 8;
733 s = huff_EXTEND(r, s);
734 /*
735 * Descale coefficient and output in natural
736 * (dezigzagged) order
737 */
738 if (k >= 64)
739 return (-1);
740 blk[ZAG[k]] = s * qt[k];
741 ++k;
742 } else {
743 if (r != 15)
744 /* end of block */
745 break;
746 k += 16;
747 }
748 }
749 return (0);
750 }
751
752 YUVJpegDecoder::YUVJpegDecoder(const config& c) : JpegDecoder(c)
753 {
754 allocimage();
755
756 if (hysteresis) {
757 int n = imagesize / 64;
758 dctcache[0] = new blkcache[n];
759 bzero(dctcache[0], sizeof(*dctcache) * n);
760 dctcache[1] = new blkcache[n];
761 bzero(dctcache[1], sizeof(*dctcache) * n);
762 dctcache[2] = new blkcache[n];
763 bzero(dctcache[2], sizeof(*dctcache) * n);
764 }
765 }
766
767 void YUVJpegDecoder::allocimage()
768 {
769 int imagesize = width * height;
770 image[0] = new u_char[imagesize];
771 image[1] = new u_char[imagesize];
772 image[2] = new u_char[imagesize];
773 image[3] = 0;
774 }
775
776 YUVJpegDecoder::reconfig(const config& c)
777 {
778 JpegDecoder::reconfig(c);
779 delete image[0];
780 delete image[1];
781 delete image[2];
782 allocimage();
783 }
784
785 GrayJpegDecoder::GrayJpegDecoder(const config& c, u_char* out, int* clut,
786 int s)
787 : JpegDecoder(c), scale(s)
788 {
789 image[0] = out;
790 bcopy(clut, clut_, sizeof(clut_));
791
792 if (hysteresis) {
793 int n = width * height / 64;
794 dctcache[0] = new blkcache[n];
795 bzero(dctcache[0], sizeof(dctcache[0]) * n);
796 }
797 }
798
799 int GrayJpegDecoder::decode(u_char* in, int len)
800 {
801 inb = in;
802 nbb = 0;
803
804 int delta = (scale < 0) ? 16 : 8;
805 int stride0 = (delta - 1) * width;/*FIXME*/
806 int stride1 = stride1 / 2;
807 /*FIXME*/
808 if (scale < 0) {
809 stride0 *= 2;
810 stride1 *= 2;
811 }
812 int off0 = 0;
813 int off1 = 0;
814 int blkno = 0;
815 huffreset();
816 for (int row = MCU_rows_in_scan; --row >= 0; ) {
817 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
818 /* FIXME this works only for hsamp = 2, vsamp = 1 */
819 if (decode_block(0, blkno, off0, width) < 0)
820 return (-1);
821 off0 += delta;
822 if (decode_block(0, blkno + 1, off0, width) < 0)
823 return (-1);
824 off0 += delta;
825 if (huffskip(1) < 0)
826 return (-1);
827 if (huffskip(2) < 0)
828 return (-1);
829 off1 += delta;
830 blkno += 2;
831 }
832 off0 += stride0;
833 off1 += stride1;
834 }
835 return (0);
836 }
837
838 void GrayJpegDecoder::dmix(const int dc, const short* bp, u_char* out,
839 const int stride) const
840 {
841 register const int *clut = clut_;
842
843 for (register int k = 8; --k >= 0; ) {
844 register u_word o;
845 register int t;
846 #if BYTE_ORDER == LITTLE_ENDIAN
847 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)];
848 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 16;
849 o |= o << 8;
850 *(u_word *)out = o;
851 *(u_word *)(out + stride) = o;
852
853 o = clut[(UCLIMIT(bp[2] + dc) & 0xff)];
854 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 16;
855 o |= o << 8;
856 *(u_word *)(out + 4) = o;
857 *(u_word *)(out + stride + 4) = o;
858
859 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)];
860 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 16;
861 o |= o << 8;
862 *(u_word *)(out + 8) = o;
863 *(u_word *)(out + stride + 8) = o;
864
865 o = clut[(UCLIMIT(bp[6] + dc) & 0xff)];
866 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 16;
867 o |= o << 8;
868 *(u_word *)(out + 12) = o;
869 *(u_word *)(out + stride + 12) = o;
870 #else
871 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)] << 16;
872 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)];
873 o |= o << 8;
874 *(u_word *)out = o;
875 *(u_word *)(out + stride) = o;
876
877 o = clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
878 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)];
879 o |= o << 8;
880 *(u_word *)(out + 4) = o;
881 *(u_word *)(out + stride + 4) = o;
882
883 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 16;
884 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)];
885 o |= o << 8;
886 *(u_word *)(out + 8) = o;
887 *(u_word *)(out + stride + 8) = o;
888
889 o = clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 16;
890 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)];
891 o |= o << 8;
892 *(u_word *)(out + 12) = o;
893 *(u_word *)(out + stride + 12) = o;
894 #endif
895 bp += 8;
896 out += stride << 1;
897 }
898 }
899
900 void GrayJpegDecoder::mix(const int dc, const short* bp, u_char* out,
901 const int stride) const
902 {
903 if (scale < 0) {
904 dmix(dc, bp, out, stride << 1);
905 return;
906 }
907
908 register const int *clut = clut_;
909
910 for (register int k = 8; --k >= 0; ) {
911 register int t;
912 #ifdef __alpha
913 register u_long o;
914 o = (u_long)clut[(UCLIMIT(bp[0] + dc) & 0xff)];
915 o |= (u_long)clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 8;
916 o |= (u_long)clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
917 o |= (u_long)clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 24;
918 o |= (u_long)clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 32;
919 o |= (u_long)clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 40;
920 o |= (u_long)clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 48;
921 o |= (u_long)clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 56;
922 *(u_long *)out = o;
923 #elif BYTE_ORDER == LITTLE_ENDIAN
924 register u_word o;
925 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)];
926 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 8;
927 o |= clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 16;
928 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)] << 24;
929 *(u_word *)out = o;
930 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)];
931 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 8;
932 o |= clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 16;
933 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)] << 24;
934 *(u_word *)(out + 4) = o;
935 #else
936 register u_word o;
937 o = clut[(UCLIMIT(bp[0] + dc) & 0xff)] << 24;
938 o |= clut[(UCLIMIT(bp[1] + dc) & 0xff)] << 16;
939 o |= clut[(UCLIMIT(bp[2] + dc) & 0xff)] << 8;
940 o |= clut[(UCLIMIT(bp[3] + dc) & 0xff)];
941 *(u_word *)out = o;
942 o = clut[(UCLIMIT(bp[4] + dc) & 0xff)] << 24;
943 o |= clut[(UCLIMIT(bp[5] + dc) & 0xff)] << 16;
944 o |= clut[(UCLIMIT(bp[6] + dc) & 0xff)] << 8;
945 o |= clut[(UCLIMIT(bp[7] + dc) & 0xff)];
946 *(u_word *)(out + 4) = o;
947 #endif
948 bp += 8;
949 out += stride;
950 }
951 }
952
953 void GrayJpegDecoder::dfill(int dc, u_char* out, const int stride) const
954 {
955 register int t;
956 dc = UCLIMIT(dc) & 0xff;
957 dc = clut_[dc];
958 dc |= dc << 8;
959 dc |= dc << 16;
960 int i = 16;
961 do {
962 *(u_word*)out = dc;
963 *(u_word*)(out + 4) = dc;
964 *(u_word*)(out + 8) = dc;
965 *(u_word*)(out + 12) = dc;
966 out += stride;
967 } while (--i > 0);
968 }
969
970 void GrayJpegDecoder::fill(int xdc, u_char* out, const int stride) const
971 {
972 if (scale < 0) {
973 dfill(xdc, out, stride << 1);
974 return;
975 }
976 register int t;
977 xdc = UCLIMIT(xdc) & 0xff;
978 register u_long dc = clut_[xdc];
979 dc |= dc << 8;
980 dc |= dc << 16;
981 #ifdef __alpha
982 dc |= dc << 32;
983 #endif
984 *(u_long*)out = dc;
985 #ifndef __alpha
986 *(u_long*)(out + 4) = dc;
987 #endif
988 out += stride;
989 *(u_long*)out = dc;
990 #ifndef __alpha
991 *(u_long*)(out + 4) = dc;
992 #endif
993 out += stride;
994 *(u_long*)out = dc;
995 #ifndef __alpha
996 *(u_long*)(out + 4) = dc;
997 #endif
998 out += stride;
999 *(u_long*)out = dc;
1000 #ifndef __alpha
1001 *(u_long*)(out + 4) = dc;
1002 #endif
1003 out += stride;
1004 *(u_long*)out = dc;
1005 #ifndef __alpha
1006 *(u_long*)(out + 4) = dc;
1007 #endif
1008 out += stride;
1009 *(u_long*)out = dc;
1010 #ifndef __alpha
1011 *(u_long*)(out + 4) = dc;
1012 #endif
1013 out += stride;
1014 *(u_long*)out = dc;
1015 #ifndef __alpha
1016 *(u_long*)(out + 4) = dc;
1017 #endif
1018 out += stride;
1019 *(u_long*)out = dc;
1020 #ifndef __alpha
1021 *(u_long*)(out + 4) = dc;
1022 #endif
1023 }
1024
1025 DiffJpegDecoder::DiffJpegDecoder(const config& c) : JpegDecoder(c)
1026 {
1027 int n = width * height / 64;
1028 dcts = new blkcache[n];
1029 bzero(dcts, sizeof(*dcts) * n);
1030
1031 /*
1032 * Don't use any quantization.
1033 * It just complicates the differencing heuristic.
1034 */
1035 short* qt = qtab[comp[0].quant_tbl_no];
1036 for (int i = 0; i < 64; ++i)
1037 qt[i] = 1;
1038 }
1039
1040 int cost[64] = {
1041 5, 4, 4, 3, 3, 2, 1, 0,
1042 4, 4, 3, 3, 2, 1, 1, 0,
1043 4, 3, 3, 2, 1, 1, 0, 0,
1044 3, 3, 2, 1, 1, 0, 0, 0,
1045 3, 2, 1, 1, 0, 0, 0, 0,
1046 2, 1, 1, 0, 0, 0, 0, 0,
1047 1, 1, 0, 0, 0, 0, 0, 0,
1048 0, 0, 0, 0, 0, 0, 0, 0,
1049 };
1050
1051 static int
1052 diff(short* b0, short* b1)
1053 {
1054 int n = 0;
1055 /*
1056 * FIXME ignore high frequencies for differencing decision
1057 */
1058 for (int i = 64; --i >= 0; ) {
1059 int k = ZAG[i];
1060 int d = b0[k] - b1[k];
1061 if (d < 0)
1062 d = -d;
1063 n += d << (5 - cost[k]);
1064 #ifdef notdef
1065 n += d;
1066 #endif
1067 }
1068 return (n);
1069 }
1070
1071 int DiffJpegDecoder::decode_block_diff(blkcache* bc, blkcache* dctcache,
1072 u_char* dv)
1073 {
1074 u_word code[128];
1075
1076 int n = huffparse(0, code);
1077 if (n < 0)
1078 return (-1);
1079 if (huffblock(0, n, code, bc->block) < 0)
1080 return (-1);
1081 *dv = diff(bc->block, dctcache->block);
1082 return (0);
1083 }
1084
1085 int DiffJpegDecoder::decode(u_char* in, int len, blkcache* dctcache,
1086 u_char* diffvector)
1087 {
1088 inb = in;
1089 nbb = 0;
1090
1091 huffreset();
1092 int blkno = 0;
1093 struct blkcache* bc = dcts;
1094 for (int row = MCU_rows_in_scan; --row >= 0; ) {
1095 for (int mcu = MCUs_per_row; --mcu >= 0; ) {
1096 /* FIXME this works only for hsamp = 2, vsamp = 1 */
1097 if (decode_block_diff(bc++, dctcache++,
1098 diffvector++) < 0)
1099 return (-1);
1100
1101 if (decode_block_diff(bc++, dctcache++,
1102 diffv