1 /*
2 * p64.cc --
3 *
4 * "P64" H.261 decoder
5 */
6
7 /*
8 * This code is derived from the P64 software implementation by the
9 * Stanford PVRG group:
10 *
11 * Copyright (C) 1990, 1991, 1993 Andy C. Hung, all rights reserved.
12 * PUBLIC DOMAIN LICENSE: Stanford University Portable Video Research
13 * Group. If you use this software, you agree to the following: This
14 * program package is purely experimental, and is licensed "as is".
15 * Permission is granted to use, modify, and distribute this program
16 * without charge for any purpose, provided this license/ disclaimer
17 * notice appears in the copies. No warranty or maintenance is given,
18 * either expressed or implied. In no event shall the author(s) be
19 * liable to you or a third party for any special, incidental,
20 * consequential, or other damages, arising out of the use or inability
21 * to use the program for any purpose (or the loss of data), even if we
22 * have been advised of such possibilities. Any public reference or
23 * advertisement of this source code should refer to it as the Portable
24 * Video Research Group (PVRG) code, and not by any author(s) (or
25 * Stanford University) name.
26 */
27
28 #ifndef lint
29 static const char rcsid[] =
30 "@(#) $Header: /usr/mash/src/repository/mash/mash-1/codec/p64/p64.cc,v 1.16 2003/11/19 19:20:22 aswan Exp $";
31 #endif
32
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <string.h>
36
37 #ifdef WIN32
38 #include <winsock.h>
39 #else
40 #include <sys/param.h>
41 #include <sys/file.h>
42 #endif
43 #include <sys/stat.h>
44
45 #include "codec/p64/p64.h"
46 #include "codec/p64/p64-huff.h"
47 #include "codec/dct.h"
48 #include "misc/bsd-endian.h"
49
50 #ifdef DEVELOPMENT_VERSION
51 void P64Decoder::err(const char* msg ...) const
52 #else
53 void P64Decoder::err(const char* /* msg */ ...) const
54 #endif
55 {
56 #ifdef DEVELOPMENT_VERSION
57 va_list ap;
58 va_start(ap, msg);
59 vfprintf(stderr, msg, ap);
60 fprintf(stderr, " @g%d m%d %d/%d of %d/%d: %04x %04x %04x %04x|%04x\n",
61 gob_, mba_,
62 (int)((u_char*)bs_ - (u_char*)ps_), nbb_,
63 (int)((u_char*)es_ - (u_char*)ps_), pebit_,
64 bs_[-4], bs_[-3], bs_[-2], bs_[-1], bs_[0]);
65 #endif
66 }
67
68 P64Decoder::P64Decoder()
69 : fs_(0), front_(0), back_(0),
70 ngob_(0), maxgob_(0), ndmblk_(0), gobquant_(0),
71 mt_(0), gob_(0), mba_(0), mvdh_(0), mvdv_(0),
72 marks_(0), mark_(0),
73 bad_psc_(0), bad_bits_(0), bad_GOBno_(0), bad_fmt_(0)
74 {
75 fmt_ = IT_CIF; /* default is CIF */
76 inithuff();
77 initquant();
78 }
79
/*
 * Free the fs_ array (deleting a null pointer is harmless, so this
 * is safe even if nothing was ever allocated).
 */
P64Decoder::~P64Decoder()
{
	delete[] fs_;
}
84
85 void P64Decoder::init()
86 {
87 if (fmt_ == IT_CIF) {
88 ngob_ = 12;
89 width_ = 352;
90 height_ = 288;
91 } else {
92 ngob_ = 3;
93 width_ = 176;
94 height_ = 144;
95 }
96 size_ = width_ * height_;
97 memset(mb_state_, MBST_OLD, sizeof(mb_state_));
98
99 for (u_int gob = 0; gob < 12; ++gob) {
100 u_short* p = &base_[gob << 6];
101 for (int mba = 0; mba < MBPERGOB; ++mba) {
102 u_int x = 2 * (mba % 11);
103 u_int y;
104 if (fmt_ == IT_CIF) {
105 y = 2 * (3 * (gob >> 1) + mba / 11);
106 if (gob & 1)
107 x += 22;
108 } else
109 y = 2 * (3 * gob + mba / 11);
110
111 p[mba] = (x << 8) | y;
112 }
113 }
114 minx_ = width_;
115 miny_ = height_;
116 maxx_ = 0;
117 maxy_ = 0;
118
119 allocate();
120
121 // invalidate the just-changed-block table (marks_ buffer) so no buffer
122 // overrun occurs (see the CVS log for a more detailed explanation)
123 marks_ = 0;
124 }
125
/*
 * Bit-stream reader macros.
 *
 *   bs   pointer to the next unread 16-bit word of input
 *   bb   bit buffer; the most recently fetched word is in the low bits
 *   nbb  number of not-yet-consumed bits at the bottom of bb
 *
 * HUFFRQ shifts one more 16-bit word into bb, converting it from
 * stream (big-endian) byte order on little-endian hosts.
 *
 * Note: the `register' storage class was dropped from the temporaries
 * below; it was removed from the language in C++17 and conveyed
 * nothing to modern compilers anyway.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define HUFFRQ(bs, bb) \
 { \
	int t = *bs++; \
	bb <<= 16; \
	bb |= (t & 0xff) << 8; \
	bb |= t >> 8; \
 }
#else
#define HUFFRQ(bs, bb) \
 { \
	bb <<= 16; \
	bb |= *bs++; \
 }
#endif

/* Mask covering the low s bits. */
#define MASK(s) ((1 << (s)) - 1)

/*
 * Decode one symbol using table ht.  The next ht.maxlen bits index
 * ht.prefix; each entry holds the code length in its low 5 bits and
 * the symbol value in the remaining upper bits.  Only the code length
 * is consumed from the bit buffer.
 */
#define HUFF_DECODE(bs, ht, nbb, bb, result) { \
	int s__, v__; \
 \
	if (nbb < 16) { \
		HUFFRQ(bs, bb); \
		nbb += 16; \
	} \
	s__ = ht.maxlen; \
	v__ = (bb >> (nbb - s__)) & MASK(s__); \
	s__ = (ht.prefix)[v__]; \
	nbb -= (s__ & 0x1f); \
	result = s__ >> 5; \
 }

/* Read the next n (<= 16) bits into result, refilling bb if needed. */
#define GET_BITS(bs, n, nbb, bb, result) \
 { \
	nbb -= n; \
	if (nbb < 0) { \
		HUFFRQ(bs, bb); \
		nbb += 16; \
	} \
	(result) = ((bb >> nbb) & MASK(n)); \
 }

/* Discard the next n (<= 16) bits, refilling bb if needed. */
#define SKIP_BITS(bs, n, nbb, bb) \
 { \
	nbb -= n; \
	if (nbb < 0) { \
		HUFFRQ(bs, bb); \
		nbb += 16; \
	} \
 }
176
177 /*
178 * Set up the huffman tables.
179 */
180 void P64Decoder::inithuff()
181 {
182 ht_mtype_.prefix = htd_mtype;
183 ht_mtype_.maxlen = htd_mtype_width;
184 ht_mba_.prefix = htd_mba;
185 ht_mba_.maxlen = htd_mba_width;
186 ht_mvd_.prefix = htd_dvm;
187 ht_mvd_.maxlen = htd_dvm_width;
188 ht_cbp_.prefix = htd_cbp;
189 ht_cbp_.maxlen = htd_cbp_width;
190 ht_tcoeff_.prefix = htd_tcoeff;
191 ht_tcoeff_.maxlen = htd_tcoeff_width;
192 }
193
194 int P64Decoder::quantize(int v, int q)
195 {
196 if (v > 0)
197 return (((v << 1) + 1) * q) - (~q & 1);
198 else
199 return (((v << 1) - 1) * q) + (~q & 1);
200 }
201
202 /*
203 * Build quantization lookup table.
204 * One for each possible MQUANT paramenter.
205 */
206 void P64Decoder::initquant()
207 {
208 for (int mq = 0; mq < 32; ++mq) {
209 short* qt = &quant_[mq << 8];
210 for (int v = 0; v < 256; ++v) {
211 int s = (v << 24) >> 24;
212 qt[v] = quantize(s, mq);
213 }
214 }
215 }
216
217 /*
218 * Decode the next block of transform coefficients
219 * from the input stream.
220 * Return number of non-zero ac coefficients.
221 */
222 #ifdef INT_64
223 int P64Decoder::parse_block(short* blk, INT_64* mask)
224 #else
225 int P64Decoder::parse_block(short* blk, u_int* mask)
226 #endif
227 {
228 #ifdef INT_64
229 INT_64 m0 = 0;
230 #else
231 u_int m1 = 0, m0 = 0;
232 #endif
233 /*
234 * Cache bit buffer in registers.
235 */
236 register int nbb = nbb_;
237 register int bb = bb_;
238 register short* qt = qt_;
239
240 int k;
241 if ((mt_ & MT_CBP) == 0) {
242 int v;
243 GET_BITS(bs_, 8, nbb, bb, v);
244 if (v == 255)
245 v = 128;
246 if (mt_ & MT_INTRA)
247 v <<= 3;
248 else
249 v = qt[v];
250 blk[0] = v;
251 k = 1;
252 m0 |= 1;
253 } else if ((bb >> (nbb - 1)) & 1) {
254 /*
255 * In CBP blocks, the first block present must be
256 * non-empty (otherwise it's mask bit wouldn't
257 * be set), so the first code cannot be an EOB.
258 * CCITT optimizes this case by using a huffman
259 * table equivalent to ht_tcoeff_ but without EOB,
260 * in which 1 is coded as "1" instead of "11".
261 * We grab two bits, the first bit is the code
262 * and the second is the sign.
263 */
264 int v;
265 GET_BITS(bs_, 2, nbb, bb, v);
266 /*FIXME quantize?*/
267 blk[0] = qt[(v & 1) ? 0xff : 1];
268 k = 1;
269 m0 |= 1;
270 } else {
271 k = 0;
272 #ifndef INT_64
273 blk[0] = 0;/*FIXME need this because the way we set bits below*/
274 #endif
275 }
276 int nc = 0;
277 for (;;) {
278 int r, v;
279 HUFF_DECODE(bs_, ht_tcoeff_, nbb, bb, r);
280 if (r <= 0) {
281 /* SYM_EOB, SYM_ILLEGAL, or SYM_ESCAPE */
282 if (r == SYM_ESCAPE) {
283 GET_BITS(bs_, 14, nbb, bb, r);
284 v = r & 0xff;
285 r >>= 8;
286 } else {
287 if (r == SYM_ILLEGAL) {
288 bb_ = bb;
289 nbb_ = nbb;
290 err("illegal symbol in block");
291 }
292 /* EOB */
293 break;
294 }
295 } else {
296 v = (r << 22) >> 27;
297 r = r & 0x1f;
298 }
299 k += r;
300 if (k >= 64) {
301 bb_ = bb;
302 nbb_ = nbb;
303 err("bad run length %d (r %d, v %d)", k, r, v);
304 break;
305 }
306 r = COLZAG[k++];
307 blk[r] = qt[v & 0xff];
308 ++nc;
309 #ifdef INT_64
310 m0 |= (INT_64)1 << r;
311 #else
312 if (r < 32)
313 m0 |= 1 << r;
314 else
315 m1 |= 1 << (r - 32);
316 #endif
317 }
318 /*
319 * Done reading input. Update bit buffer.
320 */
321 bb_ = bb;
322 nbb_ = nbb;
323
324 *mask = m0;
325 #ifndef INT_64
326 mask[1] = m1;
327 #endif
328 return (nc);
329 }
330
331 /*
332 * Mix in a motion-compensated, filtered block. Note that
333 * the input block may be misaligned so we cannot try fancy,
334 * word-at-a-time accesses without being careful. The output
335 * block is, of course, aligned.
336 *
337 * The 2-D loop filter is separable into 1-D FIR (0.25 0.5 0.25)
338 * horizontal and vertical passes. At the block edge, the filter
339 * taps are (0 1 0). Full arithmetic precision must be maintained,
340 * until the output stage, where values are rounded (up).
341 *
342 * The code below tries to be efficient by caching the input
343 * rows in registers, and running the filter on 3x3 chunks.
344 * Multiple columns can be computed in parallel by using
345 * two 16-bit adds in a 32-bit register, or four 16-bit adds
346 * in a 64-bit register.
347 */
/*
 * Apply the loop filter to the 8x8 block at `in' and store the result
 * at `out'; both use `stride' bytes between rows.  Input bytes are
 * read one at a time; output is written as two u_int words per row,
 * each assembled pixel by pixel with SPLICE.
 */
void P64Decoder::filter(u_char* in, u_char* out, u_int stride)
{
	/* Corner pixel has filter coef 1 */
	u_int s = in[0];
	u_int o = 0;
	SPLICE(o, s, 24);

	/* r00/r01: first row, pixels 0-3 and 4-7, packed one byte each */
	u_int r00 = s << 24 | in[1] << 16 | in[2] << 8 | in[3];
	u_int r01 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
	in += stride;

	/*
	 * First row.
	 * Horizontal (1 2 1) taps only; a mask of 0x1fe after shifting
	 * one bit less extracts a byte already multiplied by two.
	 */
	s += (r00 >> 15) & 0x1fe;
	s += (r00 >> 8) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 16);

	s = (r00 >> 16) & 0xff;
	s += (r00 >> 7) & 0x1fe;
	s += r00 & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 8);

	s = (r00 >> 8) & 0xff;
	s += (r00 & 0xff) << 1;
	s += r01 >> 24;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 0);
	*(u_int*)out = o;

	s = r00 & 0xff;
	s += (r01 >> 23) & 0x1fe;
	s += (r01 >> 16) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	o = 0;
	SPLICE(o, s, 24);

	s = r01 >> 24;
	s += (r01 >> 15) & 0x1fe;
	s += (r01 >> 8) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 16);

	s = (r01 >> 16) & 0xff;
	s += (r01 >> 7) & 0x1fe;
	s += r01 & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 8);

	/* corner has filter coef 1 */
	s = r01 & 0xff;
	SPLICE(o, s, 0);
	*(u_int*)(out + 4) = o;
	out += stride;

	/* load next rows into cache */
	u_int r10 = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
	u_int r11 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
	in += stride;

	u_int r20, r21;
	/* selects two bytes of a word, leaving 8 bits of headroom each */
	u_int mask = 0xff00ff;
	/*
	 * Six interior rows.  v and w each hold two vertically
	 * filtered (1 2 1) column sums, one in each 16-bit half.
	 */
	for (int k = 6; --k >= 0; ) {
		/* load next row */
		r20 = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
		r21 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
		in += stride;

		/* columns 0,2 */
		u_int v = (r00 >> 8) & mask;
		v += ((r10 >> 8) & mask) << 1;
		v += (r20 >> 8) & mask;

		/* first pixel */
		s = v >> 16;
		/* round */
		s += 2;
		s >>= 2;
		o = 0;
		SPLICE(o, s, 24);

		/* columns 1,3 */
		u_int w = r00 & mask;
		w += (r10 & mask) << 1;
		w += r20 & mask;

		/* row */
		s = v >> 16;
		s += v & 0xffff;
		s += w >> (16-1);
		/* round */
		s += 8;
		s >>= 4;
		SPLICE(o, s, 16);

		s = w >> 16;
		s += w & 0xffff;
		s += (v & 0xffff) << 1;
		/* round */
		s += 8;
		s >>= 4;
		SPLICE(o, s, 8);

		/* start next row */
		s = v & 0xffff;
		s += (w & 0xffff) << 1;
		/* but first do columns 4,6 */
		v = (r01 >> 8) & mask;
		v += ((r11 >> 8) & mask) << 1;
		v += (r21 >> 8) & mask;
		/* finish row */
		s += v >> 16;
		/* round */
		s += 8;
		s >>= 4;
		SPLICE(o, s, 0);
		*(u_int*)out = o;

		/* start next row */
		s = w & 0xffff;
		s += (v >> 16) << 1;
		/* but first do columns 5,7 */
		w = r01 & mask;
		w += (r11 & mask) << 1;
		w += r21 & mask;
		/* finish row */
		s += w >> 16;
		/* round */
		s += 8;
		s >>= 4;
		o = 0;
		SPLICE(o, s, 24);

		s = v >> 16;
		s += v & 0xffff;
		s += w >> (16-1);
		/* round */
		s += 8;
		s >>= 4;
		SPLICE(o, s, 16);

		s = w >> 16;
		s += w & 0xffff;
		s += (v & 0xffff) << 1;
		/* round */
		s += 8;
		s >>= 4;
		SPLICE(o, s, 8);

		/* last pixel of the row: vertical taps only */
		s = w & 0xffff;
		/* round */
		s += 2;
		s >>= 2;
		SPLICE(o, s, 0);
		*(u_int*)(out + 4) = o;

		out += stride;

		/* roll lines up cache */
		r00 = r10;
		r01 = r11;
		r10 = r20;
		r11 = r21;
	}
	/*
	 * last row
	 * (r20/r21 still hold the final row loaded by the loop;
	 * horizontal taps only, as for the first row)
	 */
	s = r20 >> 24;
	o = 0;
	SPLICE(o, s, 24);

	s += (r20 >> 15) & 0x1fe;
	s += (r20 >> 8) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 16);

	s = (r20 >> 16) & 0xff;
	s += (r20 >> 7) & 0x1fe;
	s += r20 & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 8);

	s = (r20 >> 8) & 0xff;
	s += (r20 & 0xff) << 1;
	s += r21 >> 24;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 0);
	*(u_int*)out = o;

	s = r20 & 0xff;
	s += (r21 >> 23) & 0x1fe;
	s += (r21 >> 16) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	o = 0;
	SPLICE(o, s, 24);

	s = r21 >> 24;
	s += (r21 >> 15) & 0x1fe;
	s += (r21 >> 8) & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 16);

	s = (r21 >> 16) & 0xff;
	s += (r21 >> 7) & 0x1fe;
	s += r21 & 0xff;
	/* round */
	s += 2;
	s >>= 2;
	SPLICE(o, s, 8);

	/* corner has filter coef 1 */
	s = r21 & 0xff;
	SPLICE(o, s, 0);
	*(u_int*)(out + 4) = o;
}
587
588
589 void P64Decoder::mvblka(u_char* in, u_char* out, u_int stride)
590 {
591 #ifdef INT_64
592 *(INT_64*)out = *(INT_64*)in;
593 out += stride; in += stride;
594 *(INT_64*)out = *(INT_64*)in;
595 out += stride; in += stride;
596 *(INT_64*)out = *(INT_64*)in;
597 out += stride; in += stride;
598 *(INT_64*)out = *(INT_64*)in;
599 out += stride; in += stride;
600 *(INT_64*)out = *(INT_64*)in;
601 out += stride; in += stride;
602 *(INT_64*)out = *(INT_64*)in;
603 out += stride; in += stride;
604 *(INT_64*)out = *(INT_64*)in;
605 out += stride; in += stride;
606 *(INT_64*)out = *(INT_64*)in;
607 #else
608 for (int k = 8; --k >= 0; ) {
609 *(u_int*)out = *(u_int*)in;
610 *(u_int*)(out + 4) = *(u_int*)(in + 4);
611 in += stride;
612 out += stride;
613 }
614 #endif
615 }
616
617 void P64Decoder::mvblk(u_char* in, u_char* out, u_int stride)
618 {
619 #ifdef INT_64
620 if (((u_long)in & 7) == 0) {
621 mvblka(in, out, stride);
622 return;
623 }
624 #else
625 if (((u_long)in & 3) == 0) {
626 mvblka(in, out, stride);
627 return;
628 }
629 #endif
630 for (int k = 8; --k >= 0;) {
631 u_int* o = (u_int*)out;
632 #if BYTE_ORDER == LITTLE_ENDIAN
633 o[0] = in[3] << 24 | in[2] << 16 | in[1] << 8 | in[0];
634 o[1] = in[7] << 24 | in[6] << 16 | in[5] << 8 | in[4];
635 #else
636 o[0] = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
637 o[1] = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
638 #endif
639 in += stride;
640 out += stride;
641 }
642 }
643
644 /*
645 * Parse a picture header. We assume that the picture
646 * start code (20-bit long PSC field) has already been snarfed.
647 *
648 * The H.261 picture header is defined as following:
649 *
650 * 0 1 2 3
651 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
652 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
653 * | PCS | TR | PTYPE |P|
654 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
655 *
656 * The fields in the H.261 picture header have the following meanings:
657 *
658 * Picture Start Code (PCS): 20 bits
659 * Must be 0x00010
660 *
661 * Temporal Reference (TR): 5 bits
662 * ...
663 *
664 * Type Information (PTYPE): 6 bits
665 * bit 1: Split screen indicator, "" off, "1" on
666 * bit 2: Document camera indicator, "" off, "1" on
667 * bit 3: Freeze Picture Release, "" off, "1" on
668 * bit 4: Source format, "" QCIF, "1" CIF
669 * bit 5: Optional still image mode HI_RES. on(0)/off(1)
670 * bit 6: Spare
671 *
672 * Extra Insertion Information, PEI (P): 1 bit
673 * "1" signals the presence of the following optional data field
674 *
675 */
676 int P64Decoder::parse_picture_hdr()
677 {
678 /* throw away the temporal reference */
679 SKIP_BITS(bs_, 5, nbb_, bb_);
680 u_int pt;
681 GET_BITS(bs_, 6, nbb_, bb_, pt);
682 u_int fmt = (pt >> 2) & 1;
683 if (fmt_ != fmt) {
684 /* change formats */
685 fmt_ = fmt;
686 init();
687 }
688 int v;
689 GET_BITS(bs_, 1, nbb_, bb_, v);
690 while (v != 0) {
691 GET_BITS(bs_, 9, nbb_, bb_, v);
692 /*
693 * FIXME from pvrg code: 0x8c in PSPARE means ntsc.
694 * this is a hack. we don't support it.
695 */
696 int pspare = v >> 1;
697 if (pspare == 0x8c && (pt & 0x04) != 0) {
698 static int first = 1;
699 if (first) {
700 err("pvrg ntsc not supported");
701 first = 0;
702 }
703 }
704 v &= 1;
705 }
706 return (0);
707 }
708
709 inline int P64Decoder::parse_sc()
710 {
711 int v;
712 GET_BITS(bs_, 16, nbb_, bb_, v);
713 if (v != 0x0001) {
714 err("bad start code %04x", v);
715 ++bad_psc_;
716 return (-1);
717 }
718 return (0);
719 }
720
721 /*
722 * Parse a GOB header, which consists of the GOB quantiation
723 * factor (GQUANT) and spare bytes that we ignore.
724 */
725 int P64Decoder::parse_gob_hdr(int ebit)
726 {
727 mba_ = -1;
728 mvdh_ = 0;
729 mvdv_ = 0;
730
731 /*
732 * Get the next GOB number (or 0 for a picture header).
733 * The invariant at the top of this loop is that the
734 * bit stream is positioned immediately past the last
735 * start code.
736 */
737 u_int gob;
738 for (;;) {
739 GET_BITS(bs_, 4, nbb_, bb_, gob);
740 if (gob != 0)
741 break;
742 /*
743 * should happen only on first iteration
744 * (if at all). pictures always start on
745 * packet boundaries per section 5 of the
746 * Internet Draft.
747 */
748 if (parse_picture_hdr() < 0) {
749 ++bad_fmt_;
750 return (-1);
751 }
752 /*
753 * Check to see that the next 16 bits
754 * are a start code and throw them away.
755 * But first check that we have the bits.
756 */
757 int nbit = ((es_ - bs_) << 4) + nbb_ - ebit;
758 if (nbit < 20)
759 return (0);
760
761 if (parse_sc() < 0)
762 return (-1);
763 }
764 gob -= 1;
765 if (fmt_ == IT_QCIF)
766 /*
767 * Number QCIF GOBs 0,1,2 instead of 0,2,4.
768 */
769 gob >>= 1;
770
771 if (gob >= ngob_) {
772 err("gob number too big (%d>%d)", gob, ngob_);
773 return (-1);
774 }
775
776 int mq;
777 GET_BITS(bs_, 5, nbb_, bb_, mq);
778 gobquant_ = mq;
779 qt_ = &quant_[mq << 8];
780
781 int v;
782 GET_BITS(bs_, 1, nbb_, bb_, v);
783 while (v != 0) {
784 GET_BITS(bs_, 9, nbb_, bb_, v);
785 v &= 1;
786 }
787 gob_ = gob;
788 if (gob > maxgob_)
789 maxgob_ = gob;
790
791 return (gob);
792 }
793
794 /*
795 * Parse a macroblock header. If there is no mb header because
796 * we hit the next start code, return -1, otherwise 0.
797 */
798 int P64Decoder::parse_mb_hdr(u_int& cbp)
799 {
800 /*
801 * Read the macroblock address (MBA)
802 */
803 int v;
804 HUFF_DECODE(bs_, ht_mba_, nbb_, bb_, v);
805 if (v <= 0) {
806 /*
807 * (probably) hit a start code; either the
808 * next GOB or the next picture header.
809 * If we got MBA stuffing (0) we need to return
810 * so the outer loop can check if we're at the
811 * end of the buffer (lots of codecs put stuffing
812 * at the end of a picture to byte align the psc).
813 */
814 return (v);
815 }
816
817 /*
818 * MBA is differentially encoded.
819 */
820 mba_ += v;
821 if (mba_ >= MBPERGOB) {
822 err("mba too big %d", mba_);
823 return (SYM_ILLEGAL);
824 }
825 // printf ("addr = (%d,%d) ", gob_+1, mba_+1);
826
827 u_int omt = mt_;
828 HUFF_DECODE(bs_, ht_mtype_, nbb_, bb_, mt_);
829 if (mt_ & MT_MQUANT) {
830 int mq;
831 GET_BITS(bs_, 5, nbb_, bb_, mq);
832 qt_ = &quant_[mq << 8];
833 }
834 if (mt_ & MT_MVD) {
835 /*
836 * Read motion vector.
837 */
838 int dh;
839 int dv;
840 HUFF_DECODE(bs_, ht_mvd_, nbb_, bb_, dh);
841 HUFF_DECODE(bs_, ht_mvd_, nbb_, bb_, dv);
842 /*
843 * Section 4.2.3.4
844 * The vector is differentially coded unless any of:
845 * - the current mba delta isn't 1
846 * - the current mba is 1, 12, or 23 (mba mod 11 = 1)
847 * - the last block didn't have motion vectors.
848 *
849 * This arithmetic is twos-complement restricted
850 * to 5 bits.
851 */
852 if ((omt & MT_MVD) != 0 && v == 1 &&
853 mba_ != 0 && mba_ != 11 && mba_ != 22) {
854 dh += mvdh_;
855 dv += mvdv_;
856 }
857 mvdh_ = (dh << 27) >> 27;
858 mvdv_ = (dv << 27) >> 27;
859 }
860 /*
861 * Coded block pattern.
862 */
863 if (mt_ & MT_CBP) {
864 HUFF_DECODE(bs_, ht_cbp_, nbb_, bb_, cbp);
865 if (cbp > 63) {
866 err("cbp invalid %x", cbp);
867 return (SYM_ILLEGAL);
868 }
869 } else
870 cbp = 0x3f;
871
872 return (1);
873 }
874
/*
 * Handle the next block in the current macroblock.
 * If tc is non-zero, then coefficients are present
 * in the input stream and they are parsed. Otherwise,
 * coefficients are not present, but we take action
 * according to the type of macroblock that we have.
 *
 *	tc	non-zero if coefficients for this block are in the stream
 *	x, y	position of the block inside the plane
 *	stride	distance in bytes between rows of the plane
 *	front	plane being written
 *	back	plane used as the prediction source
 *	sf	divisor applied to the motion vector components
 *		(decode_mb passes 1 for luma and 2 for chroma)
 */
void P64Decoder::decode_block(u_int tc, u_int x, u_int y, u_int stride,
			      u_char* front, u_char* back, int sf)
{
	short blk[64];
	/*
	 * MASK_VAL / MASK_REF hide whether the coefficient mask is one
	 * 64-bit word or a pair of 32-bit words.
	 */
#ifdef INT_64
	INT_64 mask;
#define MASK_VAL	mask
#define MASK_REF	&mask
#else
	u_int mask[2];
#define MASK_VAL	mask[0], mask[1]
#define MASK_REF	mask
#endif
	/* nc is assigned, and only ever read, when tc != 0 */
	int nc;
	if (tc != 0)
		nc = parse_block(blk, MASK_REF);

	int off = y * stride + x;
	u_char* out = front + off;

	if (mt_ & MT_INTRA) {
		/* intra: no prediction is added */
		if (tc != 0) {
			if (nc == 0)
				/* only the first coefficient is present */
				dcfill((blk[0] + 4) >> 3, out, stride);
#ifdef notdef
			else if (nc == 1) {
#ifdef INT_64
				u_int dc = (mask & 1) ? (blk[0] + 4) >> 3 : 0;
				for (int k = 1; k < 64; ++k) {
					if (mask & ((INT_64)1 << k)) {
						bv_rdct1(dc, blk, k,
							 out, stride);
						return;
					}
				}
#else
				u_int m0 = mask[0];
				u_int m1 = mask[1];
				u_int dc = (m0 & 1) ? (blk[0] + 4) >> 3 : 0;
				for (int k = 1; k < 64; ++k) {
					m0 >>= 1;
					m0 |= m1 << 31;
					m1 >>= 1;
					if (m0 & 1) {
						bv_rdct1(dc, blk, k,
							 out, stride);
						return;
					}
				}
#endif
#endif
			else
				rdct(blk, MASK_VAL, out, stride, (u_char*)0);
		} else {
			/* nothing coded: copy the co-located block */
			u_char* in = back + off;
			mvblka(in, out, stride);
		}
		return;
	}
	if ((mt_ & MT_MVD) == 0) {
		/* no motion vector: predict from the co-located block */
		u_char* in = back + off;
		if (tc != 0) {
			if (nc == 0) {
				dcsum((blk[0] + 4) >> 3, in, out, stride);
			} else
				rdct(blk, MASK_VAL, out, stride, in);
		} else
			mvblka(in, out, stride);
		return;
	}
	/* motion compensated: the source block is displaced by the vector */
	u_int sx = x + (mvdh_ / sf);
	u_int sy = y + (mvdv_ / sf);
	u_char* in = back + sy * stride + sx;
	if (mt_ & MT_FILTER) {
		/* filter into `out' first, then add any residual in place */
		filter(in, out, stride);
		if (tc != 0) {
			if (nc == 0)
				dcsum2((blk[0] + 4) >> 3, out, out, stride);
			else
				rdct(blk, MASK_VAL, out, stride, out);
		}
	} else {
		if (tc != 0) {
			if (nc == 0)
				dcsum2((blk[0] + 4) >> 3, in, out, stride);
			else
				rdct(blk, MASK_VAL, out, stride, in);
		} else
			/* `in' may be unaligned here, hence mvblk */
			mvblk(in, out, stride);
	}
}
973
974 /*
975 * Decompress the next macroblock. Return 0 if the macroblock
976 * was present (with no errors). Return SYM_STARTCODE (-1),
977 * if there was no macroblock but instead the start of the
978 * next GOB or picture (in which case the start code has
979 * been consumed). Return SYM_ILLEGAL (-2) if there was an error.
980 */
981 int P64Decoder::decode_mb()
982 {
983 u_int cbp;
984 register int v;
985
986 if ((v = parse_mb_hdr(cbp)) <= 0)
987 return (v);
988
989 /*
990 * Lookup the base coordinate for this MBA.
991 * Convert from a block to a pixel coord.
992 */
993 register u_int x, y;
994 x = coord_[mba_];
995 y = (x & 0xff) << 3;
996 x >>= 8;
997 x <<= 3;
998
999 /* Update bounding box */
1000 if (x < minx_)
1001 minx_ = x;
1002 if (x > maxx_)
1003 maxx_ = x;
1004 if (y < miny_)
1005 miny_ = y;
1006 if (y > maxy_)
1007 maxy_ = y;
1008
1009 /*
1010 * Decode the six blocks in the MB (4Y:1U:1V with 4:2:0 subsampling scheme).
1011 * (This code assumes MT_TCOEFF is 1.)
1012 */
1013 register u_int tc = mt_ & MT_TCOEFF;
1014 register u_int s = width_;
1015 decode_block(tc & (cbp >> 5), x, y, s, front_, back_, 1);
1016 decode_block(tc & (cbp >> 4), x + 8, y, s, front_, back_, 1);
1017 decode_block(tc & (cbp >> 3), x, y + 8, s, front_, back_, 1);
1018 decode_block(tc & (cbp >> 2), x + 8, y + 8, s, front_, back_, 1);
1019 s >>= 1;
1020 int off = size_;
1021 decode_block(tc & (cbp >> 1), x >> 1, y >> 1, s,
1022 front_ + off, back_ + off, 2);
1023 off += size_ >> 2;
1024 decode_block(tc & (cbp >> 0), x >> 1, y >> 1, s,
1025 front_ + off, back_ + off, 2);
1026
1027 mbst_[mba_] = MBST_NEW;
1028
1029 /*
1030 * If a marking table was attached, take note.
1031 * This allows us to dither only the blocks that have changed,
1032 * rather than the entire image on each frame.
1033 */
1034 if (marks_) {
1035 /* convert to 8x8 block offset */
1036 off = (x >> 3) + (y >> 3) * (width_ >> 3);
1037 int m = mark_;
1038 marks_[off] = m;
1039 marks_[off + 1] = m;
1040 off += width_ >> 3;
1041 marks_[off] = m;
1042 marks_[off + 1] = m;
1043 }
1044 return (0);
1045 }
1046
1047 /*
1048 * Decode H.261 stream. Decoding can begin on either
1049 * a GOB or macroblock header. All the macroblocks of
1050 * a given frame can be decoded in any order, but chunks
1051 * cannot be reordered across frame boundaries. Since data
1052 * can be decoded in any order, this entry point can't tell
1053 * when a frame is fully decoded (actually, we could count
1054 * macroblocks but if there is loss, we would not know when
1055 * to sync). Instead, the callee should sync the decoder
1056 * by calling the sync() method after the entire frame
1057 * has been decoded (modulo loss).
1058 *
1059 * This routine should not be called with more than
1060 * one frame present since there is no callback mechanism
1061 * for renderering frames (i.e., don't call this routine
1062 * with a buffer that has a picture header that's not
1063 * at the front).
1064 */
1065 int P64Decoder::decode(const u_char* bp, int cc, int sbit, int ebit,
1066 int mba, int gob, int mq, int mvdh, int mvdv)
1067 {
1068 ps_ = (u_short*)bp;
1069
1070 /*
1071 * If cc is odd, ignore 8 extra bits in last short.
1072 */
1073 int odd = cc & 1;
1074 ebit += odd << 3;
1075 pebit_ = ebit;
1076 es_ = (u_short*)(bp + ((cc - 1) &~ 1));
1077
1078 /*
1079 * If input buffer not aligned, prime bit-buffer
1080 * with 8 bits; otherwise, prime it with a 16.
1081 */
1082 if ((int)bp & 1) {
1083 bs_ = (u_short*)(bp + 1);
1084 bb_ = *bp;
1085 nbb_ = 8 - sbit;
1086 } else {
1087 bs_ = (u_short*)bp;
1088 HUFFRQ(bs_, bb_);
1089 nbb_ = 16 - sbit;
1090 }
1091
1092 mba_ = mba;
1093 qt_ = &quant_[mq << 8];
1094 mvdh_ = mvdh;
1095 mvdv_ = mvdv;
1096
1097 /* don't rely on this (GOB number in RTP header) */
1098 if (gob != 0) {
1099 gob -= 1;
1100 if (fmt_ == IT_QCIF)
1101 gob >>= 1;
1102 }
1103
1104 while (bs_ < es_ || (bs_ == es_ && nbb_ > ebit)) {
1105 mbst_ = &mb_state_[gob << 6];
1106 coord_ = &base_[gob << 6];
1107
1108 int v = decode_mb();
1109 if (v == 0) {
1110 // a macroblock has been decoded. Continue in next chunk
1111 ndmblk_++;
1112 continue;
1113 }
1114
1115 // check if this was the start of the next GOB or picture (in which
1116 // case the start code has been consumed)
1117 if (v != SYM_STARTCODE) {
1118 err("expected GOB startcode");
1119 ++bad_bits_;
1120 return (0);
1121 }
1122 gob = parse_gob_hdr(ebit);
1123 if (gob < 0) {
1124 /*FIXME*/
1125 ++bad_bits_;
1126 return (0);
1127 }
1128 }
1129 return (0);
1130 }
1131
/*
 * Run init() right away so the decoder is configured and allocate()
 * has been called before any data is decoded.
 */
FullP64Decoder::FullP64Decoder()
{
	init();
}
1136
1137 void FullP64Decoder::allocate()
1138 {
1139 delete[] fs_;
1140 int n = size_ + (size_ >> 1);
1141 fs_ = new u_char[2 * n];
1142 /* initialize to gray */
1143 memset(fs_, 0x80, 2 * n);
1144 front_ = fs_;
1145 back_ = front_ + n;
1146 }
1147
1148 /*
1149 * Swap the `front' and `back' frame buffers. While decoding a
1150 * frame, the front buffer is the image being constructed while
1151 * the back buffer is the reference image. Rather than copy
1152 * the whole image each time, we just swap pointers here.
1153 * We defer this copying until we find out that we're skipping
1154 * over a macroblock, or even a whole gob. In this case, we
1155 * go ahead and copy it, but take note in the mb_skip_ array.
1156 * Next time we need to copy it, we skip it if the skip array
1157 * says it's okay (e.g., there is no reason to copy a given block
1158 * back and forth between buffers if it never changes). When we
1159 * modify a macroblock, we clear out it's entry in mb_skip_.
1160 */
1161 void FullP64Decoder::swap()
1162 {
1163 u_char* p = front_;
1164 front_ = back_;
1165 back_ = p;
1166 }
1167
1168 /*
1169 * Copy a macroblock from the saved frame (back buffer)
1170 * to the current frame (front buffer). coord_ determines
1171 * which GOB we're in.
1172 */
1173 void FullP64Decoder::mbcopy(u_int mba)
1174 {
1175 u_int x, y;
1176 x = coord_[mba];
1177 y = (x & 0xff) << 3;
1178 x >>= 8;
1179 x <<= 3;
1180
1181 u_int stride = width_;
1182 u_int off = y * stride + x;
1183 u_char* in = back_ + off;
1184 u_char* out = front_ + off;
1185
1186 mvblka(in, out, stride);
1187 mvblka(in + 8, out + 8, stride);
1188 in += stride << 3;
1189 out += stride << 3;
1190 mvblka(in, out, stride);
1191 mvblka(in + 8, out + 8, stride);
1192 x >>= 1;
1193 y >>= 1;
1194 stride >>= 1;