~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/codec/p64/p64.cc

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 /*
  2  * p64.cc --
  3  *
  4  *      "P64" H.261 decoder
  5  */
  6 
  7 /*
  8  * This code is derived from the P64 software implementation by the
  9  * Stanford PVRG group:
 10  *
 11  * Copyright (C) 1990, 1991, 1993 Andy C. Hung, all rights reserved.
 12  * PUBLIC DOMAIN LICENSE: Stanford University Portable Video Research
 13  * Group. If you use this software, you agree to the following: This
 14  * program package is purely experimental, and is licensed "as is".
 15  * Permission is granted to use, modify, and distribute this program
 16  * without charge for any purpose, provided this license/ disclaimer
 17  * notice appears in the copies.  No warranty or maintenance is given,
 18  * either expressed or implied.  In no event shall the author(s) be
 19  * liable to you or a third party for any special, incidental,
 20  * consequential, or other damages, arising out of the use or inability
 21  * to use the program for any purpose (or the loss of data), even if we
 22  * have been advised of such possibilities.  Any public reference or
 23  * advertisement of this source code should refer to it as the Portable
 24  * Video Research Group (PVRG) code, and not by any author(s) (or
 25  * Stanford University) name.
 26  */
 27 
 28 #ifndef lint
 29 static const char rcsid[] =
 30     "@(#) $Header: /usr/mash/src/repository/mash/mash-1/codec/p64/p64.cc,v 1.16 2003/11/19 19:20:22 aswan Exp $";
 31 #endif
 32 
 33 #include <stdarg.h>
 34 #include <stdio.h>
 35 #include <string.h>
 36 
 37 #ifdef WIN32
 38 #include <winsock.h>
 39 #else
 40 #include <sys/param.h>
 41 #include <sys/file.h>
 42 #endif
 43 #include <sys/stat.h>
 44 
 45 #include "codec/p64/p64.h"
 46 #include "codec/p64/p64-huff.h"
 47 #include "codec/dct.h"
 48 #include "misc/bsd-endian.h"
 49 
 50 #ifdef DEVELOPMENT_VERSION
 51 void P64Decoder::err(const char* msg ...) const
 52 #else
 53 void P64Decoder::err(const char* /* msg */ ...) const
 54 #endif
 55 {
 56 #ifdef DEVELOPMENT_VERSION
 57         va_list ap;
 58         va_start(ap, msg);
 59         vfprintf(stderr, msg, ap);
 60         fprintf(stderr, " @g%d m%d %d/%d of %d/%d: %04x %04x %04x %04x|%04x\n",
 61                 gob_, mba_,
 62                 (int)((u_char*)bs_ - (u_char*)ps_), nbb_,
 63                 (int)((u_char*)es_ - (u_char*)ps_), pebit_,
 64                bs_[-4], bs_[-3], bs_[-2], bs_[-1], bs_[0]);
 65 #endif
 66 }
 67 
 68 P64Decoder::P64Decoder()
 69         : fs_(0), front_(0), back_(0),
 70           ngob_(0), maxgob_(0), ndmblk_(0), gobquant_(0),
 71           mt_(0), gob_(0), mba_(0), mvdh_(0), mvdv_(0),
 72           marks_(0), mark_(0),
 73           bad_psc_(0), bad_bits_(0), bad_GOBno_(0), bad_fmt_(0)
 74 {
 75         fmt_ = IT_CIF; /* default is CIF */
 76         inithuff();
 77         initquant();
 78 }
 79 
 80 P64Decoder::~P64Decoder()
 81 {
 82         delete[] fs_;
 83 }
 84 
 85 void P64Decoder::init()
 86 {
 87         if (fmt_ == IT_CIF) {
 88                 ngob_ = 12;
 89                 width_ = 352;
 90                 height_ = 288;
 91         } else {
 92                 ngob_ = 3;
 93                 width_ = 176;
 94                 height_ = 144;
 95         }
 96         size_ = width_ * height_;
 97         memset(mb_state_, MBST_OLD, sizeof(mb_state_));
 98 
 99         for (u_int gob = 0; gob < 12; ++gob) {
100                 u_short* p = &base_[gob << 6];
101                 for (int mba = 0; mba < MBPERGOB; ++mba) {
102                         u_int x = 2 * (mba % 11);
103                         u_int y;
104                         if (fmt_ == IT_CIF) {
105                                 y = 2 * (3 * (gob >> 1) + mba / 11);
106                                 if (gob & 1)
107                                         x += 22;
108                         } else
109                                 y = 2 * (3 * gob + mba / 11);
110 
111                         p[mba] = (x << 8) | y;
112                 }
113         }
114         minx_ = width_;
115         miny_ = height_;
116         maxx_ = 0;
117         maxy_ = 0;
118 
119         allocate();
120 
121         // invalidate the just-changed-block table (marks_ buffer) so no buffer
122         //      overrun occurs (see the CVS log for a more detailed explanation)
123         marks_ = 0;
124 }
125 
/*
 * Bit-reader macros.
 *
 * The stream is consumed 16 bits at a time through the pointer `bs`.
 * `bb` is the bit buffer and `nbb` the number of not-yet-consumed bits,
 * which occupy the low `nbb` bits of `bb`.  All macros expand to a
 * brace-enclosed block and modify their bs/nbb/bb arguments in place,
 * so those arguments must be plain lvalues.
 */

/*
 * HUFFRQ: shift the next 16 stream bits into the bottom of bb.
 * The stream is in network byte order, so little-endian hosts swap the
 * two bytes of each word as it is loaded.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define HUFFRQ(bs, bb) \
 { \
        int t__ = *(bs)++; \
        (bb) <<= 16; \
        (bb) |= (t__ & 0xff) << 8; \
        (bb) |= t__ >> 8; \
 }
#else
#define HUFFRQ(bs, bb) \
 { \
        (bb) <<= 16; \
        (bb) |= *(bs)++; \
 }
#endif

/* MASK: an int with the low s bits set. */
#define MASK(s) ((1 << (s)) - 1)

/*
 * HUFF_DECODE: decode one symbol with the prefix table `ht`.
 * The next ht.maxlen bits index ht.prefix; the entry holds the code
 * length in its low 5 bits and the decoded value in the remaining bits.
 * Only the code length is consumed from the bit buffer.
 */
#define HUFF_DECODE(bs, ht, nbb, bb, result) { \
        int s__, v__; \
 \
        if ((nbb) < 16) { \
                HUFFRQ(bs, bb); \
                (nbb) += 16; \
        } \
        s__ = (ht).maxlen; \
        v__ = ((bb) >> ((nbb) - s__)) & MASK(s__); \
        s__ = ((ht).prefix)[v__]; \
        (nbb) -= (s__ & 0x1f); \
        (result) = s__ >> 5; \
 }

/*
 * GET_BITS: consume n bits and store them, right-justified, in result.
 * A single 16-bit refill is performed when needed, so n must not
 * exceed 16.
 */
#define GET_BITS(bs, n, nbb, bb, result) \
{ \
        (nbb) -= (n); \
        if ((nbb) < 0)  { \
                HUFFRQ(bs, bb); \
                (nbb) += 16; \
        } \
        (result) = (((bb) >> (nbb)) & MASK(n)); \
}

/* SKIP_BITS: consume n bits (n <= 16) and discard them. */
#define SKIP_BITS(bs, n, nbb, bb) \
{ \
        (nbb) -= (n); \
        if ((nbb) < 0)  { \
                HUFFRQ(bs, bb); \
                (nbb) += 16; \
        } \
}
176 
177 /*
178  * Set up the huffman tables.
179  */
180 void P64Decoder::inithuff()
181 {
182         ht_mtype_.prefix = htd_mtype;
183         ht_mtype_.maxlen = htd_mtype_width;
184         ht_mba_.prefix = htd_mba;
185         ht_mba_.maxlen = htd_mba_width;
186         ht_mvd_.prefix = htd_dvm;
187         ht_mvd_.maxlen = htd_dvm_width;
188         ht_cbp_.prefix = htd_cbp;
189         ht_cbp_.maxlen = htd_cbp_width;
190         ht_tcoeff_.prefix = htd_tcoeff;
191         ht_tcoeff_.maxlen = htd_tcoeff_width;
192 }
193 
194 int P64Decoder::quantize(int v, int q)
195 {
196         if (v > 0)
197                 return (((v << 1) + 1) * q) - (~q & 1);
198         else
199                 return (((v << 1) - 1) * q) + (~q & 1);
200 }
201 
202 /*
203  * Build quantization lookup table.
204  * One for each possible MQUANT paramenter.
205  */
206 void P64Decoder::initquant()
207 {
208         for (int mq = 0; mq < 32; ++mq) {
209                 short* qt = &quant_[mq << 8];
210                 for (int v = 0; v < 256; ++v) {
211                         int s = (v << 24) >> 24;
212                         qt[v] = quantize(s, mq);
213                 }
214         }
215 }
216 
217 /*
218  * Decode the next block of transform coefficients
219  * from the input stream.
220  * Return number of non-zero ac coefficients.
221  */
222 #ifdef INT_64
223 int P64Decoder::parse_block(short* blk, INT_64* mask)
224 #else
225 int P64Decoder::parse_block(short* blk, u_int* mask)
226 #endif
227 {
228 #ifdef INT_64
229         INT_64 m0 = 0;
230 #else
231         u_int m1 = 0, m0 = 0;
232 #endif
233         /*
234          * Cache bit buffer in registers.
235          */
236         register int nbb = nbb_;
237         register int bb = bb_;
238         register short* qt = qt_;
239 
240         int k;
241         if ((mt_ & MT_CBP) == 0) {
242                 int v;
243                 GET_BITS(bs_, 8, nbb, bb, v);
244                 if (v == 255)
245                         v = 128;
246                 if (mt_ & MT_INTRA)
247                         v <<= 3;
248                 else
249                         v = qt[v];
250                 blk[0] = v;
251                 k = 1;
252                 m0 |= 1;
253         } else if ((bb >> (nbb - 1)) & 1) {
254                 /*
255                  * In CBP blocks, the first block present must be
256                  * non-empty (otherwise it's mask bit wouldn't
257                  * be set), so the first code cannot be an EOB.
258                  * CCITT optimizes this case by using a huffman
259                  * table equivalent to ht_tcoeff_ but without EOB,
260                  * in which 1 is coded as "1" instead of "11".
261                  * We grab two bits, the first bit is the code
262                  * and the second is the sign.
263                  */
264                 int v;
265                 GET_BITS(bs_, 2, nbb, bb, v);
266                 /*FIXME quantize?*/
267                 blk[0] = qt[(v & 1) ? 0xff : 1];
268                 k = 1;
269                 m0 |= 1;
270         } else {
271                 k = 0;
272 #ifndef INT_64
273                 blk[0] = 0;/*FIXME need this because the way we set bits below*/
274 #endif
275         }
276         int nc = 0;
277         for (;;) {
278                 int r, v;
279                 HUFF_DECODE(bs_, ht_tcoeff_, nbb, bb, r);
280                 if (r <= 0) {
281                         /* SYM_EOB, SYM_ILLEGAL, or SYM_ESCAPE */
282                         if (r == SYM_ESCAPE) {
283                                 GET_BITS(bs_, 14, nbb, bb, r);
284                                 v = r & 0xff;
285                                 r >>= 8;
286                         } else {
287                                 if (r == SYM_ILLEGAL) {
288                                         bb_ = bb;
289                                         nbb_ = nbb;
290                                         err("illegal symbol in block");
291                                 }
292                                 /* EOB */
293                                 break;
294                         }
295                 } else {
296                         v = (r << 22) >> 27;
297                         r = r & 0x1f;
298                 }
299                 k += r;
300                 if (k >= 64) {
301                         bb_ = bb;
302                         nbb_ = nbb;
303                         err("bad run length %d (r %d, v %d)", k, r, v);
304                         break;
305                 }
306                 r = COLZAG[k++];
307                 blk[r] = qt[v & 0xff];
308                 ++nc;
309 #ifdef INT_64
310                 m0 |= (INT_64)1 << r;
311 #else
312                 if (r < 32)
313                         m0 |= 1 << r;
314                 else
315                         m1 |= 1 << (r - 32);
316 #endif
317         }
318         /*
319          * Done reading input.  Update bit buffer.
320          */
321         bb_ = bb;
322         nbb_ = nbb;
323 
324         *mask = m0;
325 #ifndef INT_64
326         mask[1] = m1;
327 #endif
328         return (nc);
329 }
330 
331 /*
332  * Mix in a motion-compensated, filtered block.  Note that
333  * the input block may be misaligned so we cannot try fancy,
334  * word-at-a-time accesses without being careful.  The output
335  * block is, of course, aligned.
336  *
337  * The 2-D loop filter is separable into 1-D FIR (0.25 0.5 0.25)
338  * horizontal and vertical passes.  At the block edge, the filter
339  * taps are (0 1 0).  Full arithmetic precision must be maintained,
340  * until the output stage, where values are rounded (up).
341  *
342  * The code below tries to be efficient by caching the input
343  * rows in registers, and running the filter on 3x3 chunks.
344  * Multiple columns can be computed in parallel by using
345  * two 16-bit adds in a 32-bit register, or four 16-bit adds
346  * in a 64-bit register.
347  */
348 void P64Decoder::filter(u_char* in, u_char* out, u_int stride)
349 {
350         /* Corner pixel has filter coef 1 */
351         u_int s = in[0];
352         u_int o = 0;
353         SPLICE(o, s, 24);
354 
355         u_int r00 = s << 24 | in[1] << 16 | in[2] << 8 | in[3];
356         u_int r01 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
357         in += stride;
358 
359         /*
360          * First row.
361          */
362         s += (r00 >> 15) & 0x1fe;
363         s += (r00 >> 8) & 0xff;
364         /* round */
365         s += 2;
366         s >>= 2;
367         SPLICE(o, s, 16);
368 
369         s = (r00 >> 16) & 0xff;
370         s += (r00 >> 7) & 0x1fe;
371         s += r00 & 0xff;
372         /* round */
373         s += 2;
374         s >>= 2;
375         SPLICE(o, s, 8);
376 
377         s = (r00 >> 8) & 0xff;
378         s += (r00 & 0xff) << 1;
379         s += r01 >> 24;
380         /* round */
381         s += 2;
382         s >>= 2;
383         SPLICE(o, s, 0);
384         *(u_int*)out = o;
385 
386         s = r00 & 0xff;
387         s += (r01 >> 23) & 0x1fe;
388         s += (r01 >> 16) & 0xff;
389         /* round */
390         s += 2;
391         s >>= 2;
392         o = 0;
393         SPLICE(o, s, 24);
394 
395         s = r01 >> 24;
396         s += (r01 >> 15) & 0x1fe;
397         s += (r01 >> 8) & 0xff;
398         /* round */
399         s += 2;
400         s >>= 2;
401         SPLICE(o, s, 16);
402 
403         s = (r01 >> 16) & 0xff;
404         s += (r01 >> 7) & 0x1fe;
405         s += r01 & 0xff;
406         /* round */
407         s += 2;
408         s >>= 2;
409         SPLICE(o, s, 8);
410 
411         /* corner has filter coef 1 */
412         s = r01 & 0xff;
413         SPLICE(o, s, 0);
414         *(u_int*)(out + 4) = o;
415         out += stride;
416 
417         /* load next rows into cache */
418         u_int r10 = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
419         u_int r11 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
420         in += stride;
421 
422         u_int r20, r21;
423         u_int mask = 0xff00ff;
424         for (int k = 6; --k >= 0; ) {
425                 /* load next row */
426                 r20 = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
427                 r21 = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
428                 in += stride;
429 
430                 /* columns 0,2 */
431                 u_int v = (r00 >> 8) & mask;
432                 v += ((r10 >> 8) & mask) << 1;
433                 v += (r20 >> 8) & mask;
434 
435                 /* first pixel */
436                 s = v >> 16;
437                 /* round */
438                 s += 2;
439                 s >>= 2;
440                 o = 0;
441                 SPLICE(o, s, 24);
442 
443                 /* columns 1,3 */
444                 u_int w = r00 & mask;
445                 w += (r10 & mask) << 1;
446                 w += r20 & mask;
447 
448                 /* row */
449                 s = v >> 16;
450                 s += v & 0xffff;
451                 s += w >> (16-1);
452                 /* round */
453                 s += 8;
454                 s >>= 4;
455                 SPLICE(o, s, 16);
456 
457                 s = w >> 16;
458                 s += w & 0xffff;
459                 s += (v & 0xffff) << 1;
460                 /* round */
461                 s += 8;
462                 s >>= 4;
463                 SPLICE(o, s, 8);
464 
465                 /* start next row */
466                 s = v & 0xffff;
467                 s += (w & 0xffff) << 1;
468                 /* but first do columns 4,6 */
469                 v = (r01 >> 8) & mask;
470                 v += ((r11 >> 8) & mask) << 1;
471                 v += (r21 >> 8) & mask;
472                 /* finish row */
473                 s += v >> 16;
474                 /* round */
475                 s += 8;
476                 s >>= 4;
477                 SPLICE(o, s, 0);
478                 *(u_int*)out = o;
479 
480                 /* start next row */
481                 s = w & 0xffff;
482                 s += (v >> 16) << 1;
483                 /* but first do columns 5,7 */
484                 w = r01 & mask;
485                 w += (r11 & mask) << 1;
486                 w += r21 & mask;
487                 /* finish row */
488                 s += w >> 16;
489                 /* round */
490                 s += 8;
491                 s >>= 4;
492                 o = 0;
493                 SPLICE(o, s, 24);
494 
495                 s = v >> 16;
496                 s += v & 0xffff;
497                 s += w >> (16-1);
498                 /* round */
499                 s += 8;
500                 s >>= 4;
501                 SPLICE(o, s, 16);
502 
503                 s = w >> 16;
504                 s += w & 0xffff;
505                 s += (v & 0xffff) << 1;
506                 /* round */
507                 s += 8;
508                 s >>= 4;
509                 SPLICE(o, s, 8);
510 
511                 s = w & 0xffff;
512                 /* round */
513                 s += 2;
514                 s >>= 2;
515                 SPLICE(o, s, 0);
516                 *(u_int*)(out + 4) = o;
517 
518                 out += stride;
519 
520                 /* roll lines up cache */
521                 r00 = r10;
522                 r01 = r11;
523                 r10 = r20;
524                 r11 = r21;
525         }
526         /*
527          * last row
528          */
529         s = r20 >> 24;
530         o = 0;
531         SPLICE(o, s, 24);
532 
533         s += (r20 >> 15) & 0x1fe;
534         s += (r20 >> 8) & 0xff;
535         /* round */
536         s += 2;
537         s >>= 2;
538         SPLICE(o, s, 16);
539 
540         s = (r20 >> 16) & 0xff;
541         s += (r20 >> 7) & 0x1fe;
542         s += r20 & 0xff;
543         /* round */
544         s += 2;
545         s >>= 2;
546         SPLICE(o, s, 8);
547 
548         s = (r20 >> 8) & 0xff;
549         s += (r20 & 0xff) << 1;
550         s += r21 >> 24;
551         /* round */
552         s += 2;
553         s >>= 2;
554         SPLICE(o, s, 0);
555         *(u_int*)out = o;
556 
557         s = r20 & 0xff;
558         s += (r21 >> 23) & 0x1fe;
559         s += (r21 >> 16) & 0xff;
560         /* round */
561         s += 2;
562         s >>= 2;
563         o = 0;
564         SPLICE(o, s, 24);
565 
566         s = r21 >> 24;
567         s += (r21 >> 15) & 0x1fe;
568         s += (r21 >> 8) & 0xff;
569         /* round */
570         s += 2;
571         s >>= 2;
572         SPLICE(o, s, 16);
573 
574         s = (r21 >> 16) & 0xff;
575         s += (r21 >> 7) & 0x1fe;
576         s += r21 & 0xff;
577         /* round */
578         s += 2;
579         s >>= 2;
580         SPLICE(o, s, 8);
581 
582         /* corner has filter coef 1 */
583         s = r21 & 0xff;
584         SPLICE(o, s, 0);
585         *(u_int*)(out + 4) = o;
586 }
587 
588 
589 void P64Decoder::mvblka(u_char* in, u_char* out, u_int stride)
590 {
591 #ifdef INT_64
592         *(INT_64*)out = *(INT_64*)in;
593         out += stride; in += stride;
594         *(INT_64*)out = *(INT_64*)in;
595         out += stride; in += stride;
596         *(INT_64*)out = *(INT_64*)in;
597         out += stride; in += stride;
598         *(INT_64*)out = *(INT_64*)in;
599         out += stride; in += stride;
600         *(INT_64*)out = *(INT_64*)in;
601         out += stride; in += stride;
602         *(INT_64*)out = *(INT_64*)in;
603         out += stride; in += stride;
604         *(INT_64*)out = *(INT_64*)in;
605         out += stride; in += stride;
606         *(INT_64*)out = *(INT_64*)in;
607 #else
608         for (int k = 8; --k >= 0; ) {
609                 *(u_int*)out = *(u_int*)in;
610                 *(u_int*)(out + 4) = *(u_int*)(in + 4);
611                 in += stride;
612                 out += stride;
613         }
614 #endif
615 }
616 
617 void P64Decoder::mvblk(u_char* in, u_char* out, u_int stride)
618 {
619 #ifdef INT_64
620         if (((u_long)in & 7) == 0) {
621                 mvblka(in, out, stride);
622                 return;
623         }
624 #else
625         if (((u_long)in & 3) == 0) {
626                 mvblka(in, out, stride);
627                 return;
628         }
629 #endif
630         for (int k = 8; --k >= 0;) {
631                 u_int* o = (u_int*)out;
632 #if BYTE_ORDER == LITTLE_ENDIAN
633                 o[0] = in[3] << 24 | in[2] << 16 | in[1] << 8 | in[0];
634                 o[1] = in[7] << 24 | in[6] << 16 | in[5] << 8 | in[4];
635 #else
636                 o[0] = in[0] << 24 | in[1] << 16 | in[2] << 8 | in[3];
637                 o[1] = in[4] << 24 | in[5] << 16 | in[6] << 8 | in[7];
638 #endif
639                 in += stride;
640                 out += stride;
641         }
642 }
643 
644 /*
645  * Parse a picture header. We assume that the picture
646  * start code (20-bit long PSC field) has already been snarfed.
647  *
648  *   The H.261 picture header is defined as following:
649  *
650  *     0                   1                   2                   3
651  *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
652  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
653  *    |                  PCS                  |   TR    |   PTYPE   |P|
654  *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
655  *
656  *   The fields in the H.261 picture header have the following meanings:
657  *
658  *   Picture Start Code (PCS): 20 bits
659  *     Must be 0x00010
660  *
661  *   Temporal Reference (TR): 5 bits
662  *     ...
663  *
664  *   Type Information (PTYPE): 6 bits
665  *     bit 1: Split screen indicator, "" off, "1" on
666  *     bit 2: Document camera indicator, "" off, "1" on
667  *     bit 3: Freeze Picture Release, "" off, "1" on
668  *     bit 4: Source format, "" QCIF, "1" CIF
669  *     bit 5: Optional still image mode HI_RES. on(0)/off(1)
670  *     bit 6: Spare
671  *
672  *   Extra Insertion Information, PEI (P): 1 bit
673  *     "1" signals the presence of the following optional data field
674  *
675  */
676 int P64Decoder::parse_picture_hdr()
677 {
678         /* throw away the temporal reference */
679         SKIP_BITS(bs_, 5, nbb_, bb_);
680         u_int pt;
681         GET_BITS(bs_, 6, nbb_, bb_, pt);
682         u_int fmt = (pt >> 2) & 1;
683         if (fmt_ != fmt) {
684                 /* change formats */
685                 fmt_ = fmt;
686                 init();
687         }
688         int v;
689         GET_BITS(bs_, 1, nbb_, bb_, v);
690         while (v != 0) {
691                 GET_BITS(bs_, 9, nbb_, bb_, v);
692                 /*
693                  * FIXME from pvrg code: 0x8c in PSPARE means ntsc.
694                  * this is a hack.  we don't support it.
695                  */
696                 int pspare = v >> 1;
697                 if (pspare == 0x8c && (pt & 0x04) != 0) {
698                         static int first = 1;
699                         if (first) {
700                                 err("pvrg ntsc not supported");
701                                 first = 0;
702                         }
703                 }
704                 v &= 1;
705         }
706         return (0);
707 }
708 
709 inline int P64Decoder::parse_sc()
710 {
711         int v;
712         GET_BITS(bs_, 16, nbb_, bb_, v);
713         if (v != 0x0001) {
714                 err("bad start code %04x", v);
715                 ++bad_psc_;
716                 return (-1);
717         }
718         return (0);
719 }
720 
721 /*
722  * Parse a GOB header, which consists of the GOB quantiation
723  * factor (GQUANT) and spare bytes that we ignore.
724  */
725 int P64Decoder::parse_gob_hdr(int ebit)
726 {
727         mba_ = -1;
728         mvdh_ = 0;
729         mvdv_ = 0;
730 
731         /*
732          * Get the next GOB number (or 0 for a picture header).
733          * The invariant at the top of this loop is that the
734          * bit stream is positioned immediately past the last
735          * start code.
736          */
737         u_int gob;
738         for (;;) {
739                 GET_BITS(bs_, 4, nbb_, bb_, gob);
740                 if (gob != 0)
741                         break;
742                 /*
743                  * should happen only on first iteration
744                  * (if at all).  pictures always start on
745                  * packet boundaries per section 5 of the
746                  * Internet Draft.
747                  */
748                 if (parse_picture_hdr() < 0) {
749                         ++bad_fmt_;
750                         return (-1);
751                 }
752                 /*
753                  * Check to see that the next 16 bits
754                  * are a start code and throw them away.
755                  * But first check that we have the bits.
756                  */
757                 int nbit = ((es_ - bs_) << 4) + nbb_ - ebit;
758                 if (nbit < 20)
759                         return (0);
760 
761                 if (parse_sc() < 0)
762                         return (-1);
763         }
764         gob -= 1;
765         if (fmt_ == IT_QCIF)
766                 /*
767                  * Number QCIF GOBs 0,1,2 instead of 0,2,4.
768                  */
769                 gob >>= 1;
770 
771         if (gob >= ngob_) {
772                 err("gob number too big (%d>%d)", gob, ngob_);
773                 return (-1);
774         }
775 
776         int mq;
777         GET_BITS(bs_, 5, nbb_, bb_, mq);
778         gobquant_ = mq;
779         qt_ = &quant_[mq << 8];
780 
781         int v;
782         GET_BITS(bs_, 1, nbb_, bb_, v);
783         while (v != 0) {
784                 GET_BITS(bs_, 9, nbb_, bb_, v);
785                 v &= 1;
786         }
787         gob_ = gob;
788         if (gob > maxgob_)
789                 maxgob_ = gob;
790 
791         return (gob);
792 }
793 
794 /*
795  * Parse a macroblock header.  If there is no mb header because
796  * we hit the next start code, return -1, otherwise 0.
797  */
798 int P64Decoder::parse_mb_hdr(u_int& cbp)
799 {
800         /*
801          * Read the macroblock address (MBA)
802          */
803         int v;
804         HUFF_DECODE(bs_, ht_mba_, nbb_, bb_, v);
805         if (v <= 0) {
806                 /*
807                  * (probably) hit a start code; either the
808                  * next GOB or the next picture header.
809                  * If we got MBA stuffing (0) we need to return
810                  * so the outer loop can check if we're at the
811                  * end of the buffer (lots of codecs put stuffing
812                  * at the end of a picture to byte align the psc).
813                  */
814                 return (v);
815         }
816 
817         /*
818          * MBA is differentially encoded.
819          */
820         mba_ += v;
821         if (mba_ >= MBPERGOB) {
822                 err("mba too big %d", mba_);
823                 return (SYM_ILLEGAL);
824         }
825 // printf ("addr = (%d,%d) ", gob_+1, mba_+1);
826 
827         u_int omt = mt_;
828         HUFF_DECODE(bs_, ht_mtype_, nbb_, bb_, mt_);
829         if (mt_ & MT_MQUANT) {
830                 int mq;
831                 GET_BITS(bs_, 5, nbb_, bb_, mq);
832                 qt_ = &quant_[mq << 8];
833         }
834         if (mt_ & MT_MVD) {
835                 /*
836                  * Read motion vector.
837                  */
838                 int dh;
839                 int dv;
840                 HUFF_DECODE(bs_, ht_mvd_, nbb_, bb_, dh);
841                 HUFF_DECODE(bs_, ht_mvd_, nbb_, bb_, dv);
842                 /*
843                  * Section 4.2.3.4
844                  * The vector is differentially coded unless any of:
845                  *   - the current mba delta isn't 1
846                  *   - the current mba is 1, 12, or 23 (mba mod 11 = 1)
847                  *   - the last block didn't have motion vectors.
848                  *
849                  * This arithmetic is twos-complement restricted
850                  * to 5 bits.
851                  */
852                 if ((omt & MT_MVD) != 0 && v == 1 &&
853                     mba_ != 0 && mba_ != 11 && mba_ != 22) {
854                         dh += mvdh_;
855                         dv += mvdv_;
856                 }
857                 mvdh_ = (dh << 27) >> 27;
858                 mvdv_ = (dv << 27) >> 27;
859         }
860         /*
861          * Coded block pattern.
862          */
863         if (mt_ & MT_CBP) {
864                 HUFF_DECODE(bs_, ht_cbp_, nbb_, bb_, cbp);
865                 if (cbp > 63) {
866                         err("cbp invalid %x", cbp);
867                         return (SYM_ILLEGAL);
868                 }
869         } else
870                 cbp = 0x3f;
871 
872         return (1);
873 }
874 
875 /*
876  * Handle the next block in the current macroblock.
877  * If tc is non-zero, then coefficients are present
878  * in the input stream and they are parsed.  Otherwise,
879  * coefficients are not present, but we take action
880  * according to the type macroblock that we have.
881  */
882 void P64Decoder::decode_block(u_int tc, u_int x, u_int y, u_int stride,
883                               u_char* front, u_char* back, int sf)
884 {
885         short blk[64];
886 #ifdef INT_64
887         INT_64 mask;
888 #define MASK_VAL        mask
889 #define MASK_REF        &mask
890 #else
891         u_int mask[2];
892 #define MASK_VAL        mask[0], mask[1]
893 #define MASK_REF        mask
894 #endif
895         int nc;
896         if (tc != 0)
897                 nc = parse_block(blk, MASK_REF);
898 
899         int off = y * stride + x;
900         u_char* out = front + off;
901 
902         if (mt_ & MT_INTRA) {
903                 if (tc != 0) {
904                         if (nc == 0)
905                                 dcfill((blk[0] + 4) >> 3, out, stride);
906 #ifdef notdef
907                         else if (nc == 1) {
908 #ifdef INT_64
909                                 u_int dc = (mask & 1) ? (blk[0] + 4) >> 3 : 0;
910                                 for (int k = 1; k < 64; ++k) {
911                                         if (mask & ((INT_64)1 << k)) {
912                                                 bv_rdct1(dc, blk, k,
913                                                          out, stride);
914                                                 return;
915                                         }
916                                 }
917 #else
918                                 u_int m0 = mask[0];
919                                 u_int m1 = mask[1];
920                                 u_int dc = (m0 & 1) ? (blk[0] + 4) >> 3 : 0;
921                                 for (int k = 1; k < 64; ++k) {
922                                         m0 >>= 1;
923                                         m0 |= m1 << 31;
924                                         m1 >>= 1;
925                                         if (m0 & 1) {
926                                                 bv_rdct1(dc, blk, k,
927                                                          out, stride);
928                                                 return;
929                                         }
930                                 }
931 #endif
932 #endif
933                          else
934                                 rdct(blk, MASK_VAL, out, stride, (u_char*)0);
935                 } else {
936                         u_char* in = back + off;
937                         mvblka(in, out, stride);
938                 }
939                 return;
940         }
941         if ((mt_ & MT_MVD) == 0) {
942                 u_char* in = back + off;
943                 if (tc != 0) {
944                         if (nc == 0) {
945                                 dcsum((blk[0] + 4) >> 3, in, out, stride);
946                         } else
947                                 rdct(blk, MASK_VAL, out, stride, in);
948                 } else
949                         mvblka(in, out, stride);
950                 return;
951         }
952         u_int sx = x + (mvdh_ / sf);
953         u_int sy = y + (mvdv_ / sf);
954         u_char* in = back + sy * stride + sx;
955         if (mt_ & MT_FILTER) {
956                 filter(in, out, stride);
957                 if (tc != 0) {
958                         if (nc == 0)
959                                 dcsum2((blk[0] + 4) >> 3, out, out, stride);
960                         else
961                                 rdct(blk, MASK_VAL, out, stride, out);
962                 }
963         } else {
964                 if (tc != 0) {
965                         if (nc == 0)
966                                 dcsum2((blk[0] + 4) >> 3, in, out, stride);
967                         else
968                                 rdct(blk, MASK_VAL, out, stride, in);
969                 } else
970                         mvblk(in, out, stride);
971         }
972 }
973 
974 /*
975  * Decompress the next macroblock.  Return 0 if the macroblock
976  * was present (with no errors).  Return SYM_STARTCODE (-1),
977  * if there was no macroblock but instead the start of the
978  * next GOB or picture (in which case the start code has
979  * been consumed).  Return SYM_ILLEGAL (-2) if there was an error.
980  */
981 int P64Decoder::decode_mb()
982 {
983         u_int cbp;
984         register int v;
985 
986         if ((v = parse_mb_hdr(cbp)) <= 0)
987                 return (v);
988 
989         /*
990          * Lookup the base coordinate for this MBA.
991          * Convert from a block to a pixel coord.
992          */
993         register u_int x, y;
994         x = coord_[mba_];
995         y = (x & 0xff) << 3;
996         x >>= 8;
997         x <<= 3;
998 
999         /* Update bounding box */
1000         if (x < minx_)
1001                 minx_ = x;
1002         if (x > maxx_)
1003                 maxx_ = x;
1004         if (y < miny_)
1005                 miny_ = y;
1006         if (y > maxy_)
1007                 maxy_ = y;
1008 
1009         /*
1010          * Decode the six blocks in the MB (4Y:1U:1V with 4:2:0 subsampling scheme).
1011          * (This code assumes MT_TCOEFF is 1.)
1012          */
1013         register u_int tc = mt_ & MT_TCOEFF;
1014         register u_int s = width_;
1015         decode_block(tc & (cbp >> 5), x, y, s, front_, back_, 1);
1016         decode_block(tc & (cbp >> 4), x + 8, y, s, front_, back_, 1);
1017         decode_block(tc & (cbp >> 3), x, y + 8, s, front_, back_, 1);
1018         decode_block(tc & (cbp >> 2), x + 8, y + 8, s, front_, back_, 1);
1019         s >>= 1;
1020         int off = size_;
1021         decode_block(tc & (cbp >> 1), x >> 1, y >> 1, s,
1022                      front_ + off, back_ + off, 2);
1023         off += size_ >> 2;
1024         decode_block(tc & (cbp >> 0), x >> 1, y >> 1, s,
1025                      front_ + off, back_ + off, 2);
1026 
1027         mbst_[mba_] = MBST_NEW;
1028 
1029         /*
1030          * If a marking table was attached, take note.
1031          * This allows us to dither only the blocks that have changed,
1032          * rather than the entire image on each frame.
1033          */
1034         if (marks_) {
1035                 /* convert to 8x8 block offset */
1036                 off = (x >> 3) + (y >> 3) * (width_ >> 3);
1037                 int m = mark_;
1038                 marks_[off] = m;
1039                 marks_[off + 1] = m;
1040                 off += width_ >> 3;
1041                 marks_[off] = m;
1042                 marks_[off + 1] = m;
1043         }
1044         return (0);
1045 }
1046 
1047 /*
1048  * Decode H.261 stream.  Decoding can begin on either
1049  * a GOB or macroblock header.  All the macroblocks of
1050  * a given frame can be decoded in any order, but chunks
1051  * cannot be reordered across frame boundaries.  Since data
1052  * can be decoded in any order, this entry point can't tell
1053  * when a frame is fully decoded (actually, we could count
1054  * macroblocks but if there is loss, we would not know when
1055  * to sync).  Instead, the callee should sync the decoder
1056  * by calling the sync() method after the entire frame
1057  * has been decoded (modulo loss).
1058  *
1059  * This routine should not be called with more than
1060  * one frame present since there is no callback mechanism
1061  * for renderering frames (i.e., don't call this routine
1062  * with a buffer that has a picture header that's not
1063  * at the front).
1064  */
1065 int P64Decoder::decode(const u_char* bp, int cc, int sbit, int ebit,
1066                        int mba, int gob, int mq, int mvdh, int mvdv)
1067 {
1068         ps_ = (u_short*)bp;
1069 
1070         /*
1071          * If cc is odd, ignore 8 extra bits in last short.
1072          */
1073         int odd = cc & 1;
1074         ebit += odd << 3;
1075         pebit_ = ebit;
1076         es_ = (u_short*)(bp + ((cc - 1) &~ 1));
1077 
1078         /*
1079          * If input buffer not aligned, prime bit-buffer
1080          * with 8 bits; otherwise, prime it with a 16.
1081          */
1082         if ((int)bp & 1) {
1083                 bs_ = (u_short*)(bp + 1);
1084                 bb_ = *bp;
1085                 nbb_ = 8 - sbit;
1086         } else {
1087                 bs_ = (u_short*)bp;
1088                 HUFFRQ(bs_, bb_);
1089                 nbb_ = 16 - sbit;
1090         }
1091 
1092         mba_ = mba;
1093         qt_ = &quant_[mq << 8];
1094         mvdh_ = mvdh;
1095         mvdv_ = mvdv;
1096 
1097         /* don't rely on this (GOB number in RTP header) */
1098         if (gob != 0) {
1099                 gob -= 1;
1100                 if (fmt_ == IT_QCIF)
1101                         gob >>= 1;
1102         }
1103 
1104         while (bs_ < es_ || (bs_ == es_ && nbb_ > ebit)) {
1105                 mbst_ = &mb_state_[gob << 6];
1106                 coord_ = &base_[gob << 6];
1107 
1108                 int v = decode_mb();
1109                 if (v == 0) {
1110                         // a macroblock has been decoded. Continue in next chunk
1111                         ndmblk_++;
1112                         continue;
1113                 }
1114 
1115                 // check if this was the start of the next GOB or picture (in which 
1116                 //      case the start code has been consumed)
1117                 if (v != SYM_STARTCODE) {
1118                         err("expected GOB startcode");
1119                         ++bad_bits_;
1120                         return (0);
1121                 }
1122                 gob = parse_gob_hdr(ebit);
1123                 if (gob < 0) {
1124                         /*FIXME*/
1125                         ++bad_bits_;
1126                         return (0);
1127                 }
1128         }
1129         return (0);
1130 }
1131 
1132 FullP64Decoder::FullP64Decoder()
1133 {
1134         init();
1135 }
1136 
1137 void FullP64Decoder::allocate()
1138 {
1139         delete[] fs_;
1140         int n = size_ + (size_ >> 1);
1141         fs_ = new u_char[2 * n];
1142         /* initialize to gray */
1143         memset(fs_, 0x80, 2 * n);
1144         front_ = fs_;
1145         back_ = front_ + n;
1146 }
1147 
1148 /*
1149  * Swap the `front' and `back' frame buffers.  While decoding a
1150  * frame, the front buffer is the image being constructed while
1151  * the back buffer is the reference image.  Rather than copy
1152  * the whole image each time, we just swap pointers here.
1153  * We defer this copying until we find out that we're skipping
1154  * over a macroblock, or even a whole gob.  In this case, we
1155  * go ahead and copy it, but take note in the mb_skip_ array.
1156  * Next time we need to copy it, we skip it if the skip array
1157  * says it's okay (e.g., there is no reason to copy a given block
1158  * back and forth between buffers if it never changes).  When we
1159  * modify a macroblock, we clear out it's entry in mb_skip_.
1160  */
1161 void FullP64Decoder::swap()
1162 {
1163         u_char* p = front_;
1164         front_ = back_;
1165         back_ = p;
1166 }
1167 
1168 /*
1169  * Copy a macroblock from the saved frame (back buffer)
1170  * to the current frame (front buffer). coord_ determines
1171  * which GOB we're in.
1172  */
1173 void FullP64Decoder::mbcopy(u_int mba)
1174 {
1175         u_int x, y;
1176         x = coord_[mba];
1177         y = (x & 0xff) << 3;
1178         x >>= 8;
1179         x <<= 3;
1180 
1181         u_int stride = width_;
1182         u_int off = y * stride + x;
1183         u_char* in = back_ + off;
1184         u_char* out = front_ + off;
1185 
1186         mvblka(in, out, stride);
1187         mvblka(in + 8, out + 8, stride);
1188         in += stride << 3;
1189         out += stride << 3;
1190         mvblka(in, out, stride);
1191         mvblka(in + 8, out + 8, stride);
1192         x >>= 1;
1193         y >>= 1;
1194         stride >>= 1;