shithub: jbig2

Download patch

ref: 107138382e590d3851ba1b5e897a33f2cf27ee4b
parent: 0a9304dad738268b27556717bf83936c15618506
author: raph <raph@ded80894-8fb9-0310-811b-c03f3676ab4d>
date: Fri Feb 15 15:46:30 EST 2002

A good start at generic region decoding. The added jbig2_generic code
handles GBTEMPLATE=1 with vanilla options and the adaptive template
pixels in their nominal locations. This is sufficient to successfully
decode the 042_4.jb2 test stream.


git-svn-id: http://svn.ghostscript.com/jbig2dec/trunk@42 ded80894-8fb9-0310-811b-c03f3676ab4d

--- a/jbig2.c
+++ b/jbig2.c
@@ -4,6 +4,7 @@
 #include <stdio.h>
 #include "jbig2.h"
 #include "jbig2_priv.h"
+#include "jbig2_generic.h"
 
 static void *
 jbig2_default_alloc (Jbig2Allocator *allocator, size_t size)
@@ -95,8 +96,8 @@
   return result;
 }
 
-static int32_t
-jbig2_get_int32 (uint8_t *buf)
+int32_t
+jbig2_get_int32 (const byte *buf)
 {
   return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
 }
@@ -364,5 +365,10 @@
 	      "Segment %d, flags=%x, type=%d, data_length=%d",
 	      sh->segment_number, sh->flags, sh->flags & 63,
 	      sh->data_length);
+  switch (sh->flags & 63)
+    {
+    case 38:
+      return jbig2_immediate_generic_region(ctx, sh, segment_data);
+    }
   return 0;
 }
--- a/jbig2_arith.c
+++ b/jbig2_arith.c
@@ -8,7 +8,7 @@
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.
 
-    $Id: jbig2_arith.c,v 1.5 2001/06/10 07:09:18 giles Exp $
+    $Id: jbig2_arith.c,v 1.6 2002/02/15 20:46:30 raph Exp $
 */
 
 #include <stdio.h>
@@ -34,8 +34,33 @@
   int offset;
 };
 
-#define SOFTWARE_CONVENTION
+#undef SOFTWARE_CONVENTION
 
+/*
+  A note on the "software conventions".
+
+  The spec (in both draft and final versions) gives a "software
+  conventions" version of the arithmetic decoding procedure. It is not
+  normative, which is good, because it's wrong.
+
+  The value of C in the "software conventions" is nominally equal to
+  -C + (0x10000 >> CT) + (A << 16). However, the leftmost branch of
+  Figure G.3 gives a very wrong value for C, based on this equation.
+  Changing the (B << 9) to (B << 8) restores the invariant, and gives
+  correct decoding of the H.2 test sequence. However, the decoding of
+  the 042_4.jb2 test bitstream is incorrect, even with this change.
+  Changing the "Chigh < A?" predicate in Figure G.2 to "C < (A << 16)"
+  restores correct decoding for that bitstream, but I'm not sure this
+  is 100% correct.
+
+  In any case, my benchmarking indicates no speed difference at all.
+  Therefore, for now we will just use the normative version. If
+  somebody wants to figure out how to do the software conventions
+  version correctly, and can establish that it really does improve
+  performance, it might be worthwhile to revisit.
+
+ */
+
 static void
 jbig2_arith_bytein (Jbig2ArithState *as)
 {
@@ -48,7 +73,7 @@
       as->offset += 4;
       as->next_word_bytes = 4;
     }
-  /* Figure F.3 */
+  /* Figure G.3 */
   B = (as->next_word >> 24) & 0xFF;
   if (B == 0xFF)
     {
@@ -76,11 +101,9 @@
 #ifdef DEBUG
 	      printf ("read %02x (a)\n", B);
 #endif
-	      /* Note: the spec calls for adding (or subtracting) B <<
-		 9 here. However, to be consistent with the sample run
-		 in Annex H.2, we use the B << 8 instead. */
 #ifdef SOFTWARE_CONVENTION
-	      as->C += 0xFE00 - (B << 8);
+	      /* Note: this is what the spec says. The spec is wrong. */
+	      as->C += 0xFE00 - (B << 9);
 #else
 	      as->C += 0xFF00;
 #endif
@@ -108,11 +131,10 @@
 #ifdef DEBUG
 	      printf ("read %02x (b)\n", B);
 #endif
-	      /* Note: the spec calls for adding (or subtracting) B <<
-		 9 here. However, to be consistent with the sample run
-		 in Annex H.2, we use the B << 8 instead. */
+
 #ifdef SOFTWARE_CONVENTION
-	      as->C += 0xFE00 - (B << 8);
+	      /* Note: this is what the spec says. The spec is wrong. */
+	      as->C += 0xFE00 - (B << 9);
 #else
 	      as->C += 0xFF00;
 #endif
@@ -158,7 +180,7 @@
   result->next_word_bytes = 4;
   result->offset = 4;
 
-  /* Figure F.1 */
+  /* Figure G.1 */
 #ifdef SOFTWARE_CONVENTION
   result->C = (~(result->next_word >> 8)) & 0xFF0000;
 #else
@@ -254,11 +276,12 @@
   const Jbig2ArithQe *pqe = &jbig2_arith_Qe[cx & 0x7f];
   bool D;
 
-  /* Figure F.2 */
+  /* Figure G.2 */
   as->A -= pqe->Qe;
   if (
 #ifdef SOFTWARE_CONVENTION
-      (as->C) >> 16 < as->A
+      /* Note: I do not think this is correct. See above. */
+      (as->C >> 16) < as->A
 #else
       !((as->C >> 16) < pqe->Qe)
 #endif
--- /dev/null
+++ b/jbig2_generic.c
@@ -1,0 +1,265 @@
+/**
+ * Generic region handlers.
+ **/
+
+#define OUTPUT_PBM
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef OUTPUT_PBM
+#include <stdio.h>
+#endif
+#include "jbig2.h"
+#include "jbig2_priv.h"
+#include "jbig2_arith.h"
+#include "jbig2_generic.h"
+
+/* Table 2: parameters handed to jbig2_decode_generic_region(). */
+typedef struct {
+  bool MMR;           /* bit 0 of the segment flags byte */
+  int32_t GBW;        /* region width, taken from the region segment info */
+  int32_t GBH;        /* region height, taken from the region segment info */
+  int GBTEMPLATE;     /* bits 1-2 of the segment flags byte */
+  bool TPGDON;        /* bit 3 of the segment flags byte */
+  bool USESKIP;       /* the immediate-region handler in this file always stores 0 */
+  /* SKIP */
+  byte gbat[8];       /* raw AT bytes copied from the segment (2 or 8 are filled in) */
+} Jbig2GenericRegionParams;
+
+typedef struct {      /* region segment information field (7.4.1) */
+  int32_t width;      /* big-endian 32-bit value at bytes 0-3 */
+  int32_t height;     /* big-endian 32-bit value at bytes 4-7 */
+  int32_t x;          /* big-endian 32-bit value at bytes 8-11 */
+  int32_t y;          /* big-endian 32-bit value at bytes 12-15 */
+  byte flags;         /* byte 16, stored unparsed */
+} Jbig2RegionSegmentInfo;
+
+typedef struct {          /* word stream that reads from a memory buffer */
+  Jbig2WordStream super;  /* first member: the get_next_word callback casts self back to this type */
+  const byte *data;       /* caller's buffer; the pointer is stored, not copied */
+  size_t size;            /* number of bytes available at data */
+} Jbig2WordStreamBuf;
+
+/* Fetch the big-endian word at byte @offset; bytes past the end read as 0. */
+static uint32_t
+jbig2_word_stream_buf_get_next_word(Jbig2WordStream *self, int offset)
+{
+  Jbig2WordStreamBuf *z = (Jbig2WordStreamBuf *)self;
+  const byte *data = z->data;
+  uint32_t result = 0;
+  size_t i, avail;
+
+  /* Guard first: size - offset is unsigned and would wrap past the end. */
+  if (offset < 0 || (size_t)offset >= z->size)
+    return 0;
+  avail = z->size - (size_t)offset;
+  if (avail > 4)
+    avail = 4;
+  /* Cast before shifting so a byte >= 0x80 cannot overflow a signed int. */
+  for (i = 0; i < avail; i++)
+    result |= (uint32_t)data[offset + i] << ((3 - i) << 3);
+  return result;
+}
+/* Wrap @data (borrowed, not copied) in a word stream; NULL if allocation fails. */
+Jbig2WordStream *
+jbig2_word_stream_buf_new(Jbig2Ctx *ctx, const byte *data, size_t size)
+{
+  Jbig2WordStreamBuf *result = (Jbig2WordStreamBuf *)jbig2_alloc(ctx->allocator, sizeof *result);
+  if (result == NULL)
+    return NULL;
+  result->super.get_next_word = jbig2_word_stream_buf_get_next_word;
+  result->data = data;
+  result->size = size;
+  return &result->super;
+}
+/* Hand @ws back to ctx's allocator; the data buffer it pointed at is not touched. */
+void
+jbig2_word_stream_buf_free(Jbig2Ctx *ctx, Jbig2WordStream *ws)
+{
+  jbig2_free(ctx->allocator, ws);
+}
+/* Arithmetic-decode a GBTEMPLATE=1 generic region into gbreg; always returns 0. */
+static int
+jbig2_decode_generic_template1(Jbig2Ctx *ctx,
+			       int32_t seg_number,
+			       const Jbig2GenericRegionParams *params,
+			       const byte *data, size_t size,
+			       byte *gbreg)
+{
+  Jbig2ArithCx GB_stats[8192]; /* one entry per 13-bit CONTEXT value */
+  Jbig2WordStream *ws = jbig2_word_stream_buf_new(ctx, data, size);
+  /* todo: ctx needs to be an argument; fix up memory allocation */
+  Jbig2ArithState *as = jbig2_arith_new(ws);
+  int GBW = params->GBW;
+  int rowstride = (GBW + 7) >> 3; /* bytes per packed scanline */
+  int x, y;
+  byte *gbreg_line = gbreg; /* start of the scanline being decoded */
+  bool LTP = 0; /* not referenced anywhere below */
+  /* NOTE(review): ws and as are never released in this function. */
+  memset(GB_stats, 0, sizeof(GB_stats));
+  /* todo: currently we only handle the nominal gbat location */
+
+#ifdef OUTPUT_PBM
+  printf("P4\n%d %d\n", GBW, params->GBH); /* debug dump: PBM header on stdout */
+#endif
+  /* Decode top to bottom; each row's contexts draw on the two rows above it. */
+  for (y = 0; y < params->GBH; y++)
+    {
+      uint32_t CONTEXT;
+      uint32_t line_m1; /* sliding window over row y-1 */
+      uint32_t line_m2; /* sliding window over row y-2, held shifted left by 5 */
+      int padded_width = (GBW + 7) & -8; /* GBW rounded up to a multiple of 8 */
+      /* Prime the windows with the first byte of each row above (0 if absent). */
+      line_m1 = (y >= 1) ? gbreg_line[-rowstride] : 0;
+      line_m2 = (y >= 2) ? gbreg_line[-(rowstride << 1)] << 5 : 0;
+      CONTEXT = ((line_m1 >> 1) & 0x1f8) | ((line_m2 >> 4) & 0x1e00);
+
+      /* 6.2.5.7 3d */
+      for (x = 0; x < padded_width; x += 8)
+	{
+	  byte result = 0;
+	  int x_minor;
+	  int minor_width = GBW - x > 8 ? 8 : GBW - x; /* pixels in this output byte */
+	  /* Append the next byte of row y-1, or zeros when the row has no further byte. */
+	  if (y >= 1)
+	    line_m1 = (line_m1 << 8) |
+	      (x + 8 < GBW ? gbreg_line[-rowstride + (x >> 3) + 1] : 0);
+	  /* Same for row y-2, keeping its 5-bit pre-shift. */
+	  if (y >= 2)
+	    line_m2 = (line_m2 << 8) |
+	      (x + 8 < GBW ? gbreg_line[-(rowstride << 1) + (x >> 3) + 1] << 5: 0);
+
+	  /* This is the speed-critical inner loop. */
+	  for (x_minor = 0; x_minor < minor_width; x_minor++)
+	    {
+	      bool bit;
+
+	      bit = jbig2_arith_decode(as, &GB_stats[CONTEXT]);
+	      result |= bit << (7 - x_minor); /* pack most significant bit first */
+	      CONTEXT = ((CONTEXT & 0xefb) << 1) | bit |
+		((line_m1 >> (8 - x_minor)) & 0x8) |
+		((line_m2 >> (8 - x_minor)) & 0x200);
+	    }
+	  gbreg_line[x >> 3] = result;
+	}
+#ifdef OUTPUT_PBM
+      fwrite(gbreg_line, 1, rowstride, stdout);
+#endif
+      gbreg_line += rowstride;
+    }
+
+  return 0;
+}
+
+/**
+ * jbig2_decode_generic_region: Decode a generic region.
+ * @ctx: The context for allocation and error reporting.
+ * @seg_number: Segment number, used to tag error reports.
+ * @params: Parameters, as specified in Table 2.
+ * @data: The input data.
+ * @size: The size of the input data, in bytes.
+ * @gbreg: Where to store the decoded data.
+ *
+ * Decodes a generic region, according to section 6.2. The caller should
+ * have allocated the memory for @gbreg, which is packed 8 pixels to a
+ * byte, scanlines aligned to one byte boundaries.
+ *
+ * Todo: I think the stats need to be an argument.
+ *
+ * Return code: 0 on success, -1 if the MMR/GBTEMPLATE combination is NYI.
+ **/
+int
+jbig2_decode_generic_region(Jbig2Ctx *ctx, int32_t seg_number,
+			    const Jbig2GenericRegionParams *params,
+			    const byte *data, size_t size, byte *gbreg)
+{
+  if (params->MMR || params->GBTEMPLATE != 1)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_WARNING, seg_number,
+		  "decode_generic_region: MMR=%d, GBTEMPLATE=%d NYI",
+		  params->MMR, params->GBTEMPLATE);
+      return -1;
+    }
+  return jbig2_decode_generic_template1(ctx, seg_number, params, data, size, gbreg);
+}
+/* Unpack the region segment information field (7.4.1) from @segment_data. */
+void
+jbig2_get_region_segment_info(Jbig2RegionSegmentInfo *info,
+			      const byte *segment_data)
+{
+  /* Reads bytes 0..16; the caller must have checked the segment length. */
+  info->width = jbig2_get_int32(&segment_data[0]);
+  info->height = jbig2_get_int32(&segment_data[4]);
+  info->x = jbig2_get_int32(&segment_data[8]);
+  info->y = jbig2_get_int32(&segment_data[12]);
+  info->flags = segment_data[16];
+}
+
+int
+jbig2_immediate_generic_region(Jbig2Ctx *ctx, Jbig2SegmentHeader *sh,
+			       const uint8_t *segment_data)
+{
+  Jbig2RegionSegmentInfo rsi;
+  byte seg_flags;
+  int8_t gbat[8];
+  int offset;
+  int gbat_bytes = 0;
+  Jbig2GenericRegionParams params;
+  int code;
+  byte *gbreg;
+
+  /* 7.4.6 */
+  if (sh->data_length < 18)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+		  "Segment too short");
+      return -1;
+    }
+
+  jbig2_get_region_segment_info(&rsi, segment_data);
+  jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+	      "generic region: %d x %d @ (%d, %d), flags = %02x",
+	      rsi.width, rsi.height, rsi.x, rsi.y, rsi.flags);
+
+  /* 7.4.6.2 */
+  seg_flags = segment_data[17];
+  jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+	      "segment flags = %02x",
+	      seg_flags);
+  if ((seg_flags & 1) && (seg_flags & 6))
+    jbig2_error(ctx, JBIG2_SEVERITY_WARNING, sh->segment_number,
+		"MMR is 1, but GBTEMPLATE is not 0");
+
+  /* 7.4.6.3 */
+  if (!(seg_flags & 1))
+    {
+      gbat_bytes = (seg_flags & 6) ? 2 : 8;
+      if (18 + gbat_bytes > sh->data_length)
+	return -1;
+      memcpy(gbat, segment_data + 18, gbat_bytes);
+      jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+		  "gbat: %d, %d", gbat[0], gbat[1]);
+    }
+
+  offset = 18 + gbat_bytes;
+
+  /* Table 34 */
+  params.MMR = seg_flags & 1;
+  params.GBTEMPLATE = (seg_flags & 6) >> 1;
+  params.TPGDON = (seg_flags & 8) >> 3;
+  params.USESKIP = 0;
+  params.GBW = rsi.width;
+  params.GBH = rsi.height;
+  memcpy (params.gbat, gbat, gbat_bytes);
+
+  gbreg = jbig2_alloc(ctx->allocator, ((rsi.width + 7) >> 3) * rsi.height);
+
+  code = jbig2_decode_generic_region(ctx, sh->segment_number, &params,
+				     segment_data + offset,
+				     sh->data_length - offset,
+				     gbreg);
+
+  /* todo: stash gbreg as segment result */
+
+  return code;
+}
--- /dev/null
+++ b/jbig2_generic.h
@@ -1,0 +1,4 @@
+int
+jbig2_immediate_generic_region(Jbig2Ctx *ctx, Jbig2SegmentHeader *sh,
+			       const uint8_t *segment_data);
+
--- a/jbig2_priv.h
+++ b/jbig2_priv.h
@@ -12,6 +12,7 @@
 	     const char *fmt, ...);
 
 typedef uint8_t byte;
+typedef int bool;
 
 typedef enum {
   JBIG2_FILE_HEADER,
@@ -45,3 +46,16 @@
   int sh_ix;
 };
 
+int32_t
+jbig2_get_int32 (const byte *buf);
+
+/* The word stream design is a compromise between simplicity and
+   trying to amortize the number of method calls. Each ::get_next_word
+   invocation pulls 4 bytes from the stream, packed big-endian into a
+   32 bit word. The offset argument is provided as a convenience. It
+   begins at 0 and increments by 4 for each successive invocation. */
+typedef struct _Jbig2WordStream Jbig2WordStream;
+
+struct _Jbig2WordStream {
+  uint32_t (*get_next_word) (Jbig2WordStream *self, int offset);
+};
--- a/makefile
+++ b/makefile
@@ -4,7 +4,7 @@
 
 all:	$(APPS)
 
-jbig2dec:	jbig2.o jbig2dec.o jbig2_huffman.o jbig2_arith.o jbig2_image.o
+jbig2dec:	jbig2.o jbig2dec.o jbig2_huffman.o jbig2_arith.o jbig2_image.o jbig2_generic.o
 
 test_huffman:	jbig2_huffman.c
 	gcc $(CFLAGS) -DTEST jbig2_huffman.c -o test_huffman