ref: 107138382e590d3851ba1b5e897a33f2cf27ee4b
parent: 0a9304dad738268b27556717bf83936c15618506
author: raph <raph@ded80894-8fb9-0310-811b-c03f3676ab4d>
date: Fri Feb 15 15:46:30 EST 2002
A good start at generic region decoding. The added jbig2_generic code handles GBTEMPLATE=1 with vanilla options and the adaptive template pixels in their nominal locations. This is sufficient to successfully decode the 042_4.jb2 test stream. git-svn-id: http://svn.ghostscript.com/jbig2dec/trunk@42 ded80894-8fb9-0310-811b-c03f3676ab4d
--- a/jbig2.c
+++ b/jbig2.c
@@ -4,6 +4,7 @@
#include <stdio.h>
#include "jbig2.h"
#include "jbig2_priv.h"
+#include "jbig2_generic.h"
static void *
jbig2_default_alloc (Jbig2Allocator *allocator, size_t size)
@@ -95,8 +96,8 @@
return result;
}
-static int32_t
-jbig2_get_int32 (uint8_t *buf)
+int32_t
+jbig2_get_int32 (const byte *buf)
{
return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
}
@@ -364,5 +365,10 @@
"Segment %d, flags=%x, type=%d, data_length=%d",
sh->segment_number, sh->flags, sh->flags & 63,
sh->data_length);
+ switch (sh->flags & 63)
+ {
+ case 38:
+ return jbig2_immediate_generic_region(ctx, sh, segment_data);
+ }
return 0;
}
--- a/jbig2_arith.c
+++ b/jbig2_arith.c
@@ -8,7 +8,7 @@
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
- $Id: jbig2_arith.c,v 1.5 2001/06/10 07:09:18 giles Exp $
+ $Id: jbig2_arith.c,v 1.6 2002/02/15 20:46:30 raph Exp $
*/
#include <stdio.h>
@@ -34,8 +34,33 @@
int offset;
};
-#define SOFTWARE_CONVENTION
+#undef SOFTWARE_CONVENTION
+/*
+ A note on the "software conventions".
+
+ The spec (in both draft and final versions) gives a "software
+ conventions" version of the arithmetic decoding procedure. It is not
+ normative, which is good, because it's wrong.
+
+ The value of C in the "software conventions" is nominally equal to
+ -C + (0x10000 >> CT) + (A << 16). However, the leftmost branch of
+ Figure G.3 gives a very wrong value for C, based on this equation.
+ Changing the (B << 9) to (B << 8) restores the invariant, and gives
+ correct decoding of the H.2 test sequence. However, the decoding of
+ the 042_4.jb2 test bitstream is incorrect, even with this change.
+  Changing the "Chigh < A?" predicate in Figure G.2 to "C < (A << 16)"
+  restores correct decoding for this bitstream, but I'm not sure this is
+  100% correct.
+
+ In any case, my benchmarking indicates no speed difference at all.
+ Therefore, for now we will just use the normative version. If
+ somebody wants to figure out how to do the software conventions
+ version correctly, and can establish that it really does improve
+ performance, it might be worthwhile to revisit.
+
+ */
+
static void
jbig2_arith_bytein (Jbig2ArithState *as)
{
@@ -48,7 +73,7 @@
as->offset += 4;
as->next_word_bytes = 4;
}
- /* Figure F.3 */
+ /* Figure G.3 */
B = (as->next_word >> 24) & 0xFF;
if (B == 0xFF)
{
@@ -76,11 +101,9 @@
#ifdef DEBUG
printf ("read %02x (a)\n", B);
#endif
- /* Note: the spec calls for adding (or subtracting) B <<
- 9 here. However, to be consistent with the sample run
- in Annex H.2, we use the B << 8 instead. */
#ifdef SOFTWARE_CONVENTION
- as->C += 0xFE00 - (B << 8);
+ /* Note: this is what the spec says. The spec is wrong. */
+ as->C += 0xFE00 - (B << 9);
#else
as->C += 0xFF00;
#endif
@@ -108,11 +131,10 @@
#ifdef DEBUG
printf ("read %02x (b)\n", B);
#endif
- /* Note: the spec calls for adding (or subtracting) B <<
- 9 here. However, to be consistent with the sample run
- in Annex H.2, we use the B << 8 instead. */
+
#ifdef SOFTWARE_CONVENTION
- as->C += 0xFE00 - (B << 8);
+ /* Note: this is what the spec says. The spec is wrong. */
+ as->C += 0xFE00 - (B << 9);
#else
as->C += 0xFF00;
#endif
@@ -158,7 +180,7 @@
result->next_word_bytes = 4;
result->offset = 4;
- /* Figure F.1 */
+ /* Figure G.1 */
#ifdef SOFTWARE_CONVENTION
result->C = (~(result->next_word >> 8)) & 0xFF0000;
#else
@@ -254,11 +276,12 @@
const Jbig2ArithQe *pqe = &jbig2_arith_Qe[cx & 0x7f];
bool D;
- /* Figure F.2 */
+ /* Figure G.2 */
as->A -= pqe->Qe;
if (
#ifdef SOFTWARE_CONVENTION
- (as->C) >> 16 < as->A
+ /* Note: I do not think this is correct. See above. */
+ (as->C >> 16) < as->A
#else
!((as->C >> 16) < pqe->Qe)
#endif
--- /dev/null
+++ b/jbig2_generic.c
@@ -1,0 +1,265 @@
+/**
+ * Generic region handlers.
+ **/
+
+#define OUTPUT_PBM
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef OUTPUT_PBM
+#include <stdio.h>
+#endif
+#include "jbig2.h"
+#include "jbig2_priv.h"
+#include "jbig2_arith.h"
+#include "jbig2_generic.h"
+
+/* Table 2: parameters handed to the generic region decoder */
+typedef struct {
+  bool MMR; /* filled from bit 0 of the generic region segment flags */
+  int32_t GBW; /* region width; rows are packed into (GBW + 7) >> 3 bytes */
+  int32_t GBH; /* region height, i.e. the number of rows decoded */
+  int GBTEMPLATE; /* filled from bits 1-2 of the segment flags */
+  bool TPGDON; /* filled from bit 3 of the segment flags */
+  bool USESKIP; /* the segment handler in this file always sets this to 0 */
+  /* SKIP */
+  byte gbat[8]; /* adaptive template bytes copied from the segment; 2 or 8 of them are filled in */
+} Jbig2GenericRegionParams;
+
+typedef struct {
+  int32_t width; /* 7.4.1 region segment information: 32-bit value at bytes 0-3 */
+  int32_t height; /* 32-bit value at bytes 4-7 */
+  int32_t x; /* 32-bit value at bytes 8-11 */
+  int32_t y; /* 32-bit value at bytes 12-15 */
+  byte flags; /* byte 16 */
+} Jbig2RegionSegmentInfo;
+
+typedef struct {
+  Jbig2WordStream super; /* kept first: get_next_word casts the Jbig2WordStream pointer back to this type */
+  const byte *data; /* the caller's buffer; only the pointer is stored, the bytes are not copied */
+  size_t size; /* number of bytes available at data */
+} Jbig2WordStreamBuf;
+
+/**
+ * jbig2_word_stream_buf_get_next_word: Fetch one word from a buffer stream.
+ * @self: The stream; it must be the super member of a Jbig2WordStreamBuf.
+ * @offset: Byte offset of the word within the buffer.
+ *
+ * Packs up to four bytes starting at @offset into a word, most
+ * significant byte first. Positions at or past the end of the buffer
+ * contribute zero bits, so reading beyond the data yields 0 instead of
+ * touching memory outside it.
+ *
+ * Return value: the assembled 32-bit word.
+ **/
+static uint32_t
+jbig2_word_stream_buf_get_next_word(Jbig2WordStream *self, int offset)
+{
+  Jbig2WordStreamBuf *z = (Jbig2WordStreamBuf *)self;
+  const byte *data = z->data;
+  uint32_t result = 0;
+  size_t remaining;
+  size_t i;
+
+  /* Compare before subtracting: as a size_t, size - offset would wrap
+     around to a huge count once offset has moved past the end. */
+  if (offset < 0 || (size_t)offset >= z->size)
+    return 0;
+  remaining = z->size - (size_t)offset;
+
+  /* Widen each byte before shifting so that << 24 cannot move a set
+     bit into the sign position of a promoted int. */
+  for (i = 0; i < 4 && i < remaining; i++)
+    result |= (uint32_t)data[offset + i] << ((3 - i) << 3);
+
+  return result;
+}
+
+/**
+ * jbig2_word_stream_buf_new: Wrap a memory buffer as a word stream.
+ * @ctx: Context supplying the allocator.
+ * @data: The bytes to read. Only the pointer is stored, so the buffer
+ *   must stay valid for as long as the stream is in use.
+ * @size: The size of @data, in bytes.
+ *
+ * Return value: the new stream, to be released with
+ * jbig2_word_stream_buf_free(), or NULL if the allocation failed.
+ **/
+Jbig2WordStream *
+jbig2_word_stream_buf_new(Jbig2Ctx *ctx, const byte *data, size_t size)
+{
+  Jbig2WordStreamBuf *result = (Jbig2WordStreamBuf *)jbig2_alloc(ctx->allocator, sizeof(Jbig2WordStreamBuf));
+
+  /* NOTE(review): assumes jbig2_alloc reports failure by returning NULL,
+     as malloc does -- confirm against the allocator contract. */
+  if (result == NULL)
+    return NULL;
+
+  result->super.get_next_word = jbig2_word_stream_buf_get_next_word;
+  result->data = data;
+  result->size = size;
+
+  return &result->super;
+}
+
+/* Release a buffer word stream through the context's allocator. Only the
+   stream object itself is freed; the data buffer it reads from is left
+   untouched. */
+void
+jbig2_word_stream_buf_free(Jbig2Ctx *ctx, Jbig2WordStream *ws)
+{
+  Jbig2WordStreamBuf *stream = (Jbig2WordStreamBuf *)ws;
+
+  jbig2_free(ctx->allocator, stream);
+}
+
+/**
+ * jbig2_decode_generic_template1: Arithmetic-decode a GBTEMPLATE=1 region.
+ * @ctx: The context used to allocate and free the word stream.
+ * @seg_number: Number of the segment being decoded (currently unused).
+ * @params: Region parameters; only GBW and GBH are consulted here.
+ * @data: The arithmetic-coded input data.
+ * @size: The size of the input data, in bytes.
+ * @gbreg: Output bitmap, 8 pixels per byte, (GBW + 7) >> 3 bytes per row.
+ *
+ * Only the nominal adaptive template pixel location is handled. When
+ * OUTPUT_PBM is defined, a "P4" header and every decoded row are also
+ * written to stdout.
+ *
+ * Return code: 0 on success, -1 if the word stream could not be created.
+ **/
+static int
+jbig2_decode_generic_template1(Jbig2Ctx *ctx,
+                               int32_t seg_number,
+                               const Jbig2GenericRegionParams *params,
+                               const byte *data, size_t size,
+                               byte *gbreg)
+{
+  Jbig2ArithCx GB_stats[8192];
+  Jbig2WordStream *ws = jbig2_word_stream_buf_new(ctx, data, size);
+  Jbig2ArithState *as;
+  int GBW = params->GBW;
+  int rowstride = (GBW + 7) >> 3;
+  int x, y;
+  byte *gbreg_line = gbreg;
+
+  if (ws == NULL)
+    return -1;
+
+  /* todo: ctx needs to be an argument; fix up memory allocation */
+  as = jbig2_arith_new(ws);
+
+  memset(GB_stats, 0, sizeof(GB_stats));
+  /* todo: currently we only handle the nominal gbat location */
+
+#ifdef OUTPUT_PBM
+  printf("P4\n%d %d\n", GBW, params->GBH);
+#endif
+
+  for (y = 0; y < params->GBH; y++)
+    {
+      uint32_t CONTEXT;
+      uint32_t line_m1;
+      uint32_t line_m2;
+      int padded_width = (GBW + 7) & -8;
+
+      /* Start from the first byte of each of the two rows above; rows
+         that would lie before the start of the bitmap read as zero. */
+      line_m1 = (y >= 1) ? gbreg_line[-rowstride] : 0;
+      line_m2 = (y >= 2) ? gbreg_line[-(rowstride << 1)] << 5 : 0;
+      CONTEXT = ((line_m1 >> 1) & 0x1f8) | ((line_m2 >> 4) & 0x1e00);
+
+      /* 6.2.5.7 3d */
+      for (x = 0; x < padded_width; x += 8)
+        {
+          byte result = 0;
+          int x_minor;
+          int minor_width = GBW - x > 8 ? 8 : GBW - x;
+
+          /* Shift in the next byte of each row above, or zero once the
+             right edge of the row has been reached. */
+          if (y >= 1)
+            line_m1 = (line_m1 << 8) |
+              (x + 8 < GBW ? gbreg_line[-rowstride + (x >> 3) + 1] : 0);
+
+          if (y >= 2)
+            line_m2 = (line_m2 << 8) |
+              (x + 8 < GBW ? gbreg_line[-(rowstride << 1) + (x >> 3) + 1] << 5: 0);
+
+          /* This is the speed-critical inner loop. */
+          for (x_minor = 0; x_minor < minor_width; x_minor++)
+            {
+              bool bit;
+
+              bit = jbig2_arith_decode(as, &GB_stats[CONTEXT]);
+              result |= bit << (7 - x_minor);
+              CONTEXT = ((CONTEXT & 0xefb) << 1) | bit |
+                ((line_m1 >> (8 - x_minor)) & 0x8) |
+                ((line_m2 >> (8 - x_minor)) & 0x200);
+            }
+          gbreg_line[x >> 3] = result;
+        }
+#ifdef OUTPUT_PBM
+      fwrite(gbreg_line, 1, rowstride, stdout);
+#endif
+      gbreg_line += rowstride;
+    }
+
+  /* The decoder is not called again after this point, so the stream it
+     reads from can be released.
+     todo: the arithmetic decoder state itself is still not released. */
+  jbig2_word_stream_buf_free(ctx, ws);
+
+  return 0;
+}
+
+/**
+ * jbig2_decode_generic_region: Decode a generic region.
+ * @ctx: The context for allocation and error reporting.
+ * @seg_number: Segment number attached to any warning that is reported.
+ * @params: Parameters, as specified in Table 2.
+ * @data: The input data.
+ * @size: The size of the input data, in bytes.
+ * @gbreg: Where to store the decoded data.
+ *
+ * Decodes a generic region, according to section 6.2. @gbreg must
+ * already have been allocated by the caller; it is packed 8 pixels to
+ * a byte, with every scanline starting on a byte boundary.
+ *
+ * Only arithmetic coding (MMR off) with GBTEMPLATE 1 is implemented so
+ * far. Any other combination is reported as a warning and refused.
+ *
+ * Todo: I think the stats need to be an argument.
+ *
+ * Return code: 0 on success, -1 when the requested mode is not yet
+ * implemented or the template decoder fails.
+ **/
+int
+jbig2_decode_generic_region(Jbig2Ctx *ctx,
+                            int32_t seg_number,
+                            const Jbig2GenericRegionParams *params,
+                            const byte *data, size_t size,
+                            byte *gbreg)
+{
+  /* Refuse everything except the single mode that is implemented. */
+  if (params->MMR || params->GBTEMPLATE != 1)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_WARNING, seg_number,
+                  "decode_generic_region: MMR=%d, GBTEMPLATE=%d NYI",
+                  params->MMR, params->GBTEMPLATE);
+      return -1;
+    }
+
+  return jbig2_decode_generic_template1(ctx, seg_number,
+                                        params, data, size, gbreg);
+}
+
+/* Unpack the region segment information field (7.4.1) found at the start
+   of @segment_data into @info: four consecutive 32-bit values followed by
+   one flags byte. Bytes 0 through 16 are read, so the caller has to make
+   sure at least 17 bytes are available. */
+void
+jbig2_get_region_segment_info(Jbig2RegionSegmentInfo *info,
+                              const byte *segment_data)
+{
+  info->width = jbig2_get_int32(&segment_data[0]);
+  info->height = jbig2_get_int32(&segment_data[4]);
+  info->x = jbig2_get_int32(&segment_data[8]);
+  info->y = jbig2_get_int32(&segment_data[12]);
+  info->flags = segment_data[16];
+}
+
+/**
+ * jbig2_immediate_generic_region: Handle an immediate generic region segment.
+ * @ctx: The context for allocation and error reporting.
+ * @sh: Header of the segment; supplies its number and data length.
+ * @segment_data: The segment's data, assumed to hold @sh->data_length bytes.
+ *
+ * Parses the region segment information field and the generic region
+ * flags (7.4.6), then decodes the rest of the data into a temporary
+ * bitmap. The bitmap is not stored anywhere yet, so it is released
+ * again before returning.
+ *
+ * Return code: 0 on success; -1 if the segment is malformed, the bitmap
+ * cannot be allocated, or the coding mode is not supported.
+ **/
+int
+jbig2_immediate_generic_region(Jbig2Ctx *ctx, Jbig2SegmentHeader *sh,
+                               const uint8_t *segment_data)
+{
+  Jbig2RegionSegmentInfo rsi;
+  byte seg_flags;
+  int8_t gbat[8];
+  int offset;
+  int gbat_bytes = 0;
+  Jbig2GenericRegionParams params;
+  int code;
+  byte *gbreg;
+  size_t rowstride;
+  size_t gbreg_size;
+
+  /* 7.4.6 */
+  if (sh->data_length < 18)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+                  "Segment too short");
+      return -1;
+    }
+
+  jbig2_get_region_segment_info(&rsi, segment_data);
+  jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+              "generic region: %d x %d @ (%d, %d), flags = %02x",
+              rsi.width, rsi.height, rsi.x, rsi.y, rsi.flags);
+
+  /* The dimensions come straight from the stream and are held in signed
+     32-bit fields, so a negative value cannot describe a real bitmap. */
+  if (rsi.width < 0 || rsi.height < 0)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+                  "invalid generic region dimensions");
+      return -1;
+    }
+
+  /* 7.4.6.2 */
+  seg_flags = segment_data[17];
+  jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+              "segment flags = %02x",
+              seg_flags);
+  if ((seg_flags & 1) && (seg_flags & 6))
+    jbig2_error(ctx, JBIG2_SEVERITY_WARNING, sh->segment_number,
+                "MMR is 1, but GBTEMPLATE is not 0");
+
+  /* 7.4.6.3 */
+  if (!(seg_flags & 1))
+    {
+      gbat_bytes = (seg_flags & 6) ? 2 : 8;
+      if (18 + gbat_bytes > sh->data_length)
+        {
+          jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+                      "Segment too short");
+          return -1;
+        }
+      memcpy(gbat, segment_data + 18, gbat_bytes);
+      jbig2_error(ctx, JBIG2_SEVERITY_INFO, sh->segment_number,
+                  "gbat: %d, %d", gbat[0], gbat[1]);
+    }
+
+  offset = 18 + gbat_bytes;
+
+  /* Table 34 */
+  params.MMR = seg_flags & 1;
+  params.GBTEMPLATE = (seg_flags & 6) >> 1;
+  params.TPGDON = (seg_flags & 8) >> 3;
+  params.USESKIP = 0;
+  params.GBW = rsi.width;
+  params.GBH = rsi.height;
+  memcpy (params.gbat, gbat, gbat_bytes);
+
+  /* Size the bitmap in size_t, and refuse a rowstride * height product
+     that would wrap around instead of allocating too small a buffer. */
+  rowstride = ((size_t)rsi.width + 7) >> 3;
+  if (rsi.height > 0 && rowstride > SIZE_MAX / (size_t)rsi.height)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+                  "generic region too large");
+      return -1;
+    }
+  gbreg_size = rowstride * (size_t)rsi.height;
+
+  /* NOTE(review): assumes jbig2_alloc reports failure by returning NULL. */
+  gbreg = jbig2_alloc(ctx->allocator, gbreg_size);
+  if (gbreg == NULL && gbreg_size != 0)
+    {
+      jbig2_error(ctx, JBIG2_SEVERITY_FATAL, sh->segment_number,
+                  "failed to allocate generic region bitmap");
+      return -1;
+    }
+
+  code = jbig2_decode_generic_region(ctx, sh->segment_number, &params,
+                                     segment_data + offset,
+                                     sh->data_length - offset,
+                                     gbreg);
+
+  /* todo: stash gbreg as segment result. Until that exists nothing else
+     refers to the bitmap, so free it here rather than leak it. */
+  if (gbreg != NULL)
+    jbig2_free(ctx->allocator, gbreg);
+
+  return code;
+}
--- /dev/null
+++ b/jbig2_generic.h
@@ -1,0 +1,4 @@
+int
+jbig2_immediate_generic_region(Jbig2Ctx *ctx, Jbig2SegmentHeader *sh,
+			       const uint8_t *segment_data); /* handler for segment type 38; returns 0 on success, -1 on failure */
+
--- a/jbig2_priv.h
+++ b/jbig2_priv.h
@@ -12,6 +12,7 @@
const char *fmt, ...);
typedef uint8_t byte;
+typedef int bool;
typedef enum {
JBIG2_FILE_HEADER,
@@ -45,3 +46,16 @@
int sh_ix;
};
+int32_t
+jbig2_get_int32 (const byte *buf);
+
+/* The word stream design is a compromise between simplicity and
+   trying to amortize the number of method calls. Each ::get_next_word
+   invocation pulls 4 bytes from the stream, packed big-endian into a
+   32 bit word. The offset argument is provided as a convenience. It
+   begins at 0 and increments by 4 for each successive invocation. */
+typedef struct _Jbig2WordStream Jbig2WordStream;
+
+struct _Jbig2WordStream {
+  uint32_t (*get_next_word) (Jbig2WordStream *self, int offset); /* self: the stream being read; offset: byte position of the requested word */
+};
--- a/makefile
+++ b/makefile
@@ -4,7 +4,7 @@
all: $(APPS)
-jbig2dec: jbig2.o jbig2dec.o jbig2_huffman.o jbig2_arith.o jbig2_image.o
+jbig2dec: jbig2.o jbig2dec.o jbig2_huffman.o jbig2_arith.o jbig2_image.o jbig2_generic.o
test_huffman: jbig2_huffman.c
gcc $(CFLAGS) -DTEST jbig2_huffman.c -o test_huffman