shithub: pokecrystal

ref: 8826d97ee42b85dbb45647ed81eaaaad56632754
dir: /tools/lz/mpcomp.c/

View raw version
#include "proto.h"

/*
   Multi-pass compressor: performs an initial pass generating a single command for each byte position in the data and
                          refines the command stream further in subsequent passes.
   Methods defined: 16
   Flags values: the flags are a bitfield; each bit triggers some alternate behavior if set:
     1: always emit a literal command (0) for the first byte of the file
     2: when reducing a two-byte repetition (2) command in the overlap elimination pass, don't force it to contain a
        whole number of repetitions (i.e., an even count)
     4: don't emit copy commands (4, 5, 6) with a count of 3
     8: don't emit single-byte repetition (1) commands
*/

struct command * try_compress_multi_pass (const unsigned char * data, const unsigned char * flipped, unsigned short * size, unsigned flags) {
  struct command * result = calloc(*size, sizeof(struct command));
  unsigned char * reversed = malloc(*size);
  short * sources = malloc(*size * sizeof(short));
  unsigned short pos, next, current = 0;
  for (pos = 0; pos < *size; pos ++) {
    reversed[pos] = data[*size - 1 - pos];
    sources[pos] = -1;
  }
  for (pos = (flags & 1); pos < *size; pos += (result[pos].count >= MULTIPASS_SKIP_THRESHOLD) ? result[pos].count : 1) {
    result[pos] = pick_command_for_pass(data, flipped, reversed, sources, *size, pos, flags);
    if ((result[pos].command >= 4) || (result[pos].count < MULTIPASS_SKIP_THRESHOLD)) sources[current ++] = pos;
  }
  free(reversed);
  free(sources);
  for (pos = 0; pos < *size; pos ++) {
    for (current = 1; current < result[pos].count; current ++) if (result[pos + current].count > result[pos].count) {
      result[pos].count = current;
      if ((result[pos].command == 2) && (current & 1) && !(flags & 2)) result[pos].count --;
    }
    if (result[pos].count <= command_size(result[pos])) result[pos] = (struct command) {.command = 0, .count = 0};
  }
  for (pos = 0; pos < *size; pos ++)
    if (!result[pos].command) {
      for (current = 1; (current < MAX_COMMAND_COUNT) && ((pos + current) < *size); current ++) if (result[pos + current].command) break;
      result[pos] = (struct command) {.command = 0, .count = current, .value = pos};
    } else if (result[pos].count > MAX_COMMAND_COUNT) {
      result[pos + MAX_COMMAND_COUNT] = result[pos];
      result[pos + MAX_COMMAND_COUNT].count -= MAX_COMMAND_COUNT;
      if ((result[pos + MAX_COMMAND_COUNT].command >= 4) && (result[pos + MAX_COMMAND_COUNT].value >= 0))
        result[pos + MAX_COMMAND_COUNT].value += (result[pos].command == 6) ? -MAX_COMMAND_COUNT : MAX_COMMAND_COUNT;
      result[pos].count = MAX_COMMAND_COUNT;
    }
  for (next = pos = 0; pos < *size; pos ++)
    if (pos == next)
      next += result[pos].count;
    else
      result[pos].command = 7;
  repack(&result, size);
  return result;
}

struct command pick_command_for_pass (const unsigned char * data, const unsigned char * flipped, const unsigned char * reversed, const short * sources,
                                      unsigned short length, unsigned short position, unsigned flags) {
  struct command result = pick_repetition_for_pass(data, length, position, flags);
  if (result.count >= MULTIPASS_SKIP_THRESHOLD) return result;
  unsigned char p;
  for (p = 0; p < 3; p ++) {
    struct command temp = pick_copy_for_pass(data, p[(const unsigned char * []) {data, flipped, reversed}], sources, p + 4, length, position, flags);
    if (temp.command == 7) continue;
    if (temp.count > result.count) result = temp;
  }
  if ((result.command >= 4) && (result.value >= (position - LOOKBACK_LIMIT))) result.value -= position;
  return result;
}

struct command pick_repetition_for_pass (const unsigned char * data, unsigned short length, unsigned short position, unsigned flags) {
  unsigned short p;
  if (data[position]) {
    if ((position + 1) >= length) return (struct command) {.command = 1, .count = 1, .value = data[position]};
    struct command result;
    if (!(flags & 8) && (data[position] == data[position + 1]))
      result = (struct command) {.command = 1, .value = data[position]};
    else
      result = (struct command) {.command = 2, .value = data[position] | (data[position + 1] << 8)};
    for (p = 1; ((position + p) < length) && (p < LOOKAHEAD_LIMIT); p ++) if (data[position + p] != data[position + (p & 1)]) break;
    result.count = p;
    return result;
  } else {
    for (p = position + 1; (p < length) && (p < (position + LOOKAHEAD_LIMIT)); p ++) if (data[p]) break;
    return (struct command) {.command = 3, .count = p - position};
  }
}

struct command pick_copy_for_pass (const unsigned char * data, const unsigned char * reference, const short * sources, unsigned char command_type,
                                   unsigned short length, unsigned short position, unsigned flags) {
  struct command result = {.command = 7, .count = (flags & 4) ? 4 : 3};
  if (length < 3) return result;
  unsigned refpos, count;
  const unsigned char * current;
  unsigned char buffer[6] = {0};
  memcpy(buffer, reference + length - 3, 3);
  while (*sources >= 0) {
    refpos = *(sources ++);
    if (command_type == 6) refpos = length - 1 - refpos;
    if (refpos >= (length - 3))
      current = buffer + refpos - (length - 3);
    else
      current = reference + refpos;
    if (memcmp(data + position, current, ((position + 4) > length) ? length - position : 4)) continue;
    for (count = 4; (count < (length - position)) && (count < (length - refpos)); count ++) if (data[position + count] != current[count]) break;
    if (count > (length - refpos)) count = length - refpos;
    if (count > (length - position)) count = length - position;
    if (result.count > count) continue;
    result = (struct command) {.command = command_type, .count = count, .value = sources[-1]};
  }
  return result;
}