ref: d0ab3a0dd8f6355b3603d0fb04043a9ae867639b
dir: /appl/lib/readjpg.b/
implement RImagefile;
include "sys.m";
sys: Sys;
include "draw.m";
include "bufio.m";
bufio: Bufio;
Iobuf: import bufio;
include "imagefile.m";
# Constants, all preceded by byte 16rFF
SOF: con byte 16rC0; # Start of Frame
SOF2: con byte 16rC2; # Start of Frame; progressive Huffman
JPG: con byte 16rC8; # Reserved for JPEG extensions
DHT: con byte 16rC4; # Define Huffman Tables
DAC: con byte 16rCC; # Arithmetic coding conditioning
RST: con byte 16rD0; # Restart interval termination
RST7: con byte 16rD7; # Restart interval termination (highest value)
SOI: con byte 16rD8; # Start of Image
EOI: con byte 16rD9; # End of Image
SOS: con byte 16rDA; # Start of Scan
DQT: con byte 16rDB; # Define quantization tables
DNL: con byte 16rDC; # Define number of lines
DRI: con byte 16rDD; # Define restart interval
DHP: con byte 16rDE; # Define hierarchical progression
EXP: con byte 16rDF; # Expand reference components
APPn: con byte 16rE0; # Reserved for application segments
JPGn: con byte 16rF0; # Reserved for JPEG extensions
COM: con byte 16rFE; # Comment
Header: adt
{
fd: ref Iobuf;
ch: chan of (ref Rawimage, string);
# variables in i/o routines
sr: int; # shift register, right aligned
cnt: int; # # bits in right part of sr
buf: array of byte;
bufi: int;
nbuf: int;
Nf: int;
comp: array of Framecomp;
mode: byte;
X,Y: int;
qt: array of array of int; # quantization tables
dcht: array of ref Huffman;
acht: array of ref Huffman;
sf: array of byte; # start of frame; do better later
ss: array of byte; # start of scan; do better later
ri: int;
};
NBUF: con 16*1024;
Huffman: adt
{
bits: array of int;
size: array of int;
code: array of int;
val: array of int;
mincode: array of int;
maxcode: array of int;
valptr: array of int;
# fast lookup
value: array of int;
shift: array of int;
};
Framecomp: adt # Frame component specifier from SOF marker
{
C: int;
H: int;
V: int;
Tq: int;
};
zerobytes: array of byte;
zeroints: array of int;
zeroreals: array of real;
clamp: array of byte;
NCLAMP: con 1000;
CLAMPOFF: con 300;
init(iomod: Bufio)
{
if(sys == nil)
sys = load Sys Sys->PATH;
bufio = iomod;
zerobytes = array[8*8] of byte;
zeroints = array[8*8] of int;
zeroreals = array[8*8] of real;
for(k:=0; k<8*8; k++){
zerobytes[k] = byte 0;
zeroints[k] = 0;
zeroreals[k] = 0.0;
}
clamp = array[NCLAMP] of byte;
for(k=0; k<CLAMPOFF; k++)
clamp[k] = byte 0;
for(; k<CLAMPOFF+256; k++)
clamp[k] = byte(k-CLAMPOFF);
for(; k<NCLAMP; k++)
clamp[k] = byte 255;
}
read(fd: ref Iobuf): (ref Rawimage, string)
{
# spawn a subprocess so I/O errors can clean up easily
ch := chan of (ref Rawimage, string);
spawn readslave(fd, ch);
return <-ch;
}
readmulti(fd: ref Iobuf): (array of ref Rawimage, string)
{
(i, err) := read(fd);
if(i != nil){
a := array[1] of { i };
return (a, err);
}
return (nil, err);
}
readslave(fd: ref Iobuf, ch: chan of (ref Rawimage, string))
{
image: ref Rawimage;
(header, err) := soiheader(fd, ch);
if(header == nil){
ch <-= (nil, err);
exit;
}
buf := header.buf;
nseg := 0;
Loop:
while(err == ""){
m: int;
b: array of byte;
nseg++;
(m, b, err) = readsegment(header);
case m{
-1 =>
break Loop;
int APPn+0 =>
if(nseg==1 && string b[0:4]=="JFIF"){ # JFIF header; check version
vers0 := int b[5];
vers1 := int b[6];
if(vers0>1 || vers1>2)
err = sys->sprint("ReadJPG: can't handle JFIF version %d.%2d", vers0, vers1);
}
int APPn+1 to int APPn+15 =>
;
int DQT =>
err = quanttables(header, b);
int SOF =>
header.Y = int2(b, 1);
header.X = int2(b, 3);
header.Nf = int b[5];
header.comp = array[header.Nf] of Framecomp;
for(i:=0; i<header.Nf; i++){
header.comp[i].C = int b[6+3*i+0];
(H, V) := nibbles(b[6+3*i+1]);
header.comp[i].H = H;
header.comp[i].V = V;
header.comp[i].Tq = int b[6+3*i+2];
}
header.mode = SOF;
header.sf = b;
int SOF2 =>
err = sys->sprint("ReadJPG: can't handle progressive Huffman mode");
break Loop;
int SOS =>
header.ss = b;
(image, err) = decodescan(header);
if(err != "")
break Loop;
# BUG: THIS SHOULD USE THE LOOP TO FINISH UP
x := nextbyte(header, 1);
if(x != 16rFF)
err = sys->sprint("ReadJPG: didn't see marker at end of scan; saw %x", x);
else{
x = nextbyte(header, 1);
if(x != int EOI)
err = sys->sprint("ReadJPG: expected EOI saw %x", x);
}
break Loop;
int DHT =>
err = huffmantables(header, b);
int DRI =>
header.ri = int2(b, 0);
int COM =>
;
int EOI =>
break Loop;
* =>
err = sys->sprint("ReadJPG: unknown marker %.2x", m);
}
}
ch <-= (image, err);
}
readerror(): string
{
return sys->sprint("ReadJPG: read error: %r");
}
marker(buf: array of byte, n: int): byte
{
if(buf[n] != byte 16rFF)
return byte 0;
return buf[n+1];
}
int2(buf: array of byte, n: int): int
{
return (int buf[n]<<8)+(int buf[n+1]);
}
nibbles(b: byte): (int, int)
{
i := int b;
return (i>>4, i&15);
}
soiheader(fd: ref Iobuf, ch: chan of (ref Rawimage, string)): (ref Header, string)
{
# 1+ for restart preamble (see nextbyte), +1 for sentinel
buf := array[1+NBUF+1] of byte;
if(fd.read(buf, 2) != 2)
return (nil, sys->sprint("ReadJPG: can't read header: %r"));
if(marker(buf, 0) != SOI)
return (nil, sys->sprint("ReadJPG: unrecognized marker in header"));
h := ref Header;
h.buf = buf;
h.bufi = 0;
h.nbuf = 0;
h.fd = fd;
h.ri = 0;
h.ch = ch;
return (h, nil);
}
readsegment(h: ref Header): (int, array of byte, string)
{
if(h.fd.read(h.buf, 2) != 2)
return (-1, nil, readerror());
m := int marker(h.buf, 0);
case m{
int EOI =>
return (m, nil, nil);
0 =>
err := sys->sprint("ReadJPG: expecting marker; saw %.2x%.2x)",
int h.buf[0], int h.buf[1]);
return (-1, nil, err);
}
if(h.fd.read(h.buf, 2) != 2)
return (-1, nil, readerror());
n := int2(h.buf, 0);
if(n < 2)
return (-1, nil, readerror());
n -= 2;
# if(n > len h.buf){
# h.buf = array[n+1] of byte; # +1 for sentinel
# #h.nbuf = n;
# }
b := array[n] of byte;
if(h.fd.read(b, n) != n)
return (-1, nil, readerror());
return (m, b, nil);
}
huffmantables(h: ref Header, b: array of byte): string
{
if(h.dcht == nil){
h.dcht = array[4] of ref Huffman;
h.acht = array[4] of ref Huffman;
}
err: string;
mt: int;
for(l:=0; l<len b; l+=17+mt){
(mt, err) = huffmantable(h, b[l:]);
if(err != nil)
return err;
}
return nil;
}
huffmantable(h: ref Header, b: array of byte): (int, string)
{
t := ref Huffman;
(Tc, th) := nibbles(b[0]);
if(Tc > 1)
return (0, sys->sprint("ReadJPG: unknown Huffman table class %d", Tc));
if(th>3 || (h.mode==SOF && th>1))
return (0, sys->sprint("ReadJPG: unknown Huffman table index %d", th));
if(Tc == 0)
h.dcht[th] = t;
else
h.acht[th] = t;
# flow chart C-2
nsize := 0;
for(i:=0; i<16; i++)
nsize += int b[1+i];
t.size = array[nsize+1] of int;
k := 0;
for(i=1; i<=16; i++){
n := int b[i];
for(j:=0; j<n; j++)
t.size[k++] = i;
}
t.size[k] = 0;
# initialize HUFFVAL
t.val = array[nsize] of int;
for(i=0; i<nsize; i++){
t.val[i] = int b[17+i];
}
# flow chart C-3
t.code = array[nsize+1] of int;
k = 0;
code := 0;
si := t.size[0];
for(;;){
do
t.code[k++] = code++;
while(t.size[k] == si);
if(t.size[k] == 0)
break;
do{
code <<= 1;
si++;
}while(t.size[k] != si);
}
# flow chart F-25
t.mincode = array[17] of int;
t.maxcode = array[17] of int;
t.valptr = array[17] of int;
i = 0;
j := 0;
F25:
for(;;){
for(;;){
i++;
if(i > 16)
break F25;
if(int b[i] != 0)
break;
t.maxcode[i] = -1;
}
t.valptr[i] = j;
t.mincode[i] = t.code[j];
j += int b[i]-1;
t.maxcode[i] = t.code[j];
j++;
}
# create byte-indexed fast path tables
t.value = array[256] of int;
t.shift = array[256] of int;
maxcode := t.maxcode;
# stupid startup algorithm: just run machine for each byte value
Bytes:
for(v:=0; v<256; v++){
cnt := 7;
m := 1<<7;
code = 0;
sr := v;
i = 1;
for(;;i++){
if(sr & m)
code |= 1;
if(code <= maxcode[i])
break;
code <<= 1;
m >>= 1;
if(m == 0){
t.shift[v] = 0;
t.value[v] = -1;
continue Bytes;
}
cnt--;
}
t.shift[v] = 8-cnt;
t.value[v] = t.val[t.valptr[i]+(code-t.mincode[i])];
}
return (nsize, nil);
}
quanttables(h: ref Header, b: array of byte): string
{
if(h.qt == nil)
h.qt = array[4] of array of int;
err: string;
n: int;
for(l:=0; l<len b; l+=1+n){
(n, err) = quanttable(h, b[l:]);
if(err != nil)
return err;
}
return nil;
}
quanttable(h: ref Header, b: array of byte): (int, string)
{
(pq, tq) := nibbles(b[0]);
if(pq > 1)
return (0, sys->sprint("ReadJPG: unknown quantization table class %d", pq));
if(tq > 3)
return (0, sys->sprint("ReadJPG: unknown quantization table index %d", tq));
q := array[64] of int;
h.qt[tq] = q;
for(i:=0; i<64; i++){
if(pq == 0)
q[i] = int b[1+i];
else
q[i] = int2(b, 1+2*i);
}
return (64*(1+pq), nil);
}
zig := array[64] of {
0, 1, 8, 16, 9, 2, 3, 10, 17, # 0-7
24, 32, 25, 18, 11, 4, 5, # 8-15
12, 19, 26, 33, 40, 48, 41, 34, # 16-23
27, 20, 13, 6, 7, 14, 21, 28, # 24-31
35, 42, 49, 56, 57, 50, 43, 36, # 32-39
29, 22, 15, 23, 30, 37, 44, 51, # 40-47
58, 59, 52, 45, 38, 31, 39, 46, # 48-55
53, 60, 61, 54, 47, 55, 62, 63 # 56-63
};
decodescan(h: ref Header): (ref Rawimage, string)
{
ss := h.ss;
Ns := int ss[0];
if((Ns!=3 && Ns!=1) || Ns!=h.Nf)
return (nil, "ReadJPG: can't handle scan not 3 components");
image := ref Rawimage;
image.r = ((0, 0), (h.X, h.Y));
image.cmap = nil;
image.transp = 0;
image.trindex = byte 0;
image.fields = 0;
image.chans = array[h.Nf] of array of byte;
if(Ns == 3)
image.chandesc = CRGB;
else
image.chandesc = CY;
image.nchans = h.Nf;
for(k:=0; k<h.Nf; k++)
image.chans[k] = array[h.X*h.Y] of byte;
# build per-component arrays
Td := array[Ns] of int;
Ta := array[Ns] of int;
data := array[Ns] of array of array of real;
H := array[Ns] of int;
V := array[Ns] of int;
DC := array[Ns] of int;
# compute maximum H and V
Hmax := 0;
Vmax := 0;
for(comp:=0; comp<Ns; comp++){
if(h.comp[comp].H > Hmax)
Hmax = h.comp[comp].H;
if(h.comp[comp].V > Vmax)
Vmax = h.comp[comp].V;
}
# initialize data structures
allHV1 := 1;
for(comp=0; comp<Ns; comp++){
# JPEG requires scan components to be in same order as in frame,
# so if both have 3 we know scan is Y Cb Cr and there's no need to
# reorder
cs := int ss[1+2*comp];
(Td[comp], Ta[comp]) = nibbles(ss[2+2*comp]);
H[comp] = h.comp[comp].H;
V[comp] = h.comp[comp].V;
nblock := H[comp]*V[comp];
if(nblock != 1)
allHV1 = 0;
data[comp] = array[nblock] of array of real;
DC[comp] = 0;
for(m:=0; m<nblock; m++)
data[comp][m] = array[8*8] of real;
}
ri := h.ri;
h.buf[0] = byte 16rFF; # see nextbyte()
h.cnt = 0;
h.sr = 0;
nacross := ((h.X+(8*Hmax-1))/(8*Hmax));
nmcu := ((h.Y+(8*Vmax-1))/(8*Vmax))*nacross;
zz := array[64] of real;
err := "";
for(mcu:=0; mcu<nmcu; ){
for(comp=0; comp<Ns; comp++){
dcht := h.dcht[Td[comp]];
acht := h.acht[Ta[comp]];
qt := h.qt[h.comp[comp].Tq];
for(block:=0; block<H[comp]*V[comp]; block++){
# F-22
t := decode(h, dcht);
diff := receive(h, t);
DC[comp] += diff;
# F-23
zz[0:] = zeroreals;
zz[0] = real (qt[0]*DC[comp]);
k = 1;
for(;;){
rs := decode(h, acht);
(rrrr, ssss) := nibbles(byte rs);
if(ssss == 0){
if(rrrr != 15)
break;
k += 16;
}else{
k += rrrr;
z := receive(h, ssss);
zz[zig[k]] = real (z*qt[k]);
if(k == 63)
break;
k++;
}
}
idct(zz, data[comp][block]);
}
}
# rotate colors to RGB and assign to bytes
if(Ns == 1) # very easy
colormap1(h, image, data[0][0], mcu, nacross);
else if(allHV1) # fairly easy
colormapall1(h, image, data[0][0], data[1][0], data[2][0], mcu, nacross);
else # miserable general case
colormap(h, image, data[0], data[1], data[2], mcu, nacross, Hmax, Vmax, H, V);
# process restart marker, if present
mcu++;
if(ri>0 && mcu<nmcu-1 && mcu%ri==0){
restart := mcu/ri-1;
rst, nskip: int;
nskip = 0;
do{
do{
rst = nextbyte(h, 1);
nskip++;
}while(rst>=0 && rst!=16rFF);
if(rst == 16rFF){
rst = nextbyte(h, 1);
nskip++;
}
}while(rst>=0 && (rst&~7)!=int RST);
if(nskip != 2)
err = sys->sprint("skipped %d bytes at restart %d\n", nskip-2, restart);
if(rst < 0)
return (nil, readerror());
if((rst&7) != (restart&7))
return (nil, sys->sprint("ReadJPG: expected RST%d got %d", restart&7, int rst&7));
h.cnt = 0;
h.sr = 0;
for(comp=0; comp<Ns; comp++)
DC[comp] = 0;
}
}
return (image, err);
}
colormap1(h: ref Header, image: ref Rawimage, data: array of real, mcu, nacross: int)
{
pic := image.chans[0];
minx := 8*(mcu%nacross);
dx := 8;
if(minx+dx > h.X)
dx = h.X-minx;
miny := 8*(mcu/nacross);
dy := 8;
if(miny+dy > h.Y)
dy = h.Y-miny;
pici := miny*h.X+minx;
k := 0;
for(y:=0; y<dy; y++){
for(x:=0; x<dx; x++){
r := clamp[int (data[k+x]+128.)+CLAMPOFF];
pic[pici+x] = r;
}
pici += h.X;
k += 8;
}
}
colormapall1(h: ref Header, image: ref Rawimage, data0, data1, data2: array of real, mcu, nacross: int)
{
rpic := image.chans[0];
gpic := image.chans[1];
bpic := image.chans[2];
minx := 8*(mcu%nacross);
dx := 8;
if(minx+dx > h.X)
dx = h.X-minx;
miny := 8*(mcu/nacross);
dy := 8;
if(miny+dy > h.Y)
dy = h.Y-miny;
pici := miny*h.X+minx;
k := 0;
for(y:=0; y<dy; y++){
for(x:=0; x<dx; x++){
Y := data0[k+x]+128.;
Cb := data1[k+x];
Cr := data2[k+x];
r := int (Y+1.402*Cr);
g := int (Y-0.34414*Cb-0.71414*Cr);
b := int (Y+1.772*Cb);
rpic[pici+x] = clamp[r+CLAMPOFF];
gpic[pici+x] = clamp[g+CLAMPOFF];
bpic[pici+x] = clamp[b+CLAMPOFF];
}
pici += h.X;
k += 8;
}
}
colormap(h: ref Header, image: ref Rawimage, data0, data1, data2: array of array of real, mcu, nacross, Hmax, Vmax: int, H, V: array of int)
{
rpic := image.chans[0];
gpic := image.chans[1];
bpic := image.chans[2];
minx := 8*Hmax*(mcu%nacross);
dx := 8*Hmax;
if(minx+dx > h.X)
dx = h.X-minx;
miny := 8*Vmax*(mcu/nacross);
dy := 8*Vmax;
if(miny+dy > h.Y)
dy = h.Y-miny;
pici := miny*h.X+minx;
H0 := H[0];
H1 := H[1];
H2 := H[2];
for(y:=0; y<dy; y++){
t := y*V[0];
b0 := H0*(t/(8*Vmax));
y0 := 8*((t/Vmax)&7);
t = y*V[1];
b1 := H1*(t/(8*Vmax));
y1 := 8*((t/Vmax)&7);
t = y*V[2];
b2 := H2*(t/(8*Vmax));
y2 := 8*((t/Vmax)&7);
x0 := 0;
x1 := 0;
x2 := 0;
for(x:=0; x<dx; x++){
Y := data0[b0][y0+x0++*H0/Hmax]+128.;
Cb := data1[b1][y1+x1++*H1/Hmax];
Cr := data2[b2][y2+x2++*H2/Hmax];
if(x0*H0/Hmax >= 8){
x0 = 0;
b0++;
}
if(x1*H1/Hmax >= 8){
x1 = 0;
b1++;
}
if(x2*H2/Hmax >= 8){
x2 = 0;
b2++;
}
r := int (Y+1.402*Cr);
g := int (Y-0.34414*Cb-0.71414*Cr);
b := int (Y+1.772*Cb);
rpic[pici+x] = clamp[r+CLAMPOFF];
gpic[pici+x] = clamp[g+CLAMPOFF];
bpic[pici+x] = clamp[b+CLAMPOFF];
}
pici += h.X;
}
}
# decode next 8-bit value from entropy-coded input. chart F-26
decode(h: ref Header, t: ref Huffman): int
{
maxcode := t.maxcode;
if(h.cnt < 8)
nextbyte(h, 0);
# fast lookup
code := (h.sr>>(h.cnt-8))&16rFF;
v := t.value[code];
if(v >= 0){
h.cnt -= t.shift[code];
return v;
}
h.cnt -= 8;
if(h.cnt == 0)
nextbyte(h, 0);
h.cnt--;
cnt := h.cnt;
m := 1<<cnt;
sr := h.sr;
code <<= 1;
i := 9;
for(;;i++){
if(sr & m)
code |= 1;
if(code <= maxcode[i])
break;
code <<= 1;
m >>= 1;
if(m == 0){
sr = nextbyte(h, 0);
m = 16r80;
cnt = 8;
}
cnt--;
}
h.cnt = cnt;
return t.val[t.valptr[i]+(code-t.mincode[i])];
}
#
# load next byte of input
# we should really just call h.fd.getb(), but it's faster just to use Bufio
# to load big chunks and manage our own byte-at-a-time input.
#
nextbyte(h: ref Header, marker: int): int
{
b := int h.buf[h.bufi++];
if(b == 16rFF){
# check for sentinel at end of buffer
if(h.bufi >= h.nbuf){
underflow := (h.bufi > h.nbuf);
h.nbuf = h.fd.read(h.buf, NBUF);
if(h.nbuf <= 0){
h.ch <-= (nil, readerror());
exit;
}
h.buf[h.nbuf] = byte 16rFF;
h.bufi = 0;
if(underflow) # if ran off end of buffer, just restart
return nextbyte(h, marker);
}
if(marker)
return b;
b2 := h.buf[h.bufi++];
if(b2 != byte 0){
if(b2 == DNL){
h.ch <-= (nil, "ReadJPG: DNL marker unimplemented");
exit;
}else if(b2<RST && RST7<b2){
h.ch <-= (nil, sys->sprint("ReadJPG: unrecognized marker %x", int b2));
exit;
}
# decode is reading into restart marker; satisfy it and restore state
if(h.bufi < 2){
# misery: must shift up buffer
h.buf[1:] = h.buf[0:h.nbuf+1];
h.nbuf++;
h.buf[0] = byte 16rFF;
h.bufi -= 1;
}else
h.bufi -= 2;
b = 16rFF;
}
}
h.cnt += 8;
h.sr = (h.sr<<8)|b;
return b;
}
# return next s bits of input, MSB first, and level shift it
receive(h: ref Header, s: int): int
{
while(h.cnt < s)
nextbyte(h, 0);
v := h.sr >> (h.cnt-s);
m := (1<<s);
v &= m-1;
h.cnt -= s;
# level shift
if(v < (m>>1))
v += ~(m-1)+1;
return v;
}
# IDCT based on Arai, Agui, and Nakajima, using flow chart Figure 4.8
# of Pennebaker & Mitchell, JPEG: Still Image Data Compression Standard.
# Remember IDCT is reverse of flow of DCT.
a0: con 1.414;
a1: con 0.707;
a2: con 0.541;
a3: con 0.707;
a4: con 1.307;
a5: con -0.383;
# scaling factors from eqn 4-35 of P&M
s1: con 1.0196;
s2: con 1.0823;
s3: con 1.2026;
s4: con 1.4142;
s5: con 1.8000;
s6: con 2.6131;
s7: con 5.1258;
# overall normalization of 1/16, folded into premultiplication on vertical pass
scale: con 0.0625;
idct(zin: array of real, zout: array of real)
{
x, y: int;
r := array[8*8] of real;
# transform horizontally
for(y=0; y<8; y++){
eighty := y<<3;
# if all non-DC components are zero, just propagate the DC term
if(zin[eighty+1]==0.)
if(zin[eighty+2]==0. && zin[eighty+3]==0.)
if(zin[eighty+4]==0. && zin[eighty+5]==0.)
if(zin[eighty+6]==0. && zin[eighty+7]==0.){
v := zin[eighty]*a0;
r[eighty+0] = v;
r[eighty+1] = v;
r[eighty+2] = v;
r[eighty+3] = v;
r[eighty+4] = v;
r[eighty+5] = v;
r[eighty+6] = v;
r[eighty+7] = v;
continue;
}
# step 5
in1 := s1*zin[eighty+1];
in3 := s3*zin[eighty+3];
in5 := s5*zin[eighty+5];
in7 := s7*zin[eighty+7];
f2 := s2*zin[eighty+2];
f3 := s6*zin[eighty+6];
f5 := (in1+in7);
f7 := (in5+in3);
# step 4
g2 := f2-f3;
g4 := (in5-in3);
g6 := (in1-in7);
g7 := f5+f7;
# step 3.5
t := (g4+g6)*a5;
# step 3
f0 := a0*zin[eighty+0];
f1 := s4*zin[eighty+4];
f3 += f2;
f2 = a1*g2;
# step 2
g0 := f0+f1;
g1 := f0-f1;
g3 := f2+f3;
g4 = t-a2*g4;
g5 := a3*(f5-f7);
g6 = a4*g6+t;
# step 1
f0 = g0+g3;
f1 = g1+f2;
f2 = g1-f2;
f3 = g0-g3;
f5 = g5-g4;
f6 := g5+g6;
f7 = g6+g7;
# step 6
r[eighty+0] = (f0+f7);
r[eighty+1] = (f1+f6);
r[eighty+2] = (f2+f5);
r[eighty+3] = (f3-g4);
r[eighty+4] = (f3+g4);
r[eighty+5] = (f2-f5);
r[eighty+6] = (f1-f6);
r[eighty+7] = (f0-f7);
}
# transform vertically
for(x=0; x<8; x++){
# step 5
in1 := scale*s1*r[x+8];
in3 := scale*s3*r[x+24];
in5 := scale*s5*r[x+40];
in7 := scale*s7*r[x+56];
f2 := scale*s2*r[x+16];
f3 := scale*s6*r[x+48];
f5 := (in1+in7);
f7 := (in5+in3);
# step 4
g2 := f2-f3;
g4 := (in5-in3);
g6 := (in1-in7);
g7 := f5+f7;
# step 3.5
t := (g4+g6)*a5;
# step 3
f0 := scale*a0*r[x];
f1 := scale*s4*r[x+32];
f3 += f2;
f2 = a1*g2;
# step 2
g0 := f0+f1;
g1 := f0-f1;
g3 := f2+f3;
g4 = t-a2*g4;
g5 := a3*(f5-f7);
g6 = a4*g6+t;
# step 1
f0 = g0+g3;
f1 = g1+f2;
f2 = g1-f2;
f3 = g0-g3;
f5 = g5-g4;
f6 := g5+g6;
f7 = g6+g7;
# step 6
zout[x] = (f0+f7);
zout[x+8] = (f1+f6);
zout[x+16] = (f2+f5);
zout[x+24] = (f3-g4);
zout[x+32] = (f3+g4);
zout[x+40] = (f2-f5);
zout[x+48] = (f1-f6);
zout[x+56] = (f0-f7);
}
}