ref: 47eca22c0a296502f838d1b3676b0cd8f5ba57b9
parent: 83d4cf3fd1a257df21468f5bb3b4329e5cf8a40f
author: Konstantinn Bonnet <qu7uux@gmail.com>
date: Wed Jan 28 15:52:07 EST 2015
remove asm, not used on amd64 anyway
--- a/asm_draw.h
+++ /dev/null
@@ -1,132 +1,0 @@
-//
-// asm_draw.h
-//
-// Include file for asm drawing routines: structure field offsets as #defines.
-//
-
-//
-// !!! note that this file must match the corresponding C structures at all
-// times !!!
-//
-
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define NEAR_CLIP 0.01
-
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define CYCLE 128
-
-// espan_t structure (offsets are used as displacements, e.g. espan_t_u(%ebx))
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define espan_t_u 0
-#define espan_t_v 4
-#define espan_t_count 8
-#define espan_t_pnext 12
-#define espan_t_size 16
-
-// sspan_t structure (same leading layout as espan_t, without the pnext field)
-// !!! if this is changed, it must be changed in d_local.h too !!!
-#define sspan_t_u 0
-#define sspan_t_v 4
-#define sspan_t_count 8
-#define sspan_t_size 12
-
-// spanpackage_t structure
-// !!! if this is changed, it must be changed in d_polyset.c too !!!
-#define spanpackage_t_pdest 0
-#define spanpackage_t_pz 4
-#define spanpackage_t_count 8
-#define spanpackage_t_ptex 12
-#define spanpackage_t_sfrac 16
-#define spanpackage_t_tfrac 20
-#define spanpackage_t_light 24
-#define spanpackage_t_zi 28
-#define spanpackage_t_size 32
-
-// edge_t structure
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define et_u 0
-#define et_u_step 4
-#define et_prev 8
-#define et_next 12
-#define et_surfs 16
-#define et_nextremove 20
-#define et_nearzi 24
-#define et_owner 28
-#define et_size 32
-
-// surf_t structure (note st_size == 1 << SURF_T_SHIFT)
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define SURF_T_SHIFT 6
-#define st_next 0
-#define st_prev 4
-#define st_spans 8
-#define st_key 12
-#define st_last_u 16
-#define st_spanstate 20
-#define st_flags 24
-#define st_data 28
-#define st_entity 32
-#define st_nearzi 36
-#define st_insubmodel 40
-#define st_d_ziorigin 44
-#define st_d_zistepu 48
-#define st_d_zistepv 52
-#define st_pad 56
-#define st_size 64
-
-// clipplane_t structure (the last three fields are one byte apart)
-// !!! if this is changed, it must be changed in r_local.h too !!!
-#define cp_normal 0
-#define cp_dist 12
-#define cp_next 16
-#define cp_leftedge 20
-#define cp_rightedge 21
-#define cp_reserved 22
-#define cp_size 24
-
-// medge_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define me_v 0
-#define me_cachededgeoffset 4
-#define me_size 8
-
-// mvertex_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define mv_position 0
-#define mv_size 12
-
-// refdef_t structure
-// !!! if this is changed, it must be changed in render.h too !!!
-#define rd_vrect 0
-#define rd_aliasvrect 20
-#define rd_vrectright 40
-#define rd_vrectbottom 44
-#define rd_aliasvrectright 48
-#define rd_aliasvrectbottom 52
-#define rd_vrectrightedge 56
-#define rd_fvrectx 60
-#define rd_fvrecty 64
-#define rd_fvrectx_adj 68
-#define rd_fvrecty_adj 72
-#define rd_vrect_x_adj_shift20 76
-#define rd_vrectright_adj_shift20 80
-#define rd_fvrectright_adj 84
-#define rd_fvrectbottom_adj 88
-#define rd_fvrectright 92
-#define rd_fvrectbottom 96
-#define rd_horizontalFieldOfView 100
-#define rd_xOrigin 104
-#define rd_yOrigin 108
-#define rd_vieworg 112
-#define rd_viewangles 124
-#define rd_ambientlight 136
-#define rd_size 140
-
-// mtriangle_t structure (note mtri_size == 1 << mtri_shift)
-// !!! if this is changed, it must be changed in model.h too !!!
-#define mtri_facesfront 0
-#define mtri_vertindex 4
-#define mtri_size 16 // !!! if this changes, array indexing in !!!
- // !!! d_polysa.s must be changed to match !!!
-#define mtri_shift 4
-
--- a/asm_i386.h
+++ /dev/null
@@ -1,78 +1,0 @@
-#ifndef __ASM_I386__
-#define __ASM_I386__
-// C(label): name of a C symbol as written in asm; plain when ELF is defined, underscore-prefixed otherwise
-#ifdef ELF
-#define C(label) label
-#endif
-#ifndef ELF
-#define C(label) _##label
-#endif
-
-//
-// !!! note that this file must match the corresponding C structures at all
-// times !!!
-//
-
-// plane_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-// !!! if the size of this is changed, the array lookup in SV_HullPointContents
-// must be changed too !!!
-#define pl_normal 0
-#define pl_dist 12
-#define pl_type 16
-#define pl_signbits 17
-#define pl_pad 18
-#define pl_size 20
-
-// hull_t structure
-// !!! if this is changed, it must be changed in model.h too !!!
-#define hu_clipnodes 0
-#define hu_planes 4
-#define hu_firstclipnode 8
-#define hu_lastclipnode 12
-#define hu_clip_mins 16
-#define hu_clip_maxs 28
-#define hu_size 40
-
-// dnode_t structure
-// !!! if this is changed, it must be changed in bspfile.h too !!!
-#define nd_planenum 0
-#define nd_children 4
-#define nd_mins 8
-#define nd_maxs 20
-#define nd_firstface 32
-#define nd_numfaces 36
-#define nd_size 40
-
-// sfxcache_t structure (no _size constant is defined for this one)
-// !!! if this is changed, it must be changed in sound.h too !!!
-#define sfxc_length 0
-#define sfxc_loopstart 4
-#define sfxc_speed 8
-#define sfxc_width 12
-#define sfxc_stereo 16
-#define sfxc_data 20
-
-// channel_t structure
-// !!! if this is changed, it must be changed in sound.h too !!!
-#define ch_sfx 0
-#define ch_leftvol 4
-#define ch_rightvol 8
-#define ch_end 12
-#define ch_pos 16
-#define ch_looping 20
-#define ch_entnum 24
-#define ch_entchannel 28
-#define ch_origin 32
-#define ch_dist_mult 44
-#define ch_master_vol 48
-#define ch_size 52
-
-// portable_samplepair_t structure
-// !!! if this is changed, it must be changed in sound.h too !!!
-#define psp_left 0
-#define psp_right 4
-#define psp_size 8
-
-#endif
-
--- a/block16.h
+++ /dev/null
@@ -1,123 +1,0 @@
-LEnter16_16: // entry with 8 unrolled groups ahead; each group makes two 16-bit stores
- movb (%esi),%al // low index byte: source byte at %esi
- movb (%esi,%ebx,),%cl // low index byte for the second store: source byte %ebx further on
- movb %dh,%ah // high index byte comes from %dh
- addl %ebp,%edx // step %edx by %ebp
- movb %dh,%ch // high index byte for the second store, taken after the step
- leal (%esi,%ebx,2),%esi // advance the source pointer by 2 * %ebx
- movw 0x12345678(,%eax,2),%ax // 16-bit table lookup; assumes upper half of %eax is zero -- TODO confirm
-LBPatch0: // marks the end of the lookup above; 0x12345678 looks like a placeholder patched at run time -- TODO confirm
- addl %ebp,%edx // step %edx again, ready for the next group
- movw %ax,(%edi) // first 16-bit store
- movw 0x12345678(,%ecx,2),%cx // second lookup; assumes upper half of %ecx is zero -- TODO confirm
-LBPatch1:
- movw %cx,2(%edi) // second 16-bit store
- addl $0x4,%edi // destination advances by the 4 bytes just written
-
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch2:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch3:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch4:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch5:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch6:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch7:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
-LEnter8_16: // entry with 4 groups (8 stores) remaining
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch8:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch9:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch10:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch11:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
-LEnter4_16: // entry with 2 groups (4 stores) remaining
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch12:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch13:
- movw %cx,2(%edi)
- addl $0x4,%edi
-
-LEnter2_16: // entry with 1 group (2 stores) remaining
- movb (%esi),%al
- movb (%esi,%ebx,),%cl
- movb %dh,%ah
- addl %ebp,%edx
- movb %dh,%ch
- leal (%esi,%ebx,2),%esi
- movw 0x12345678(,%eax,2),%ax
-LBPatch14:
- addl %ebp,%edx
- movw %ax,(%edi)
- movw 0x12345678(,%ecx,2),%cx
-LBPatch15:
- movw %cx,2(%edi)
- addl $0x4,%edi
--- a/d_draw.s
+++ /dev/null
@@ -1,1018 +1,0 @@
-//
-// d_draw.s
-// x86 assembly-language horizontal 8-bpp span-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with no transparency.
-//
-// Assumes there is at least one span in pspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
- .text
-
-// out-of-line, rarely-needed clamping code; each stub fixes one register and jumps back into D_DrawSpans8
-
-LClampHigh0:
- movl C(bbextents),%esi // s above the limit: clamp to bbextents
- jmp LClampReentry0
-LClampHighOrLow0: // reached by an unsigned "ja"; flags still hold the cmpl result
- jg LClampHigh0 // signed greater: genuinely too high
- xorl %esi,%esi // otherwise s was negative: clamp to 0
- jmp LClampReentry0
-
-LClampHigh1:
- movl C(bbextentt),%edx // t above the limit: clamp to bbextentt
- jmp LClampReentry1
-LClampHighOrLow1: // same scheme as LClampHighOrLow0, for t in %edx
- jg LClampHigh1
- xorl %edx,%edx // t was negative: clamp to 0
- jmp LClampReentry1
-
-LClampLow2: // snext (%ebp) below 2048
- movl $2048,%ebp
- jmp LClampReentry2
-LClampHigh2: // snext (%ebp) above bbextents
- movl C(bbextents),%ebp
- jmp LClampReentry2
-
-LClampLow3: // tnext (%ecx) below 2048
- movl $2048,%ecx
- jmp LClampReentry3
-LClampHigh3: // tnext (%ecx) above bbextentt
- movl C(bbextentt),%ecx
- jmp LClampReentry3
-
-LClampLow4: // last-segment snext (%eax) below 2048
- movl $2048,%eax
- jmp LClampReentry4
-LClampHigh4: // last-segment snext (%eax) above bbextents
- movl C(bbextents),%eax
- jmp LClampReentry4
-
-LClampLow5: // last-segment tnext (%ebx) below 2048
- movl $2048,%ebx
- jmp LClampReentry5
-LClampHigh5: // last-segment tnext (%ebx) above bbextentt
- movl C(bbextentt),%ebx
- jmp LClampReentry5
-
-
-#define pspans 4+16
-
- .align 4
-.globl C(D_DrawSpans8)
-C(D_DrawSpans8):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-//
-// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
-// and span list pointers
-//
-// TODO: any overlap from rearranging?
- flds C(d_sdivzstepu)
- fmuls fp_8
- movl C(cacheblock),%edx
- flds C(d_tdivzstepu)
- fmuls fp_8
- movl pspans(%esp),%ebx // first span descriptor (stack arg at 4+16: return address + 4 pushes)
- flds C(d_zistepu)
- fmuls fp_8
- movl %edx,pbase // pbase = cacheblock
- fstps zi8stepu
- fstps tdivz8stepu
- fstps sdivz8stepu
-
-LSpanLoop:
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
- fildl espan_t_v(%ebx)
- fildl espan_t_u(%ebx)
-
- fld %st(1) // dv | du | dv
- fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
- fld %st(1) // du | dv*d_sdivzstepv | du | dv
- fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
- // dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
- // dv*d_sdivzstepv | du | dv
- faddp %st(0),%st(2) // du*d_tdivzstepu |
- // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
- // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
- fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
- // du*d_sdivzstepu; stays in %st(2) at end
- fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
- // s/z
- fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
- // du*d_tdivzstepu | du | s/z
- fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
- // du*d_tdivzstepu | du | s/z
- faddp %st(0),%st(2) // dv*d_zistepv |
- // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
- fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fmuls C(d_zistepu) // du*d_zistepu |
- // dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
- // du*d_zistepu | dv*d_zistepv | s/z
- fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
- // du*d_tdivzstepu; stays in %st(1) at end
- fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
- faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
-
- flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
- fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
- fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
- // du*d_zistepu; stays in %st(0) at end
- // 1/z | fp_64k | t/z | s/z
-//
-// calculate and clamp s & t
-//
- fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
-
-//
-// point %edi to the first pixel in the span
-//
- movl C(d_viewbuffer),%ecx
- movl espan_t_v(%ebx),%eax
- movl %ebx,pspantemp // preserve spans pointer
-
- movl C(tadjust),%edx
- movl C(sadjust),%esi
- movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
- addl %ecx,%edi
- movl espan_t_u(%ebx),%ecx
- addl %ecx,%edi // pdest = &pdestspan[scans->u];
- movl espan_t_count(%ebx),%ecx
-
-//
-// now start the FDIV for the end of the span
-//
- cmpl $8,%ecx
- ja LSetupNotLast1
-
- decl %ecx
- jz LCleanup1 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
-
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fildl spancountminus1
-
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
- flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
- fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
- fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3)
-
- flds fp_64k
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
- jmp LFDIVInFlight1
-
- .align 4
-LSetupNotLast1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fadds zi8stepu
- fxch %st(2)
- fadds sdivz8stepu
- fxch %st(2)
- flds tdivz8stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = 64k / (1/z)
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight1:
-
- addl s,%esi
- addl t,%edx
- movl C(bbextents),%ebx
- movl C(bbextentt),%ebp
- cmpl %ebx,%esi
- ja LClampHighOrLow0 // unsigned compare: also taken when s is negative
-LClampReentry0:
- movl %esi,s
- movl pbase,%ebx
- shll $16,%esi
- cmpl %ebp,%edx
- movl %esi,sfracf
- ja LClampHighOrLow1 // unsigned compare: also taken when t is negative
-LClampReentry1:
- movl %edx,t
- movl s,%esi // sfrac = scans->sfrac;
- shll $16,%edx
- movl t,%eax // tfrac = scans->tfrac;
- sarl $16,%esi
- movl %edx,tfracf
-
-//
-// calculate the texture starting address
-//
- sarl $16,%eax
- movl C(cachewidth),%edx
- imull %edx,%eax // (tfrac >> 16) * cachewidth
- addl %ebx,%esi
- addl %eax,%esi // psource = pbase + (sfrac >> 16) +
- // ((tfrac >> 16) * cachewidth);
-
-//
-// determine whether last span or not
-//
- cmpl $8,%ecx
- jna LLastSegment // 8 or fewer pixels left in the span
-
-//
-// not the last segment; do full 8-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
- movl snext,%eax
- movl tnext,%edx
-
- movb (%esi),%bl // get first source texel
- subl $8,%ecx // count off this segment's pixels
- movl C(sadjust),%ebp
- movl %ecx,counttemp // remember count of remaining pixels
-
- movl C(tadjust),%ecx
- movb %bl,(%edi) // store first dest pixel
-
- addl %eax,%ebp
- addl %edx,%ecx
-
- movl C(bbextents),%eax
- movl C(bbextentt),%edx
-
- cmpl $2048,%ebp
- jl LClampLow2
- cmpl %eax,%ebp
- ja LClampHigh2
-LClampReentry2:
-
- cmpl $2048,%ecx
- jl LClampLow3
- cmpl %edx,%ecx
- ja LClampHigh3
-LClampReentry3:
-
- movl %ebp,snext
- movl %ecx,tnext
-
- subl s,%ebp
- subl t,%ecx
-
-//
-// set up advancetable
-//
- movl %ecx,%eax
- movl %ebp,%edx
- sarl $19,%eax // whole-texel t step per pixel: (tnext - t) >> (16 + 3), 8 pixels per segment
- jz LZero
- sarl $19,%edx // whole-texel s step per pixel: (snext - s) >> (16 + 3)
- movl C(cachewidth),%ebx
- imull %ebx,%eax
- jmp LSetUp1
-
-LZero:
- sarl $19,%edx // whole-texel s step per pixel: (snext - s) >> (16 + 3)
- movl C(cachewidth),%ebx
-
-LSetUp1:
-
- addl %edx,%eax // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%edx
- movl %eax,advancetable+4 // advance base in t
- addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $13,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $13,%ecx // left-justify tstep fractional part
- movl %eax,advancetable // advance extra in t
-
- movl %ecx,tstep
- addl %ecx,%edx // advance tfrac fractional part by tstep frac
-
- sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
- addl %ebp,%ebx // advance sfrac fractional part by sstep frac
- adcl advancetable+4(,%ecx,4),%esi // point to next source texel
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb (%esi),%al
- addl %ebp,%ebx
- movb %al,1(%edi)
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,2(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,3(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
- movl counttemp,%ecx
- cmpl $8,%ecx // more than one segment after this?
- ja LSetupNotLast2 // yes
-
- decl %ecx
- jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
- fildl spancountminus1
-
- flds C(d_zistepu) // C(d_zistepu) | spancountminus1
- fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
- faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
- fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- flds fp_64k // 64k | C(d_sdivzstepu)*scm1
- fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
- faddp %st(0),%st(4) // 64k
-
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight2
-
- .align 4
-LSetupNotLast2:
- fadds zi8stepu
- fxch %st(2)
- fadds sdivz8stepu
- fxch %st(2)
- flds tdivz8stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = 64k / (1/z)
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight2:
- movl %ecx,counttemp
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,4(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,5(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,6(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl $8,%edi
- movl %edx,tfracf
- movl snext,%edx
- movl %ebx,sfracf
- movl tnext,%ebx
- movl %edx,s
- movl %ebx,t
-
- movl counttemp,%ecx // retrieve count
-
-//
-// determine whether last span or not
-//
- cmpl $8,%ecx // are there multiple segments remaining?
- movb %al,-1(%edi)
- ja LNotLastSegment // yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
- testl %ecx,%ecx
- jz LNoSteps // just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
-
- movb (%esi),%al // load first texel in segment
- movl C(tadjust),%ebx
- movb %al,(%edi) // store first pixel in segment
- movl C(sadjust),%eax
-
- addl snext,%eax
- addl tnext,%ebx
-
- movl C(bbextents),%ebp
- movl C(bbextentt),%edx
-
- cmpl $2048,%eax
- jl LClampLow4
- cmpl %ebp,%eax
- ja LClampHigh4
-LClampReentry4:
- movl %eax,snext
-
- cmpl $2048,%ebx
- jl LClampLow5
- cmpl %edx,%ebx
- ja LClampHigh5
-LClampReentry5:
-
- cmpl $1,%ecx // don't bother
- je LOnlyOneStep // if two pixels in segment, there's only one step,
- // of the segment length
- subl s,%eax
- subl t,%ebx
-
- addl %eax,%eax // convert to 15.17 format so multiply by 1.31
- addl %ebx,%ebx // reciprocal yields 16.48
-
- imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
- movl %edx,%ebp
-
- movl %ebx,%eax
- imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
-
-LSetEntryvec:
-//
-// set up advancetable
-//
- movl entryvec_table(,%ecx,4),%ebx
- movl %edx,%eax
- movl %ebx,jumptemp // entry point into code for RET later
- movl %ebp,%ecx
- sarl $16,%edx // tstep >>= 16;
- movl C(cachewidth),%ebx
- sarl $16,%ecx // sstep >>= 16;
- imull %ebx,%edx
-
- addl %ecx,%edx // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%ecx
- movl %edx,advancetable+4 // advance base in t
- addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $16,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $16,%eax // left-justify tstep fractional part
- movl %edx,advancetable // advance extra in t
-
- movl %eax,tstep
- movl %ecx,%edx
- addl %eax,%edx
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
-
- jmp *jumptemp // jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
- movb (%esi),%al // load first texel in segment
- subl $7,%edi // adjust for hardwired offset
- jmp LEndSpan
-
-
-LOnlyOneStep:
- subl s,%eax
- subl t,%ebx
- movl %eax,%ebp
- movl %ebx,%edx
- jmp LSetEntryvec
-
-//----------------------------------------
-
-.globl Entry2_8
-Entry2_8: // last segment has 2 pixels; the first was already stored at (%edi)
- subl $6,%edi // adjust for hardwired offsets
- movb (%esi),%al // texel for the second pixel; LEndSpan stores it at 7(%edi)
- jmp LLEntry2_8
-
-//----------------------------------------
-
-.globl Entry3_8
-Entry3_8: // last segment has 3 pixels; %eax holds tstep on entry (set at LSetEntryvec)
- subl $5,%edi // adjust for hardwired offsets
- addl %eax,%edx // advance tfrac; carry feeds the sbbl below
- movb (%esi),%al // movb leaves the carry flag intact
- sbbl %ecx,%ecx // -1 if tfrac carried, else 0
- addl %ebp,%ebx // advance sfrac; carry feeds the adcl
- adcl advancetable+4(,%ecx,4),%esi // step source pointer to the next texel
- jmp LLEntry3_8
-
-//----------------------------------------
-
-.globl Entry4_8
-Entry4_8: // last segment has 4 pixels; %eax holds tstep on entry (set at LSetEntryvec)
- subl $4,%edi // adjust for hardwired offsets
- addl %eax,%edx // advance tfrac; carry feeds the sbbl below
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // carry from this add is consumed by the sbbl at LLEntry4_8
- jmp LLEntry4_8
-
-//----------------------------------------
-
-.globl Entry5_8
-Entry5_8: // last segment has 5 pixels; %eax holds tstep on entry (set at LSetEntryvec)
- subl $3,%edi // adjust for hardwired offsets
- addl %eax,%edx // advance tfrac; carry feeds the sbbl below
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // carry from this add is consumed by the sbbl at LLEntry5_8
- jmp LLEntry5_8
-
-//----------------------------------------
-
-.globl Entry6_8
-Entry6_8: // last segment has 6 pixels; %eax holds tstep on entry (set at LSetEntryvec)
- subl $2,%edi // adjust for hardwired offsets
- addl %eax,%edx // advance tfrac; carry feeds the sbbl below
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // carry from this add is consumed by the sbbl at LLEntry6_8
- jmp LLEntry6_8
-
-//----------------------------------------
-
-.globl Entry7_8
-Entry7_8: // last segment has 7 pixels; %eax holds tstep on entry (set at LSetEntryvec)
- decl %edi // adjust for hardwired offsets
- addl %eax,%edx // advance tfrac; carry feeds the sbbl below
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // carry from this add is consumed by the sbbl at LLEntry7_8
- jmp LLEntry7_8
-
-//----------------------------------------
-
-.globl Entry8_8
-Entry8_8: // last segment has 8 pixels; no %edi adjustment needed
- addl %eax,%edx // advance tfrac by tstep (%eax); carry feeds the sbbl below
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,1(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LLEntry7_8: // each LLEntryN_8 is entered with the tfrac carry still pending in CF
- sbbl %ecx,%ecx
- movb %al,2(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LLEntry6_8:
- sbbl %ecx,%ecx
- movb %al,3(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LLEntry5_8:
- sbbl %ecx,%ecx
- movb %al,4(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LLEntry4_8:
- sbbl %ecx,%ecx
- movb %al,5(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-LLEntry3_8:
- movb %al,6(%edi)
- movb (%esi),%al
-LLEntry2_8:
-
-LEndSpan:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
-
- movl pspantemp,%ebx // restore spans pointer
- movl espan_t_pnext(%ebx),%ebx // point to next span
- testl %ebx,%ebx // any more spans?
- movb %al,7(%edi) // store the span's final pixel; movb leaves the testl flags intact
- jnz LSpanLoop // more spans
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span z drawing code for polygons, with no transparency.
-// Takes one stack argument, pzspans: head of a span list linked by espan_t_pnext.
-// Assumes there is at least one span in pzspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
- .text
-
-// z-clamp on a non-negative gradient span
-LClamp:
- movl $0x40000000,%edx // clamped izi (0.5 * 2**31, if Float2ToThe31nd is 2**31 as named)
- xorl %ebx,%ebx // zero step for this span only; the span loop reloads izistep
- fstp %st(0) // discard the unclamped 1/z
- jmp LZDraw
-
-// z-clamp on a negative gradient span
-LClampNeg:
- movl $0x40000000,%edx // clamped izi, as in LClamp
- xorl %ebx,%ebx // zero step for this span only; the span loop reloads izistep
- fstp %st(0) // discard the unclamped 1/z
- jmp LZDrawNeg
-
-
-#define pzspans 4+16
-
-.globl C(D_DrawZSpans)
-C(D_DrawZSpans):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
- flds C(d_zistepu)
- movl C(d_zistepu),%eax // raw bit pattern of the float, for the integer test below
- movl pzspans(%esp),%esi // first span (stack arg at 4+16: return address + 4 pushes)
- testl %eax,%eax
- jz LFNegSpan // NOTE(review): taken only when the bit pattern is all zero, not when negative
-
- fmuls Float2ToThe31nd
- fistpl izistep // note: we are relying on FP exceptions being turned
- // off here to avoid range problems
-
-LFSpanLoop:
- movl izistep,%ebx // reloaded for every span: LClamp zeroes %ebx for a clamped span
-// set up the initial 1/z value
- fildl espan_t_v(%esi)
- fildl espan_t_u(%esi)
- movl espan_t_v(%esi),%ecx
- movl C(d_pzbuffer),%edi
- fmuls C(d_zistepu)
- fxch %st(1)
- fmuls C(d_zistepv)
- fxch %st(1)
- fadds C(d_ziorigin)
- imull C(d_zrowbytes),%ecx
- faddp %st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
- fcoms float_point5
- addl %ecx,%edi
- movl espan_t_u(%esi),%edx
- addl %edx,%edx // u * 2: byte offset, z entries are 16 bits wide
- movl espan_t_count(%esi),%ecx
- addl %edx,%edi // pdest = &pdestspan[scans->u];
- pushl %esi // preserve spans pointer
- fnstsw %ax
- testb $0x45,%ah // C3|C2|C0 all clear means 1/z > 0.5
- jz LClamp
-
- fmuls Float2ToThe31nd
- fistpl izi // note: we are relying on FP exceptions being turned
- // off here to avoid problems when the span is closer
- // than 1/(2**31)
- movl izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDraw:
-
-// do a single pixel up front, if necessary to dword align the destination
- testl $2,%edi
- jz LFMiddle
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax // stored z is the high 16 bits of izi
- decl %ecx
- movw %ax,(%edi)
- addl $2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFMiddle:
- pushl %ecx
- shrl $1,%ecx // count / 2
- jz LFLast // no aligned dwords to do
- shrl $1,%ecx // (count / 2) / 2
- jnc LFMiddleLoop // even number of aligned dwords to do
-
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax // pack two consecutive z values into one dword
- movl %eax,(%edi)
- addl $4,%edi
- andl %ecx,%ecx // set ZF from the remaining dword-pair count
- jz LFLast
-
-LFMiddleLoop:
- movl %edx,%eax
- addl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %edx,%ebp
- movl %eax,(%edi)
- addl %ebx,%edx
- shrl $16,%ebp
- movl %edx,%esi
- addl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%ebp
- movl %ebp,4(%edi) // FIXME: eliminate register contention
- addl $8,%edi
-
- decl %ecx
- jnz LFMiddleLoop
-
-LFLast:
- popl %ecx // retrieve count
- popl %esi // retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
- andl $1,%ecx // is there an odd pixel left to do?
- jz LFSpanDone // no
- shrl $16,%edx
- movw %dx,(%edi) // do the final pixel's z
-
-LFSpanDone:
- movl espan_t_pnext(%esi),%esi
- testl %esi,%esi
- jnz LFSpanLoop
-
- jmp LFDone
-
-LFNegSpan:
- fmuls FloatMinus2ToThe31nd
- fistpl izistep // note: we are relying on FP exceptions being turned
- // off here to avoid range problems
-
-LFNegSpanLoop:
- movl izistep,%ebx // reloaded for every span: LClampNeg zeroes %ebx for a clamped span
-// set up the initial 1/z value
- fildl espan_t_v(%esi)
- fildl espan_t_u(%esi)
- movl espan_t_v(%esi),%ecx
- movl C(d_pzbuffer),%edi
- fmuls C(d_zistepu)
- fxch %st(1)
- fmuls C(d_zistepv)
- fxch %st(1)
- fadds C(d_ziorigin)
- imull C(d_zrowbytes),%ecx
- faddp %st(0),%st(1)
-
-// clamp if z is nearer than 2 (1/z > 0.5)
- fcoms float_point5
- addl %ecx,%edi
- movl espan_t_u(%esi),%edx
- addl %edx,%edx // u * 2: byte offset, z entries are 16 bits wide
- movl espan_t_count(%esi),%ecx
- addl %edx,%edi // pdest = &pdestspan[scans->u];
- pushl %esi // preserve spans pointer
- fnstsw %ax
- testb $0x45,%ah // C3|C2|C0 all clear means 1/z > 0.5
- jz LClampNeg
-
- fmuls Float2ToThe31nd
- fistpl izi // note: we are relying on FP exceptions being turned
- // off here to avoid problems when the span is closer
- // than 1/(2**31)
- movl izi,%edx
-
-// at this point:
-// %ebx = izistep
-// %ecx = count
-// %edx = izi
-// %edi = pdest
-
-LZDrawNeg:
-
-// do a single pixel up front, if necessary to dword align the destination
- testl $2,%edi
- jz LFNegMiddle
- movl %edx,%eax
- subl %ebx,%edx // this path steps by subtracting the (negated) izistep
- shrl $16,%eax
- decl %ecx
- movw %ax,(%edi)
- addl $2,%edi
-
-// do middle a pair of aligned dwords at a time
-LFNegMiddle:
- pushl %ecx
- shrl $1,%ecx // count / 2
- jz LFNegLast // no aligned dwords to do
- shrl $1,%ecx // (count / 2) / 2
- jnc LFNegMiddleLoop // even number of aligned dwords to do
-
- movl %edx,%eax
- subl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %eax,(%edi)
- addl $4,%edi
- andl %ecx,%ecx // set ZF from the remaining dword-pair count
- jz LFNegLast
-
-LFNegMiddleLoop:
- movl %edx,%eax
- subl %ebx,%edx
- shrl $16,%eax
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%eax
- movl %edx,%ebp
- movl %eax,(%edi)
- subl %ebx,%edx
- shrl $16,%ebp
- movl %edx,%esi
- subl %ebx,%edx
- andl $0xFFFF0000,%esi
- orl %esi,%ebp
- movl %ebp,4(%edi) // FIXME: eliminate register contention
- addl $8,%edi
-
- decl %ecx
- jnz LFNegMiddleLoop
-
-LFNegLast:
- popl %ecx // retrieve count
- popl %esi // retrieve span pointer
-
-// do the last, unaligned pixel, if there is one
- andl $1,%ecx // is there an odd pixel left to do?
- jz LFNegSpanDone // no
- shrl $16,%edx
- movw %dx,(%edi) // do the final pixel's z
-
-LFNegSpanDone:
- movl espan_t_pnext(%esi),%esi
- testl %esi,%esi
- jnz LFNegSpanLoop
-
-LFDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-#endif // id386
--- a/d_draw16.s
+++ /dev/null
@@ -1,955 +1,0 @@
-//
-// d_draw16.s
-// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
-// subdivision.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with no transparency and
-// 16-pixel subdivision.
-//
-// Assumes there is at least one span in pspans, and that every span
-// contains at least one pixel
-//----------------------------------------------------------------------
-
- .data
-
- .text
-
-// out-of-line, rarely-needed clamping code: each stub is entered by a branch in D_DrawSpans16 and jumps back to the matching LClampReentryN
-
-LClampHigh0:
- movl C(bbextents),%esi // s is above the limit: clamp to bbextents
- jmp LClampReentry0
-LClampHighOrLow0:
- jg LClampHigh0 // flags are still those of the caller's cmpl; signed-greater means too high
- xorl %esi,%esi // otherwise s is negative: clamp to 0
- jmp LClampReentry0
-
-LClampHigh1:
- movl C(bbextentt),%edx // t is above the limit: clamp to bbextentt
- jmp LClampReentry1
-LClampHighOrLow1:
- jg LClampHigh1 // same test as LClampHighOrLow0, applied to t
- xorl %edx,%edx // negative t: clamp to 0
- jmp LClampReentry1
-
-LClampLow2:
- movl $4096,%ebp // snext (full segment) below 4096: raise it to 4096
- jmp LClampReentry2
-LClampHigh2:
- movl C(bbextents),%ebp // snext (full segment) above bbextents: lower it
- jmp LClampReentry2
-
-LClampLow3:
- movl $4096,%ecx // tnext (full segment) below 4096: raise it to 4096
- jmp LClampReentry3
-LClampHigh3:
- movl C(bbextentt),%ecx // tnext (full segment) above bbextentt: lower it
- jmp LClampReentry3
-
-LClampLow4:
- movl $4096,%eax // snext (last segment) below 4096: raise it to 4096
- jmp LClampReentry4
-LClampHigh4:
- movl C(bbextents),%eax // snext (last segment) above bbextents: lower it
- jmp LClampReentry4
-
-LClampLow5:
- movl $4096,%ebx // tnext (last segment) below 4096: raise it to 4096
- jmp LClampReentry5
-LClampHigh5:
- movl C(bbextentt),%ebx // tnext (last segment) above bbextentt: lower it
- jmp LClampReentry5
-
-
-#define pspans 4+16
-// pspans is the %esp offset of the span-list argument: 4 (return address) + 16 (the four registers pushed below)
- .align 4
-.globl C(D_DrawSpans16)
-C(D_DrawSpans16):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-//
-// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
-// and span list pointers
-//
-// TODO: any overlap from rearranging?
- flds C(d_sdivzstepu)
- fmuls fp_16
- movl C(cacheblock),%edx
- flds C(d_tdivzstepu)
- fmuls fp_16
- movl pspans(%esp),%ebx // point to the first span descriptor
- flds C(d_zistepu)
- fmuls fp_16
- movl %edx,pbase // pbase = cacheblock
- fstps zi16stepu // values pop in reverse push order: zi, then t/z, then s/z
- fstps tdivz16stepu
- fstps sdivz16stepu
-
-LSpanLoop:
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
- fildl espan_t_v(%ebx)
- fildl espan_t_u(%ebx)
-
- fld %st(1) // dv | du | dv
- fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
- fld %st(1) // du | dv*d_sdivzstepv | du | dv
- fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
- // dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
- // dv*d_sdivzstepv | du | dv
- faddp %st(0),%st(2) // du*d_tdivzstepu |
- // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
- // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
- fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
- // du*d_sdivzstepu; stays in %st(2) at end
- fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
- // s/z
- fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
- // du*d_tdivzstepu | du | s/z
- fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
- // du*d_tdivzstepu | du | s/z
- faddp %st(0),%st(2) // dv*d_zistepv |
- // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
- fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fmuls C(d_zistepu) // du*d_zistepu |
- // dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
- // du*d_zistepu | dv*d_zistepv | s/z
- fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
- // du*d_tdivzstepu; stays in %st(1) at end
- fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
- faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
-
- flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
- fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
- fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
- // du*d_zistepu; stays in %st(0) at end
- // 1/z | fp_64k | t/z | s/z
-//
-// calculate and clamp s & t
-//
- fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
-
-//
-// point %edi to the first pixel in the span
-//
- movl C(d_viewbuffer),%ecx
- movl espan_t_v(%ebx),%eax
- movl %ebx,pspantemp // preserve spans pointer
-
- movl C(tadjust),%edx
- movl C(sadjust),%esi
- movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
- addl %ecx,%edi
- movl espan_t_u(%ebx),%ecx
- addl %ecx,%edi // pdest = &pdestspan[scans->u];
- movl espan_t_count(%ebx),%ecx
-
-//
-// now start the FDIV for the end of the span
-//
- cmpl $16,%ecx // more than 16 pixels in this span?
- ja LSetupNotLast1 // yes: the first segment is a full 16 pixels
-
- decl %ecx // from here on %ecx = count - 1 for a short span
- jz LCleanup1 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
-
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fildl spancountminus1
-
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
- flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
- fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
- fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
- // C(d_tdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 (1/z, t/z, s/z sit below these and are omitted from the comments here)
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // 1/z | t/z | s/z, each advanced by its step * scm1
-
- flds fp_64k
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs (single-pixel span: no end-of-span FDIV is started)
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
- jmp LFDIVInFlight1
-
- .align 4
-LSetupNotLast1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fadds zi16stepu // advance 1/z by 16 pixels
- fxch %st(2) // s/z | t/z | 1/z
- fadds sdivz16stepu // advance s/z by 16 pixels
- fxch %st(2) // 1/z | t/z | s/z
- flds tdivz16stepu
- faddp %st(0),%st(2) // advance t/z by 16 pixels; stack is 1/z | t/z | s/z
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = 64k / (1/z)
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight1:
-
- addl s,%esi // %esi held sadjust: %esi = sadjust + s
- addl t,%edx // %edx held tadjust: %edx = tadjust + t
- movl C(bbextents),%ebx
- movl C(bbextentt),%ebp
- cmpl %ebx,%esi
- ja LClampHighOrLow0 // unsigned compare catches both negative and too-large s
-LClampReentry0:
- movl %esi,s
- movl pbase,%ebx
- shll $16,%esi // low 16 bits of s, left-justified
- cmpl %ebp,%edx
- movl %esi,sfracf // movl leaves the flags of the cmpl above intact
- ja LClampHighOrLow1
-LClampReentry1:
- movl %edx,t
- movl s,%esi // reload the clamped s
- shll $16,%edx // low 16 bits of t, left-justified
- movl t,%eax // reload the clamped t
- sarl $16,%esi // s >> 16
- movl %edx,tfracf
-
-//
-// calculate the texture starting address
-//
- sarl $16,%eax
- movl C(cachewidth),%edx
- imull %edx,%eax // (tfrac >> 16) * cachewidth
- addl %ebx,%esi
- addl %eax,%esi // psource = pbase + (sfrac >> 16) +
- // ((tfrac >> 16) * cachewidth);
-//
-// determine whether last segment or not
-//
- cmpl $16,%ecx // %ecx is the full count when it exceeds 16, else count - 1
- jna LLastSegment
-
-//
-// not the last segment; do full 16-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
- movl snext,%eax
- movl tnext,%edx
-
- movb (%esi),%bl // get first source texel
- subl $16,%ecx // count off this segment's pixels
- movl C(sadjust),%ebp
- movl %ecx,counttemp // remember count of remaining pixels
-
- movl C(tadjust),%ecx
- movb %bl,(%edi) // store first dest pixel
-
- addl %eax,%ebp // %ebp = sadjust + snext
- addl %edx,%ecx // %ecx = tadjust + tnext
-
- movl C(bbextents),%eax
- movl C(bbextentt),%edx
-
- cmpl $4096,%ebp
- jl LClampLow2
- cmpl %eax,%ebp
- ja LClampHigh2
-LClampReentry2:
-
- cmpl $4096,%ecx
- jl LClampLow3
- cmpl %edx,%ecx
- ja LClampHigh3
-LClampReentry3:
-
- movl %ebp,snext
- movl %ecx,tnext
-
- subl s,%ebp // %ebp = s delta across the 16-pixel segment
- subl t,%ecx // %ecx = t delta across the 16-pixel segment
-
-//
-// set up advancetable
-//
- movl %ecx,%eax
- movl %ebp,%edx
- sarl $20,%eax // whole part of tstep: >> 4 divides the delta by 16, >> 16 drops the fraction
- jz LZero // whole t step is 0: skip the multiply by cachewidth
- sarl $20,%edx // whole part of sstep, same combined shift
- movl C(cachewidth),%ebx
- imull %ebx,%eax
- jmp LSetUp1
-
-LZero:
- sarl $20,%edx // whole part of sstep, same combined shift
- movl C(cachewidth),%ebx
-
-LSetUp1:
-
- addl %edx,%eax // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%edx
- movl %eax,advancetable+4 // advance base in t
- addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $12,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $12,%ecx // left-justify tstep fractional part
- movl %eax,advancetable // advance extra in t
-
- movl %ecx,tstep
- addl %ecx,%edx // advance tfrac fractional part by tstep frac
-
- sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
- addl %ebp,%ebx // advance sfrac fractional part by sstep frac
- adcl advancetable+4(,%ecx,4),%esi // point to next source texel; %ecx = -1 picks advancetable, 0 picks advancetable+4, and the s carry adds 1
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb (%esi),%al
- addl %ebp,%ebx
- movb %al,1(%edi) // store pixel 1
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,2(%edi) // store pixel 2
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,3(%edi) // store pixel 3
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,4(%edi) // store pixel 4
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,5(%edi) // store pixel 5
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,6(%edi) // store pixel 6
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,7(%edi) // store pixel 7
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
- movl counttemp,%ecx
- cmpl $16,%ecx // more than one segment after this?
- ja LSetupNotLast2 // yes
-
- decl %ecx // %ecx = remaining pixels - 1
- jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
- fildl spancountminus1
-
- flds C(d_zistepu) // C(d_zistepu) | spancountminus1
- fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
- flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
- fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
- fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
- faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
- fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
- fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
- fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
- faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
- flds fp_64k // 64k | C(d_sdivzstepu)*scm1
- fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
- faddp %st(0),%st(4) // 64k (followed by 1/z | t/z | s/z, each advanced by its step * scm1)
-
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight2
-
- .align 4
-LSetupNotLast2:
- fadds zi16stepu // advance 1/z by 16 pixels
- fxch %st(2)
- fadds sdivz16stepu // advance s/z by 16 pixels
- fxch %st(2)
- flds tdivz16stepu
- faddp %st(0),%st(2) // advance t/z by 16 pixels
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = 64k / (1/z)
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight2:
- movl %ecx,counttemp // remaining count if it exceeds 16, otherwise remaining - 1
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,8(%edi) // store pixel 8
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,9(%edi) // store pixel 9
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,10(%edi) // store pixel 10
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,11(%edi) // store pixel 11
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,12(%edi) // store pixel 12
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,13(%edi) // store pixel 13
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,14(%edi) // store pixel 14
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-
- addl $16,%edi // step the destination past this segment
- movl %edx,tfracf
- movl snext,%edx
- movl %ebx,sfracf
- movl tnext,%ebx
- movl %edx,s // s = snext
- movl %ebx,t // t = tnext
-
- movl counttemp,%ecx // retrieve count
-
-//
-// determine whether last segment or not
-//
- cmpl $16,%ecx // are there multiple segments remaining?
- movb %al,-1(%edi) // store pixel 15 of the segment just finished (%edi already advanced)
- ja LNotLastSegment // yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
- testl %ecx,%ecx // %ecx = pixels in this segment - 1
- jz LNoSteps // just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
-
- movb (%esi),%al // load first texel in segment
- movl C(tadjust),%ebx
- movb %al,(%edi) // store first pixel in segment
- movl C(sadjust),%eax
-
- addl snext,%eax // %eax = sadjust + snext
- addl tnext,%ebx // %ebx = tadjust + tnext
-
- movl C(bbextents),%ebp
- movl C(bbextentt),%edx
-
- cmpl $4096,%eax
- jl LClampLow4
- cmpl %ebp,%eax
- ja LClampHigh4
-LClampReentry4:
- movl %eax,snext
-
- cmpl $4096,%ebx
- jl LClampLow5
- cmpl %edx,%ebx
- ja LClampHigh5
-LClampReentry5:
-
- cmpl $1,%ecx // don't bother
- je LOnlyOneStep // if two pixels in segment, there's only one step,
- // of the segment length
- subl s,%eax
- subl t,%ebx
-
- addl %eax,%eax // convert to 15.17 format so multiply by 1.31
- addl %ebx,%ebx // reciprocal yields 16.48
-
- imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
- // (spancount-1)
- movl %edx,%ebp // %ebp = sstep (high dword of the %edx:%eax product)
-
- movl %ebx,%eax
- imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
- // (spancount-1)
-LSetEntryvec:
-//
-// set up advancetable (on entry %ebp = sstep, %edx = tstep, %ecx = pixels in segment - 1)
-//
- movl entryvec_table_16(,%ecx,4),%ebx
- movl %edx,%eax
- movl %ebx,jumptemp // entry point used by the indirect jmp at the end of this block
- movl %ebp,%ecx
- sarl $16,%edx // tstep >>= 16;
- movl C(cachewidth),%ebx
- sarl $16,%ecx // sstep >>= 16;
- imull %ebx,%edx
-
- addl %ecx,%edx // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%ecx
- movl %edx,advancetable+4 // advance base in t
- addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $16,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $16,%eax // left-justify tstep fractional part
- movl %edx,advancetable // advance extra in t
-
- movl %eax,tstep
- movl %ecx,%edx
- addl %eax,%edx // advance tfrac fractional part by tstep frac
- sbbl %ecx,%ecx // %ecx = -1 on carry out of the t fraction, else 0
- addl %ebp,%ebx // advance sfrac fractional part by sstep frac
- adcl advancetable+4(,%ecx,4),%esi // point to the second source texel
-
- jmp *jumptemp // jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
- movb (%esi),%al // load first texel in segment
- subl $15,%edi // adjust for hardwired offset: LEndSpan stores at 15(%edi)
- jmp LEndSpan
-
-
-LOnlyOneStep:
- subl s,%eax // with a single step the step is the whole delta
- subl t,%ebx
- movl %eax,%ebp // %ebp = sstep, as LSetEntryvec expects
- movl %ebx,%edx // %edx = tstep, as LSetEntryvec expects
- jmp LSetEntryvec
-
-//----------------------------------------
-
-.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
-.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
-.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
-.globl Entry14_16, Entry15_16, Entry16_16
-// EntryN_16 handles a last segment of N pixels: it rebases %edi so the final pixel lands at 15(%edi), then joins the unrolled chain at LEntryN_16
-Entry2_16:
- subl $14,%edi // adjust for hardwired offsets
- movb (%esi),%al
- jmp LEntry2_16
-
-//----------------------------------------
-
-Entry3_16:
- subl $13,%edi // adjust for hardwired offsets
- addl %eax,%edx // %eax still holds the tstep fraction set up in LSetEntryvec
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- jmp LEntry3_16
-
-//----------------------------------------
-
-Entry4_16:
- subl $12,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // the carry from this add is consumed by the sbbl at LEntry4_16
- jmp LEntry4_16
-
-//----------------------------------------
-
-Entry5_16:
- subl $11,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry5_16
-
-//----------------------------------------
-
-Entry6_16:
- subl $10,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry6_16
-
-//----------------------------------------
-
-Entry7_16:
- subl $9,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry7_16
-
-//----------------------------------------
-
-Entry8_16:
- subl $8,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry8_16
-
-//----------------------------------------
-
-Entry9_16:
- subl $7,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry9_16
-
-//----------------------------------------
-
-Entry10_16:
- subl $6,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry10_16
-
-//----------------------------------------
-
-Entry11_16:
- subl $5,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry11_16
-
-//----------------------------------------
-
-Entry12_16:
- subl $4,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry12_16
-
-//----------------------------------------
-
-Entry13_16:
- subl $3,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry13_16
-
-//----------------------------------------
-
-Entry14_16:
- subl $2,%edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry14_16
-
-//----------------------------------------
-
-Entry15_16:
- decl %edi // adjust for hardwired offsets
- addl %eax,%edx
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
- jmp LEntry15_16
-
-//----------------------------------------
-
-Entry16_16:
- addl %eax,%edx // %eax still holds the tstep fraction set up in LSetEntryvec
- movb (%esi),%al
- sbbl %ecx,%ecx
- addl %ebp,%ebx
- adcl advancetable+4(,%ecx,4),%esi
-
- addl tstep,%edx
- sbbl %ecx,%ecx
- movb %al,1(%edi) // store pixel at offset 1
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx // each LEntryN_16 label follows an addl like this, so entry and fall-through both arrive with its carry
-LEntry15_16:
- sbbl %ecx,%ecx
- movb %al,2(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry14_16:
- sbbl %ecx,%ecx
- movb %al,3(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry13_16:
- sbbl %ecx,%ecx
- movb %al,4(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry12_16:
- sbbl %ecx,%ecx
- movb %al,5(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry11_16:
- sbbl %ecx,%ecx
- movb %al,6(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry10_16:
- sbbl %ecx,%ecx
- movb %al,7(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry9_16:
- sbbl %ecx,%ecx
- movb %al,8(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry8_16:
- sbbl %ecx,%ecx
- movb %al,9(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry7_16:
- sbbl %ecx,%ecx
- movb %al,10(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry6_16:
- sbbl %ecx,%ecx
- movb %al,11(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry5_16:
- sbbl %ecx,%ecx
- movb %al,12(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
- addl tstep,%edx
-LEntry4_16:
- sbbl %ecx,%ecx
- movb %al,13(%edi)
- addl %ebp,%ebx
- movb (%esi),%al
- adcl advancetable+4(,%ecx,4),%esi
-LEntry3_16:
- movb %al,14(%edi)
- movb (%esi),%al // last texel; it is stored at 15(%edi) in LEndSpan
-LEntry2_16:
-
-LEndSpan:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
-
- movl pspantemp,%ebx // restore spans pointer
- movl espan_t_pnext(%ebx),%ebx // point to next span
- testl %ebx,%ebx // any more spans?
- movb %al,15(%edi) // store the span's final pixel (movb leaves the testl flags intact)
- jnz LSpanLoop // more spans
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-#endif // id386
--- a/d_ifacea.h
+++ /dev/null
@@ -1,79 +1,0 @@
-//
-// d_ifacea.h
-//
-// Include file for asm driver interface.
-//
-
-//
-// !!! note that this file must match the corresponding C structures in
-// d_iface.h at all times !!!
-//
-
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define ALIAS_ONSEAM 0x0020
-
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define TURB_TEX_SIZE 64 // base turbulent texture size
-
-// !!! if this is changed, it must be changed in d_iface.h too !!! (asm_draw.h defines CYCLE with the same value)
-#define CYCLE 128
-
-// !!! if this is changed, it must be changed in r_shared.h too !!!
-#define MAXHEIGHT 1024
-
-// !!! if this is changed, it must be changed in quakedef.h too !!!
-#define CACHE_SIZE 32 // used to align key data structures
-
-// particle_t structure (field byte offsets; pt_size is the structure size)
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-// driver-usable fields
-#define pt_org 0
-#define pt_color 12
-// drivers never touch the following fields
-#define pt_next 16
-#define pt_vel 20
-#define pt_ramp 32
-#define pt_die 36
-#define pt_type 40
-#define pt_size 44
-
-#define PARTICLE_Z_CLIP 8.0
-
-// finalvert_t structure (field byte offsets; fv_size is the structure size)
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define fv_v 0 // !!! if this is moved, cases where the !!!
- // !!! address of this field is pushed in !!!
- // !!! d_polysa.s must be changed !!!
-#define fv_flags 24
-#define fv_reserved 28
-#define fv_size 32
-#define fv_shift 5 // log2(fv_size): 1 << 5 == 32
-
-
-// stvert_t structure (field byte offsets; stv_size is the structure size)
-// !!! if this is changed, it must be changed in modelgen.h too !!!
-#define stv_onseam 0
-#define stv_s 4
-#define stv_t 8
-#define stv_size 12
-
-
-// trivertx_t structure (field byte offsets; tv_size is the structure size)
-// !!! if this is changed, it must be changed in modelgen.h too !!!
-#define tv_v 0
-#define tv_lightnormalindex 3
-#define tv_size 4
-
-// affinetridesc_t structure (field byte offsets; atd_size is the structure size)
-// !!! if this is changed, it must be changed in d_iface.h too !!!
-#define atd_pskin 0
-#define atd_pskindesc 4
-#define atd_skinwidth 8
-#define atd_skinheight 12
-#define atd_ptriangles 16
-#define atd_pfinalverts 20
-#define atd_numtriangles 24
-#define atd_drawtype 28
-#define atd_seamfixupX16 32
-#define atd_size 36
--- a/d_parta.s
+++ /dev/null
@@ -1,458 +1,0 @@
-//
-// d_parta.s
-// x86 assembly-language 8-bpp particle-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "d_ifacea.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// 8-bpp particle drawing code.
-//----------------------------------------------------------------------
-
-//FIXME: comments, full optimization
-
-//----------------------------------------------------------------------
-// 8-bpp particle queueing code.
-//----------------------------------------------------------------------
-
- .text
-
-#define P 12+4
-// P is the %esp offset of the particle pointer argument: 12 (the three registers pushed below) + 4 (return address)
- .align 4
-.globl C(D_DrawParticle)
-C(D_DrawParticle):
- pushl %ebp // preserve caller's stack frame
- pushl %edi // preserve register variables
- pushl %ebx
-
- movl P(%esp),%edi // %edi = pointer to the particle
-
-// FIXME: better FP overlap in general here
-
-// transform point
-// VectorSubtract (p->org, r_origin, local);
- flds C(r_origin)
- fsubrs pt_org(%edi) // local[0] = p->org[0] - r_origin[0]
- flds pt_org+4(%edi)
- fsubs C(r_origin)+4
- flds pt_org+8(%edi)
- fsubs C(r_origin)+8
- fxch %st(2) // local[0] | local[1] | local[2]
-
-// transformed[2] = DotProduct(local, r_ppn);
- flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
- fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
- flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
- fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
- flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
- // local[1] | local[2]
- fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
- fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
- faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
- // local[2]
- faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
- fld %st(0) // z | z | local[0] | local[1] |
- // local[2]
- fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
- fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
-
-// if (transformed[2] < PARTICLE_Z_CLIP)
-// return;
- fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
- fxch %st(3) // local[2] | local[0] | local[1] | 1/z
-
- flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
- fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
- flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
- // local[1] | 1/z
-
- fnstsw %ax // fetch the condition codes left by the fcomps above
- testb $1,%ah // C0 (bit 0 of %ah) is set when z compared below the clip value
- jnz LPop6AndDone // clipped: discard the six FP stack entries and return
-
-// transformed[1] = DotProduct(local, r_pup);
- fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
- flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
- // local[0] | local[1] | 1/z
- fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
- // local[1] | 1/z
- fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
- // local[1] | 1/z
- faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
- // local[1] | 1/z
- faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
- fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
-
-// transformed[0] = DotProduct(local, r_pright);
- fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
- fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
- fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
- fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
- fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
- fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
- faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
-
- faddp %st(0),%st(1) // x | y | 1/z
- fxch %st(1) // y | x | 1/z
-
-// project the point
- fmul %st(2),%st(0) // y/z | x | 1/z
- fxch %st(1) // x | y/z | 1/z
- fmul %st(2),%st(0) // x/z | y/z | 1/z
- fxch %st(1) // y/z | x/z | 1/z
- fsubrs C(ycenter) // v | x/z | 1/z
- fxch %st(1) // x/z | v | 1/z
- fadds C(xcenter) // u | v | 1/z
-// FIXME: preadjust xcenter and ycenter
- fxch %st(1) // v | u | 1/z
- fadds float_point5 // v | u | 1/z
- fxch %st(1) // u | v | 1/z
- fadds float_point5 // u | v | 1/z
- fxch %st(2) // 1/z | v | u
- fmuls DP_32768 // 1/z * 0x8000 | v | u
- fxch %st(2) // u | v | 1/z * 0x8000
-
-// FIXME: use Terje's fp->int trick here?
-// FIXME: check we're getting proper rounding here
- fistpl DP_u // v | 1/z * 0x8000
- fistpl DP_v // 1/z * 0x8000
-
- movl DP_u,%eax
- movl DP_v,%edx
-
-// if ((v > d_vrectbottom_particle) ||
-// (u > d_vrectright_particle) ||
-// (v < d_vrecty) ||
-// (u < d_vrectx))
-// {
-// continue;
-// }
-
- movl C(d_vrectbottom_particle),%ebx
- movl C(d_vrectright_particle),%ecx
- cmpl %ebx,%edx
- jg LPop1AndDone // off-screen: one FP entry (1/z * 0x8000) is still on the stack
- cmpl %ecx,%eax
- jg LPop1AndDone
- movl C(d_vrecty),%ebx
- movl C(d_vrectx),%ecx
- cmpl %ebx,%edx
- jl LPop1AndDone
-
- cmpl %ecx,%eax
- jl LPop1AndDone
-
- flds pt_color(%edi) // color | 1/z * 0x8000
-// FIXME: use Terje's fast fp->int trick?
- fistpl DP_Color // 1/z * 0x8000
-
- movl C(d_viewbuffer),%ebx
-
- addl %eax,%ebx // d_viewbuffer + u
- movl C(d_scantable)(,%edx,4),%edi // point to the pixel
-
- imull C(d_zrowbytes),%edx // point to the z pixel
-
- leal (%edx,%eax,2),%edx // v * d_zrowbytes + u * 2
- movl C(d_pzbuffer),%eax
-
- fistpl izi // izi = 1/z * 0x8000 as an integer; the FP stack is now empty
-
- addl %ebx,%edi // %edi = destination pixel address
- addl %eax,%edx // %edx = z-buffer entry address
-
-// pix = izi >> d_pix_shift;
-
- movl izi,%eax
- movl C(d_pix_shift),%ecx
- shrl %cl,%eax
- movl izi,%ebp // %ebp = izi; its low word (%bp) is what the z tests below use
-
-// if (pix < d_pix_min)
-// pix = d_pix_min;
-// else if (pix > d_pix_max)
-// pix = d_pix_max;
-
- movl C(d_pix_min),%ebx
- movl C(d_pix_max),%ecx
- cmpl %ebx,%eax
- jnl LTestPixMax
- movl %ebx,%eax
- jmp LTestDone
-
-LTestPixMax:
- cmpl %ecx,%eax
- jng LTestDone
- movl %ecx,%eax
- movl %ecx,%eax
-LTestDone:
-// here %eax = pix after the d_pix_min / d_pix_max clamp and %ebp = izi
- movb DP_Color,%ch // %ch = low byte of the integer color
-
- movl C(d_y_aspect_shift),%ebx
- testl %ebx,%ebx
- jnz LDefault // non-zero aspect shift: use the generic loop
-
- cmpl $4,%eax
- ja LDefault // the table is only indexed for sizes up to 4
-
- jmp *DP_EntryTable-4(,%eax,4) // indirect jump through table entry pix-1 (DP_EntryTable is defined outside this file); '*' marks the operand as indirect, as AT&T syntax requires
-
-// 1x1
-.globl DP_1x1
-DP_1x1:
- cmpw %bp,(%edx) // just one pixel to do
- jg LDone // stored z value is greater than izi (signed 16-bit): leave the pixel alone
- movw %bp,(%edx) // write the new z value
- movb %ch,(%edi) // write the color
- jmp LDone
-
-// 2x2
-.globl DP_2x2
-DP_2x2:
- pushl %esi // %esi is used as a scratch row stride; restored before LDone
- movl C(screenwidth),%ebx // %ebx = byte stride between screen rows
- movl C(d_zrowbytes),%esi // %esi = byte stride between z-buffer rows
-
- cmpw %bp,(%edx) // row 0, column 0
- jg L2x2_1 // stored z value greater than izi: skip this pixel
- movw %bp,(%edx)
- movb %ch,(%edi)
-L2x2_1:
- cmpw %bp,2(%edx) // row 0, column 1
- jg L2x2_2
- movw %bp,2(%edx)
- movb %ch,1(%edi)
-L2x2_2:
- cmpw %bp,(%edx,%esi,1) // row 1, column 0
- jg L2x2_3
- movw %bp,(%edx,%esi,1)
- movb %ch,(%edi,%ebx,1)
-L2x2_3:
- cmpw %bp,2(%edx,%esi,1) // row 1, column 1
- jg L2x2_4
- movw %bp,2(%edx,%esi,1)
- movb %ch,1(%edi,%ebx,1)
-L2x2_4:
-
- popl %esi
- jmp LDone
-
-// 3x3
-.globl DP_3x3
-DP_3x3:
- pushl %esi // %esi is used as a scratch row stride; restored before LDone
- movl C(screenwidth),%ebx // %ebx = byte stride between screen rows
- movl C(d_zrowbytes),%esi // %esi = byte stride between z-buffer rows
-
- cmpw %bp,(%edx) // row 0
- jg L3x3_1 // stored z value greater than izi: skip this pixel
- movw %bp,(%edx)
- movb %ch,(%edi)
-L3x3_1:
- cmpw %bp,2(%edx)
- jg L3x3_2
- movw %bp,2(%edx)
- movb %ch,1(%edi)
-L3x3_2:
- cmpw %bp,4(%edx)
- jg L3x3_3
- movw %bp,4(%edx)
- movb %ch,2(%edi)
-L3x3_3:
-
- cmpw %bp,(%edx,%esi,1) // row 1
- jg L3x3_4
- movw %bp,(%edx,%esi,1)
- movb %ch,(%edi,%ebx,1)
-L3x3_4:
- cmpw %bp,2(%edx,%esi,1)
- jg L3x3_5
- movw %bp,2(%edx,%esi,1)
- movb %ch,1(%edi,%ebx,1)
-L3x3_5:
- cmpw %bp,4(%edx,%esi,1)
- jg L3x3_6
- movw %bp,4(%edx,%esi,1)
- movb %ch,2(%edi,%ebx,1)
-L3x3_6:
-
- cmpw %bp,(%edx,%esi,2) // row 2 (stride scaled by 2)
- jg L3x3_7
- movw %bp,(%edx,%esi,2)
- movb %ch,(%edi,%ebx,2)
-L3x3_7:
- cmpw %bp,2(%edx,%esi,2)
- jg L3x3_8
- movw %bp,2(%edx,%esi,2)
- movb %ch,1(%edi,%ebx,2)
-L3x3_8:
- cmpw %bp,4(%edx,%esi,2)
- jg L3x3_9
- movw %bp,4(%edx,%esi,2)
- movb %ch,2(%edi,%ebx,2)
-L3x3_9:
-
- popl %esi
- jmp LDone
-
-
-// 4x4
-.globl DP_4x4
-DP_4x4:
- pushl %esi // %esi is used as a scratch row stride; restored before LDone
- movl C(screenwidth),%ebx // %ebx = byte stride between screen rows
- movl C(d_zrowbytes),%esi // %esi = byte stride between z-buffer rows
-
- cmpw %bp,(%edx) // row 0
- jg L4x4_1 // stored z value greater than izi: skip this pixel
- movw %bp,(%edx)
- movb %ch,(%edi)
-L4x4_1:
- cmpw %bp,2(%edx)
- jg L4x4_2
- movw %bp,2(%edx)
- movb %ch,1(%edi)
-L4x4_2:
- cmpw %bp,4(%edx)
- jg L4x4_3
- movw %bp,4(%edx)
- movb %ch,2(%edi)
-L4x4_3:
- cmpw %bp,6(%edx)
- jg L4x4_4
- movw %bp,6(%edx)
- movb %ch,3(%edi)
-L4x4_4:
-
- cmpw %bp,(%edx,%esi,1) // row 1
- jg L4x4_5
- movw %bp,(%edx,%esi,1)
- movb %ch,(%edi,%ebx,1)
-L4x4_5:
- cmpw %bp,2(%edx,%esi,1)
- jg L4x4_6
- movw %bp,2(%edx,%esi,1)
- movb %ch,1(%edi,%ebx,1)
-L4x4_6:
- cmpw %bp,4(%edx,%esi,1)
- jg L4x4_7
- movw %bp,4(%edx,%esi,1)
- movb %ch,2(%edi,%ebx,1)
-L4x4_7:
- cmpw %bp,6(%edx,%esi,1)
- jg L4x4_8
- movw %bp,6(%edx,%esi,1)
- movb %ch,3(%edi,%ebx,1)
-L4x4_8:
-
- leal (%edx,%esi,2),%edx // advance the z pointer two rows
- leal (%edi,%ebx,2),%edi // advance the pixel pointer two rows
-
- cmpw %bp,(%edx) // row 2
- jg L4x4_9
- movw %bp,(%edx)
- movb %ch,(%edi)
-L4x4_9:
- cmpw %bp,2(%edx)
- jg L4x4_10
- movw %bp,2(%edx)
- movb %ch,1(%edi)
-L4x4_10:
- cmpw %bp,4(%edx)
- jg L4x4_11
- movw %bp,4(%edx)
- movb %ch,2(%edi)
-L4x4_11:
- cmpw %bp,6(%edx)
- jg L4x4_12
- movw %bp,6(%edx)
- movb %ch,3(%edi)
-L4x4_12:
-
- cmpw %bp,(%edx,%esi,1) // row 3
- jg L4x4_13
- movw %bp,(%edx,%esi,1)
- movb %ch,(%edi,%ebx,1)
-L4x4_13:
- cmpw %bp,2(%edx,%esi,1)
- jg L4x4_14
- movw %bp,2(%edx,%esi,1)
- movb %ch,1(%edi,%ebx,1)
-L4x4_14:
- cmpw %bp,4(%edx,%esi,1)
- jg L4x4_15
- movw %bp,4(%edx,%esi,1)
- movb %ch,2(%edi,%ebx,1)
-L4x4_15:
- cmpw %bp,6(%edx,%esi,1)
- jg L4x4_16
- movw %bp,6(%edx,%esi,1)
- movb %ch,3(%edi,%ebx,1)
-L4x4_16:
-
- popl %esi
- jmp LDone
-
-// default case, handling any size particle
-LDefault:
-
-// count = pix << d_y_aspect_shift;
-
- movl %eax,%ebx
- movl %eax,DP_Pix
- movb C(d_y_aspect_shift),%cl // only %cl is written, so the color in %ch is kept
- shll %cl,%ebx // %ebx = row count
-
-// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
-// {
-// for (i=0 ; i<pix ; i++)
-// {
-// if (pz[i] <= izi)
-// {
-// pz[i] = izi;
-// pdest[i] = color;
-// }
-// }
-// }
-
-LGenRowLoop:
- movl DP_Pix,%eax // %eax counts columns down from pix to 1
-
-LGenColLoop:
- cmpw %bp,-2(%edx,%eax,2) // the -2 / -1 biases turn the 1-based counter into a 0-based column
- jg LGSkip // stored z value greater than izi: skip this pixel
- movw %bp,-2(%edx,%eax,2)
- movb %ch,-1(%edi,%eax,1)
-LGSkip:
- decl %eax // --pix
- jnz LGenColLoop
-
- addl C(d_zrowbytes),%edx // next z-buffer row
- addl C(screenwidth),%edi // next screen row
-
- decl %ebx // --count
- jnz LGenRowLoop
-
-LDone:
- popl %ebx // restore register variables
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-LPop6AndDone:
- fstp %st(0) // discard five FP stack entries, then fall through for the sixth
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
-LPop1AndDone:
- fstp %st(0) // discard the one remaining FP stack entry
- jmp LDone
-
-#endif // id386
--- a/d_polysa.s
+++ /dev/null
@@ -1,1723 +1,0 @@
-//
-// d_polysa.s
-// x86 assembly-language polygon model drawing code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-// !!! if this is changed, it must be changed in d_polyse.c too !!!
-#define DPS_MAXSPANS MAXHEIGHT+1
- // 1 extra for spanpackage that marks end
-// SPAN_SIZE is the byte count D_PolysetDraw subtracts from %esp for its span array; general form kept for reference:
-//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
-#define SPAN_SIZE (1024+1+1+1)*32
-// (the 32 equals spanpackage_t_size in asm_draw.h; the 1024 presumably stands in for MAXHEIGHT -- TODO confirm)
-
- .data
-
- .align 4
-p10_minus_p20: .single 0 // scratch written and re-read by D_PolysetCalcGradients
-p01_minus_p21: .single 0 // scratch written and re-read by D_PolysetCalcGradients
-temp0: .single 0 // not referenced in the portion of this file under review
-temp1: .single 0 // not referenced in the portion of this file under review
-Ltemp: .single 0 // D_PolysetDraw stores its facing-test float here to read it back as an integer
-// Entry points into the 8x unrolled pixel loop of D_PolysetDrawSpans8, indexed by (-count & 7): index 0 starts a full pass of 8 pixels, index 7 a pass of 1.
-aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5
- .long LDraw4, LDraw3, LDraw2, LDraw1
-
-lzistepx: .long 0 // r_zistepx rotated by 16 bits, stored by D_PolysetDrawSpans8 before its span loop
-
-
- .text
-
- .extern C(D_PolysetSetEdgeTable)
- .extern C(D_RasterizeAliasPolySmooth)
-
-//----------------------------------------------------------------------
-// affine triangle gradient calculation code
-//----------------------------------------------------------------------
-// D_PolysetCalcGradients: takes one 32-bit stack argument (the skin width, at skinwidth(%esp)); clobbers %eax, %ecx, %edx.
-#define skinwidth 4+0
-// Reads r_p0/r_p1/r_p2 and d_xdenom; writes r_{l,s,t,zi}step{x,y}, a_sstepxfrac, a_tstepxfrac and a_ststepxwhole.
-.globl C(D_PolysetCalcGradients)
-C(D_PolysetCalcGradients):
-// x87 usage peaks at all eight registers, so the FPU stack must be empty on entry; it is empty again at the ret.
-// p00_minus_p20 = r_p0[0] - r_p2[0];
-// p01_minus_p21 = r_p0[1] - r_p2[1];
-// p10_minus_p20 = r_p1[0] - r_p2[0];
-// p11_minus_p21 = r_p1[1] - r_p2[1];
-//
-// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
-// p00_minus_p20 * p11_minus_p21);
-//
-// ystepdenominv = -xstepdenominv;
-// (no divide is executed here: the value the stack comments call xstepdenominv is the float loaded from d_xdenom)
- fildl C(r_p0)+0 // r_p0[0]
- fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
- fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | r_p0[0]
- fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | p10_minus_p20
- fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
- fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
-
-//// ceil () for light so positive steps are exaggerated, negative steps
-//// diminished, pushing us away from underflow toward overflow. Underflow is
-//// very visible, overflow is very unlikely, because of ambient lighting
-// t0 = r_p0[4] - r_p2[4];
-// t1 = r_p1[4] - r_p2[4];
-
- fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// r_lstepx = (int)
-// ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
-// r_lstepy = (int)
-// ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(2) // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls float_minus_1 // ystepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fldcw ceil_cw // control word for the two light-step stores; presumably selects round-up to match the ceil() above (defined outside this file)
- fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fldcw single_cw // control word left in force for the remaining stores and on return (defined outside this file)
-
-// t0 = r_p0[2] - r_p2[2];
-// t1 = r_p1[2] - r_p2[2];
-
- fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// t0 = r_p0[3] - r_p2[3];
-// t1 = r_p1[3] - r_p2[3];
-
- fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
-
-// t0 = r_p0[5] - r_p2[5];
-// t1 = r_p1[5] - r_p2[5];
-
- fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
-
-// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
-// xstepdenominv);
-// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
-// ystepdenominv);
-
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p11_minus_p21
- fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
- // xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t0*p10_minus_p20
- fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fxch %st(1) // xstepdenominv | ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepy)
-
-// a_sstepxfrac = r_sstepx << 16;
-// a_tstepxfrac = r_tstepx << 16;
-//
-// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
-// (r_sstepx >> 16);
-
- movl C(r_sstepx),%eax
- movl C(r_tstepx),%edx
- shll $16,%eax
- shll $16,%edx
- movl %eax,C(a_sstepxfrac)
- movl %edx,C(a_tstepxfrac)
-
- movl C(r_sstepx),%ecx
- movl C(r_tstepx),%eax
- sarl $16,%ecx
- sarl $16,%eax
- imull skinwidth(%esp) // one-operand form: %edx:%eax = %eax * stack argument; only the low half in %eax is used
- addl %ecx,%eax
- movl %eax,C(a_ststepxwhole)
-
- ret
-
-
-//----------------------------------------------------------------------
-// recursive subdivision affine triangle drawing code
-//
-// not C-callable because of stdcall return
-//----------------------------------------------------------------------
-// Stack arguments lp1, lp2, lp3 are pointers to int arrays (offsets include the four register pushes below); ret $12 removes them.
-#define lp1 4+16
-#define lp2 8+16
-#define lp3 12+16
-// Preserves %ebp/%esi/%edi/%ebx, clobbers %eax/%ecx/%edx; may store one z value (via zspantable) and one pixel (into d_viewbuffer) per call.
-.globl C(D_PolysetRecursiveTriangle)
-C(D_PolysetRecursiveTriangle):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-// int *temp;
-// int d;
-// int new[6];
-// int i;
-// int z;
-// short *zbuf;
- movl lp2(%esp),%esi
- movl lp1(%esp),%ebx
- movl lp3(%esp),%edi
-
-// d = lp2[0] - lp1[0];
-// if (d < -1 || d > 1)
-// goto split;
- movl 0(%esi),%eax
-
- movl 0(%ebx),%edx
- movl 4(%esi),%ebp
-
- subl %edx,%eax
- movl 4(%ebx),%ecx
-
- subl %ecx,%ebp
- incl %eax // d+1: the unsigned cmpl $2 / ja pair then tests (d < -1 || d > 1) with one branch; same idiom for every d below
-
- cmpl $2,%eax
- ja LSplit
-
-// d = lp2[1] - lp1[1];
-// if (d < -1 || d > 1)
-// goto split;
- movl 0(%edi),%eax
- incl %ebp
-
- cmpl $2,%ebp
- ja LSplit
-
-// d = lp3[0] - lp2[0];
-// if (d < -1 || d > 1)
-// goto split2;
- movl 0(%esi),%edx
- movl 4(%edi),%ebp
-
- subl %edx,%eax
- movl 4(%esi),%ecx
-
- subl %ecx,%ebp
- incl %eax
-
- cmpl $2,%eax
- ja LSplit2
-
-// d = lp3[1] - lp2[1];
-// if (d < -1 || d > 1)
-// goto split2;
- movl 0(%ebx),%eax
- incl %ebp
-
- cmpl $2,%ebp
- ja LSplit2
-
-// d = lp1[0] - lp3[0];
-// if (d < -1 || d > 1)
-// goto split3;
- movl 0(%edi),%edx
- movl 4(%ebx),%ebp
-
- subl %edx,%eax
- movl 4(%edi),%ecx
-
- subl %ecx,%ebp
- incl %eax
-
- incl %ebp
- movl %ebx,%edx // temp = lp1, consumed at LSplit3
-
- cmpl $2,%eax
- ja LSplit3
-
-// d = lp1[1] - lp3[1];
-// if (d < -1 || d > 1)
-// {
-//split3:
-// temp = lp1;
-// lp1 = lp3;
-// lp3 = lp2;
-// lp2 = temp;
-// goto split;
-// }
-//
-// return; // entire tri is filled
-//
- cmpl $2,%ebp
- jna LDone
-
-LSplit3:
- movl %edi,%ebx
- movl %esi,%edi
- movl %edx,%esi
- jmp LSplit
-
-//split2:
-LSplit2:
-
-// temp = lp1;
-// lp1 = lp2;
-// lp2 = lp3;
-// lp3 = temp;
- movl %ebx,%eax
- movl %esi,%ebx
- movl %edi,%esi
- movl %eax,%edi
-
-//split:
-LSplit:
-
- subl $24,%esp // allocate space for a new vertex: new[6] at 0(%esp); new[4] (offset 16) is never written
-
-//// split this edge
-// new[0] = (lp1[0] + lp2[0]) >> 1;
-// new[1] = (lp1[1] + lp2[1]) >> 1;
-// new[2] = (lp1[2] + lp2[2]) >> 1;
-// new[3] = (lp1[3] + lp2[3]) >> 1;
-// new[5] = (lp1[5] + lp2[5]) >> 1;
- movl 8(%ebx),%eax
-
- movl 8(%esi),%edx
- movl 12(%ebx),%ecx
-
- addl %edx,%eax
- movl 12(%esi),%edx
-
- sarl $1,%eax
- addl %edx,%ecx
-
- movl %eax,8(%esp)
- movl 20(%ebx),%eax
-
- sarl $1,%ecx
- movl 20(%esi),%edx
-
- movl %ecx,12(%esp)
- addl %edx,%eax
-
- movl 0(%ebx),%ecx
- movl 0(%esi),%edx
-
- sarl $1,%eax
- addl %ecx,%edx
-
- movl %eax,20(%esp)
- movl 4(%ebx),%eax
-
- sarl $1,%edx
- movl 4(%esi),%ebp
-
- movl %edx,0(%esp)
- addl %eax,%ebp
-
- sarl $1,%ebp
- movl %ebp,4(%esp)
-
-//// draw the point if splitting a leading edge
-// if (lp2[1] > lp1[1])
-// goto nodraw;
- cmpl %eax,4(%esi)
- jg LNoDraw
-
-// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
-// goto nodraw;
- movl 0(%esi),%edx
- jnz LDraw // flags are still those of the cmpl above (movl does not alter them): lp2[1] != lp1[1]
-
- cmpl %ecx,%edx
- jl LNoDraw
-
-LDraw:
-
-// z = new[5] >> 16;
- movl 20(%esp),%edx
- movl 4(%esp),%ecx
-
- sarl $16,%edx
- movl 0(%esp),%ebp
-
-// zbuf = zspantable[new[1]] + new[0];
- movl C(zspantable)(,%ecx,4),%eax
-
-// if (z >= *zbuf)
-// {
- cmpw (%eax,%ebp,2),%dx
- jnge LNoDraw
-
-// int pix;
-//
-// *zbuf = z;
- movw %dx,(%eax,%ebp,2)
-
-// pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
- movl 12(%esp),%eax
-
- sarl $16,%eax
- movl 8(%esp),%edx
-
- sarl $16,%edx
- subl %ecx,%ecx
-
- movl C(skintable)(,%eax,4),%eax
- movl 4(%esp),%ebp
-
- movb (%eax,%edx,),%cl
- movl C(d_pcolormap),%edx
-
- movb (%edx,%ecx,),%dl
- movl 0(%esp),%ecx
-
-// d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
- movl C(d_scantable)(,%ebp,4),%eax
- addl %eax,%ecx
- movl C(d_viewbuffer),%eax
- movb %dl,(%eax,%ecx,1)
-
-// }
-//
-//nodraw:
-LNoDraw:
-
-//// recursively continue
-// D_PolysetRecursiveTriangle (lp3, lp1, new);
- pushl %esp // pushes the pre-push %esp, i.e. &new[0]
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
-
-// D_PolysetRecursiveTriangle (lp3, new, lp2);
- movl %esp,%ebx // callee popped its arguments, so %esp is &new[0] again
- pushl %esi
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
- addl $24,%esp // release new[]
-
-LDone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller stack frame pointer
- ret $12
-
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for affine polygons, with smooth
-// shading and no transparency
-//----------------------------------------------------------------------
-// D_PolysetDrawSpans8: one stack argument, a pointer to spanpackage_t entries; iteration stops at an entry whose count is -999999 (the first entry is processed without that check).
-#define pspans 4+8
-// The 0x12345678 displacements below are placeholders that D_Aff8Patch overwrites at run time, so this code must be writable.
-.globl C(D_PolysetAff8Start)
-C(D_PolysetAff8Start):
-
-.globl C(D_PolysetDrawSpans8)
-C(D_PolysetDrawSpans8):
- pushl %esi // preserve register variables
- pushl %ebx
-
- movl pspans(%esp),%esi // point to the first span descriptor
- movl C(r_zistepx),%ecx
-
- pushl %ebp // preserve caller's stack frame
- pushl %edi
-
- rorl $16,%ecx // put high 16 bits of 1/z step in low word
- movl spanpackage_t_count(%esi),%edx
-
- movl %ecx,lzistepx
-
-LSpanLoop:
-
-// lcount = d_aspancount - pspanpackage->count;
-//
-// errorterm += erroradjustup;
-// if (errorterm >= 0)
-// {
-// d_aspancount += d_countextrastep;
-// errorterm -= erroradjustdown;
-// }
-// else
-// {
-// d_aspancount += ubasestep;
-// }
- movl C(d_aspancount),%eax
- subl %edx,%eax
-
- movl C(erroradjustup),%edx
- movl C(errorterm),%ebx
- addl %edx,%ebx
- js LNoTurnover
-
- movl C(erroradjustdown),%edx
- movl C(d_countextrastep),%edi
- subl %edx,%ebx
- movl C(d_aspancount),%ebp
- movl %ebx,C(errorterm)
- addl %edi,%ebp
- movl %ebp,C(d_aspancount)
- jmp LRightEdgeStepped
-
-LNoTurnover:
- movl C(d_aspancount),%edi
- movl C(ubasestep),%edx
- movl %ebx,C(errorterm)
- addl %edx,%edi
- movl %edi,C(d_aspancount)
-
-LRightEdgeStepped:
- cmpl $1,%eax
-
- jl LNextSpan
- jz LExactlyOneLong
-
-//
-// set up advancetable
-//
- movl C(a_ststepxwhole),%ecx
- movl C(r_affinetridesc)+atd_skinwidth,%edx
-
- movl %ecx,advancetable+4 // advance base in t
- addl %edx,%ecx
-
- movl %ecx,advancetable // advance extra in t
- movl C(a_tstepxfrac),%ecx
-
- movw C(r_lstepx),%cx // low word of tstep = light step, high word stays a_tstepxfrac
- movl %eax,%edx // count
-
- movl %ecx,tstep
- addl $7,%edx
-
- shrl $3,%edx // count of full and partial loops
- movl spanpackage_t_sfrac(%esi),%ebx
-
- movw %dx,%bx // %bx = loop count; the high half of %ebx keeps sfrac
- movl spanpackage_t_pz(%esi),%ecx
-
- negl %eax
-
- movl spanpackage_t_pdest(%esi),%edi
- andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
-
- subl %eax,%edi // compensate for hardwired offsets
- subl %eax,%ecx
-
- subl %eax,%ecx // second subtract because the pz offsets used below step by 2 bytes per pixel
- movl spanpackage_t_tfrac(%esi),%edx
-
- movw spanpackage_t_light(%esi),%dx
- movl spanpackage_t_zi(%esi),%ebp
-
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- pushl %esi
-
- movl spanpackage_t_ptex(%esi),%esi
- jmp aff8entryvec_table(,%eax,4) // enter the unrolled loop at the first pixel actually needed
-
-// %bx = count of full and partial loops
-// %ebx high word = sfrac
-// %ecx = pz
-// %dx = light
-// %edx high word = tfrac
-// %esi = ptex
-// %edi = pdest
-// %ebp = 1/z
-// tstep low word = C(r_lstepx)
-// tstep high word = C(a_tstepxfrac)
-// C(a_sstepxfrac) low word = 0
-// C(a_sstepxfrac) high word = C(a_sstepxfrac)
-
-LDrawLoop:
-
-// FIXME: do we need to clamp light? We may need at least a buffer bit to
-// keep it from poking into tfrac and causing problems
-
-LDraw8:
- cmpw (%ecx),%bp
- jl Lp1
- xorl %eax,%eax
- movb %dh,%ah // %eax = (high byte of light << 8) | texel, used as the index for the patched table load
- movb (%esi),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
-LPatch8:
- movb %al,(%edi)
-Lp1:
- addl tstep,%edx
- sbbl %eax,%eax // %eax = -1 if the tfrac add carried, else 0
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi // ptex += advancetable[0] (extra) or [1] (base) plus the carry out of sfrac; same pattern after every pixel below
-
-LDraw7:
- cmpw 2(%ecx),%bp
- jl Lp2
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,2(%ecx)
- movb 0x12345678(%eax),%al
-LPatch7:
- movb %al,1(%edi)
-Lp2:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw6:
- cmpw 4(%ecx),%bp
- jl Lp3
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,4(%ecx)
- movb 0x12345678(%eax),%al
-LPatch6:
- movb %al,2(%edi)
-Lp3:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw5:
- cmpw 6(%ecx),%bp
- jl Lp4
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,6(%ecx)
- movb 0x12345678(%eax),%al
-LPatch5:
- movb %al,3(%edi)
-Lp4:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw4:
- cmpw 8(%ecx),%bp
- jl Lp5
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,8(%ecx)
- movb 0x12345678(%eax),%al
-LPatch4:
- movb %al,4(%edi)
-Lp5:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw3:
- cmpw 10(%ecx),%bp
- jl Lp6
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,10(%ecx)
- movb 0x12345678(%eax),%al
-LPatch3:
- movb %al,5(%edi)
-Lp6:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw2:
- cmpw 12(%ecx),%bp
- jl Lp7
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,12(%ecx)
- movb 0x12345678(%eax),%al
-LPatch2:
- movb %al,6(%edi)
-Lp7:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LDraw1:
- cmpw 14(%ecx),%bp
- jl Lp8
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,14(%ecx)
- movb 0x12345678(%eax),%al
-LPatch1:
- movb %al,7(%edi)
-Lp8:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- addl $8,%edi
- addl $16,%ecx
-
- decw %bx
- jnz LDrawLoop
-
- popl %esi // restore spans pointer
-LNextSpan:
- addl $(spanpackage_t_size),%esi // point to next span
-LNextSpanESISet:
- movl spanpackage_t_count(%esi),%edx
- cmpl $-999999,%edx // any more spans?
- jnz LSpanLoop // yes
-
- popl %edi
- popl %ebp // restore the caller's stack frame
- popl %ebx // restore register variables
- popl %esi
- ret
-
-
-// draw a one-long span
-
-LExactlyOneLong:
-
- movl spanpackage_t_pz(%esi),%ecx
- movl spanpackage_t_zi(%esi),%ebp
-
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- movl spanpackage_t_ptex(%esi),%ebx
-
- cmpw (%ecx),%bp
- jl LNextSpan
- xorl %eax,%eax
- movl spanpackage_t_pdest(%esi),%edi
- movb spanpackage_t_light+1(%esi),%ah
- addl $(spanpackage_t_size),%esi // point to next span
- movb (%ebx),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
-LPatch9:
- movb %al,(%edi)
-
- jmp LNextSpanESISet // %esi was already advanced above, so skip the add at LNextSpan
-
-.globl C(D_PolysetAff8End)
-C(D_PolysetAff8End):
-
-
-#define pcolormap 4
-// D_Aff8Patch: copies its one stack argument into the 32-bit displacement of each patched movb in the span-drawing code above; clobbers %eax.
-.globl C(D_Aff8Patch)
-C(D_Aff8Patch):
- movl pcolormap(%esp),%eax // the value to patch in (no registers pushed, so it sits just above the return address)
- movl %eax,LPatch1-4 // each LPatchN label directly follows its movb 0x12345678(%eax),%al, so LPatchN-4 is that instruction's displacement field
- movl %eax,LPatch2-4
- movl %eax,LPatch3-4
- movl %eax,LPatch4-4
- movl %eax,LPatch5-4
- movl %eax,LPatch6-4
- movl %eax,LPatch7-4
- movl %eax,LPatch8-4
- movl %eax,LPatch9-4 // the one-pixel path at LExactlyOneLong
-
- ret
-
-
-//----------------------------------------------------------------------
-// Alias model polygon dispatching code, combined with subdivided affine
-// triangle drawing code
-//----------------------------------------------------------------------
-
-.globl C(D_PolysetDraw)
-C(D_PolysetDraw):
-
-// spanpackage_t spans[DPS_MAXSPANS + 1 +
-// ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
-// // one extra because of cache line pretouching
-//
-// a_spans = (spanpackage_t *)
-// (((intptr)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
- subl $(SPAN_SIZE),%esp
- movl %esp,%eax
- addl $(CACHE_SIZE - 1),%eax
- andl $(~(CACHE_SIZE - 1)),%eax
- movl %eax,C(a_spans)
-
-// if (r_affinetridesc.drawtype)
-// D_DrawSubdiv ();
-// else
-// D_DrawNonSubdiv ();
- movl C(r_affinetridesc)+atd_drawtype,%eax
- testl %eax,%eax
- jz C(D_DrawNonSubdiv)
-
- pushl %ebp // preserve caller stack frame pointer
-
-// lnumtriangles = r_affinetridesc.numtriangles;
- movl C(r_affinetridesc)+atd_numtriangles,%ebp
-
- pushl %esi // preserve register variables
- shll $4,%ebp
-
- pushl %ebx
-// ptri = r_affinetridesc.ptriangles;
- movl C(r_affinetridesc)+atd_ptriangles,%ebx
-
- pushl %edi
-
-// mtriangle_t *ptri;
-// finalvert_t *pfv, *index0, *index1, *index2;
-// int i;
-// int lnumtriangles;
-// int s0, s1, s2;
-
-// pfv = r_affinetridesc.pfinalverts;
- movl C(r_affinetridesc)+atd_pfinalverts,%edi
-
-// for (i=0 ; i<lnumtriangles ; i++)
-// {
-
-Llooptop:
-
-// index0 = pfv + ptri[i].vertindex[0];
-// index1 = pfv + ptri[i].vertindex[1];
-// index2 = pfv + ptri[i].vertindex[2];
- movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx
- movl mtri_vertindex-16+4(%ebx,%ebp,),%esi
-
- shll $(fv_shift),%ecx
- movl mtri_vertindex-16+8(%ebx,%ebp,),%edx
-
- shll $(fv_shift),%esi
- addl %edi,%ecx
-
- shll $(fv_shift),%edx
- addl %edi,%esi
-
- addl %edi,%edx
-
-// if (((index0->v[1]-index1->v[1]) *
-// (index0->v[0]-index2->v[0]) -
-// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
-// {
-// continue;
-// }
-//
-// d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
- fildl fv_v+4(%ecx) // i0v1
- fildl fv_v+4(%esi) // i1v1 | i0v1
- fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1
- fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1
- fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1
- fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1
- fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
- fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
- fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
- fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
- fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
- fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
- fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
- fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
- fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
- movl fv_v+16(%ecx),%eax
- andl $0xFF00,%eax
- fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
- addl C(acolormap),%eax
- fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
- movl %eax,C(d_pcolormap)
- fstps Ltemp
- movl Ltemp,%eax
- subl $0x80000001,%eax
- jc Lskip
-
-// if (ptri[i].facesfront)
-// {
-// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
- movl mtri_facesfront-16(%ebx,%ebp,),%eax
- testl %eax,%eax
- jz Lfacesback
-
- pushl %edx
- pushl %esi
- pushl %ecx
- call C(D_PolysetRecursiveTriangle)
-
- subl $16,%ebp
- jnz Llooptop
- jmp Ldone2
-
-// }
-// else
-// {
-Lfacesback:
-
-// s0 = index0->v[2];
-// s1 = index1->v[2];
-// s2 = index2->v[2];
- movl fv_v+8(%ecx),%eax
- pushl %eax
- movl fv_v+8(%esi),%eax
- pushl %eax
- movl fv_v+8(%edx),%eax
- pushl %eax
- pushl %ecx
- pushl %edx
-
-// if (index0->flags & ALIAS_ONSEAM)
-// index0->v[2] += r_affinetridesc.seamfixupX16;
- movl C(r_affinetridesc)+atd_seamfixupX16,%eax
- testl $(ALIAS_ONSEAM),fv_flags(%ecx)
- jz Lp11
- addl %eax,fv_v+8(%ecx)
-Lp11:
-
-// if (index1->flags & ALIAS_ONSEAM)
-// index1->v[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),fv_flags(%esi)
- jz Lp12
- addl %eax,fv_v+8(%esi)
-Lp12:
-
-// if (index2->flags & ALIAS_ONSEAM)
-// index2->v[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),fv_flags(%edx)
- jz Lp13
- addl %eax,fv_v+8(%edx)
-Lp13:
-
-// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
- pushl %edx
- pushl %esi
- pushl %ecx
- call C(D_PolysetRecursiveTriangle)
-
-// index0->v[2] = s0;
-// index1->v[2] = s1;
-// index2->v[2] = s2;
- popl %edx
- popl %ecx
- popl %eax
- movl %eax,fv_v+8(%edx)
- popl %eax
- movl %eax,fv_v+8(%esi)
- popl %eax
- movl %eax,fv_v+8(%ecx)
-
-// }
-// }
-Lskip:
- subl $16,%ebp
- jnz Llooptop
-
-Ldone2:
- popl %edi // restore the caller's stack frame
- popl %ebx
- popl %esi // restore register variables
- popl %ebp
-
- addl $(SPAN_SIZE),%esp
-
- ret
-
-
-//----------------------------------------------------------------------
-// Alias model triangle left-edge scanning code: writes one spanpackage_t per loop iteration at d_pedgespanpackage, then steps the d_* edge state
-//----------------------------------------------------------------------
-
-#define height 4+16
-
-.globl C(D_PolysetScanLeftEdge)
-C(D_PolysetScanLeftEdge):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
- movl height(%esp),%eax // height argument: 4 bytes of return address + 16 bytes of pushes above %esp
- movl C(d_sfrac),%ecx
- andl $0xFFFF,%eax // only the low 16 bits of height are used as the loop count
- movl C(d_ptex),%ebx
- orl %eax,%ecx // pack the count into the low word; assumes d_sfrac's low word is zero on entry -- TODO confirm
- movl C(d_pedgespanpackage),%esi
- movl C(d_tfrac),%edx
- movl C(d_light),%edi
- movl C(d_zi),%ebp
-
-// %eax: scratch
-// %ebx: d_ptex
-// %ecx: d_sfrac in high word, count in low word
-// %edx: d_tfrac
-// %esi: d_pedgespanpackage, errorterm, scratch alternately
-// %edi: d_light
-// %ebp: d_zi
-
-// do
-// {
-
-LScanLoop:
-
-// d_pedgespanpackage->ptex = ptex;
-// d_pedgespanpackage->pdest = d_pdest;
-// d_pedgespanpackage->pz = d_pz;
-// d_pedgespanpackage->count = d_aspancount;
-// d_pedgespanpackage->light = d_light;
-// d_pedgespanpackage->zi = d_zi;
-// d_pedgespanpackage->sfrac = d_sfrac << 16;
-// d_pedgespanpackage->tfrac = d_tfrac << 16;
- movl %ebx,spanpackage_t_ptex(%esi)
- movl C(d_pdest),%eax
- movl %eax,spanpackage_t_pdest(%esi)
- movl C(d_pz),%eax
- movl %eax,spanpackage_t_pz(%esi)
- movl C(d_aspancount),%eax
- movl %eax,spanpackage_t_count(%esi)
- movl %edi,spanpackage_t_light(%esi)
- movl %ebp,spanpackage_t_zi(%esi)
- movl %ecx,spanpackage_t_sfrac(%esi) // note: the low word of the stored value is the remaining loop count, not zero
- movl %edx,spanpackage_t_tfrac(%esi)
-
-// pretouch the next cache line
- movb spanpackage_t_size(%esi),%al
-
-// d_pedgespanpackage++;
- addl $(spanpackage_t_size),%esi
- movl C(erroradjustup),%eax
- movl %esi,C(d_pedgespanpackage)
-
-// errorterm += erroradjustup;
- movl C(errorterm),%esi
- addl %eax,%esi
- movl C(d_pdest),%eax // loaded for both branches; movl does not change the flags set by the addl
-
-// if (errorterm >= 0)
-// {
- js LNoLeftEdgeTurnover // sign flag still comes from the errorterm addl above
-
-// errorterm -= erroradjustdown;
-// d_pdest += d_pdestextrastep;
- subl C(erroradjustdown),%esi
- addl C(d_pdestextrastep),%eax
- movl %esi,C(errorterm)
- movl %eax,C(d_pdest)
-
-// d_pz += d_pzextrastep;
-// d_aspancount += d_countextrastep;
-// d_ptex += d_ptexextrastep;
-// d_sfrac += d_sfracextrastep;
-// d_ptex += d_sfrac >> 16;
-// d_sfrac &= 0xFFFF;
-// d_tfrac += d_tfracextrastep;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzextrastep),%eax
- addl C(d_sfracextrastep),%ecx
- adcl C(d_ptexextrastep),%ebx // the carry out of the sfrac add is folded into d_ptex here
- addl C(d_countextrastep),%esi
- movl %eax,C(d_pz)
- movl C(d_tfracextrastep),%eax
- movl %esi,C(d_aspancount)
- addl %eax,%edx // d_tfrac += d_tfracextrastep; the carry out feeds the jnc below
-
-// if (d_tfrac & 0x10000) -- tested here as the carry out of the 32-bit add above
-// {
- jnc LSkip1
-
-// d_ptex += r_affinetridesc.skinwidth;
-// d_tfrac &= 0xFFFF; -- no instruction needed: the carry bit is simply dropped from %edx
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
-
-// }
-
-LSkip1:
-
-// d_light += d_lightextrastep;
-// d_zi += d_ziextrastep;
- addl C(d_lightextrastep),%edi
- addl C(d_ziextrastep),%ebp
-
-// }
- movl C(d_pedgespanpackage),%esi
- decl %ecx // --height: the count lives in the low word of %ecx
- testl $0xFFFF,%ecx // NOTE(review): a count of 0 on entry would borrow from the sfrac half and run 0xFFFF more iterations
- jnz LScanLoop
-
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-// else
-// {
-
-LNoLeftEdgeTurnover:
- movl %esi,C(errorterm)
-
-// d_pdest += d_pdestbasestep;
- addl C(d_pdestbasestep),%eax
- movl %eax,C(d_pdest)
-
-// d_pz += d_pzbasestep;
-// d_aspancount += ubasestep;
-// d_ptex += d_ptexbasestep;
-// d_sfrac += d_sfracbasestep;
-// d_ptex += d_sfrac >> 16;
-// d_sfrac &= 0xFFFF;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzbasestep),%eax
- addl C(d_sfracbasestep),%ecx
- adcl C(d_ptexbasestep),%ebx // the carry out of the sfrac add is folded into d_ptex here
- addl C(ubasestep),%esi
- movl %eax,C(d_pz)
- movl %esi,C(d_aspancount)
-
-// d_tfrac += d_tfracbasestep;
- movl C(d_tfracbasestep),%esi
- addl %esi,%edx // the carry out feeds the jnc below
-
-// if (d_tfrac & 0x10000) -- tested here as the carry out of the 32-bit add above
-// {
- jnc LSkip2
-
-// d_ptex += r_affinetridesc.skinwidth;
-// d_tfrac &= 0xFFFF; -- no instruction needed: the carry bit is simply dropped from %edx
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
-
-// }
-
-LSkip2:
-
-// d_light += d_lightbasestep;
-// d_zi += d_zibasestep;
- addl C(d_lightbasestep),%edi
- addl C(d_zibasestep),%ebp
-
-// }
-// } while (--height);
- movl C(d_pedgespanpackage),%esi
- decl %ecx // --height: the count lives in the low word of %ecx
- testl $0xFFFF,%ecx
- jnz LScanLoop
-
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-
-//----------------------------------------------------------------------
-// Alias model vertex drawing code: plots each finalvert as one z-tested, colormapped pixel
-//----------------------------------------------------------------------
-
-#define fv 4+8
-#define numverts 8+8
-
-.globl C(D_PolysetDrawFinalVerts)
-C(D_PolysetDrawFinalVerts):
- pushl %ebp // preserve caller stack frame pointer
- pushl %ebx
-
-// int i, z;
-// short *zbuf;
-
- movl numverts(%esp),%ecx // argument offsets assume only %ebp/%ebx are pushed so far; %esi/%edi are pushed after these loads
- movl fv(%esp),%ebx
-
- pushl %esi // preserve register variables
- pushl %edi
-
-LFVLoop:
-
-// for (i=0 ; i<numverts ; i++, fv++)
-// {
-// // valid triangle coordinates for filling can include the bottom and
-// // right clip edges, due to the fill rule; these shouldn't be drawn
-// if ((fv->v[0] < r_refdef.vrectright) &&
-// (fv->v[1] < r_refdef.vrectbottom))
-// {
- movl fv_v+0(%ebx),%eax
- movl C(r_refdef)+rd_vrectright,%edx
- cmpl %edx,%eax
- jge LNextVert
- movl fv_v+4(%ebx),%esi
- movl C(r_refdef)+rd_vrectbottom,%edx
- cmpl %edx,%esi
- jge LNextVert
-
-// zbuf = zspantable[fv->v[1]] + fv->v[0];
- movl C(zspantable)(,%esi,4),%edi
-
-// z = fv->v[5]>>16;
- movl fv_v+20(%ebx),%edx
- shrl $16,%edx // logical shift is enough: only %dx (the low 16 bits) is used below
-
-// if (z >= *zbuf)
-// {
-// int pix;
- cmpw (%edi,%eax,2),%dx
- jl LNextVert // skip when z < *zbuf (signed 16-bit compare)
-
-// *zbuf = z;
- movw %dx,(%edi,%eax,2)
-
-// pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
- movl fv_v+12(%ebx),%edi
- shrl $16,%edi // logical shift: matches the C >>16 only for non-negative v[3] -- presumably always the case, verify
- movl C(skintable)(,%edi,4),%edi
- movl fv_v+8(%ebx),%edx
- shrl $16,%edx
- movb (%edi,%edx),%dl
-
-// pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
- movl fv_v+16(%ebx),%edi
- andl $0xFF00,%edi
- andl $0x00FF,%edx // movb only replaced %dl; clear the stale upper bits of %edx
- addl %edx,%edi
- movl C(acolormap),%edx
- movb (%edx,%edi,1),%dl
-
-// d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
- movl C(d_scantable)(,%esi,4),%edi
- movl C(d_viewbuffer),%esi
- addl %eax,%edi
- movb %dl,(%esi,%edi)
-
-// }
-// }
-// }
-LNextVert:
- addl $(fv_size),%ebx
- decl %ecx
- jnz LFVLoop // NOTE(review): count is tested only after the body, so numverts == 0 is not handled like the C for-loop
-
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-
-
-//----------------------------------------------------------------------
-// Alias model non-subdivided polygon dispatching code
-//
-// not C-callable because of stack buffer cleanup
-//----------------------------------------------------------------------
-
-.globl C(D_DrawNonSubdiv)
-C(D_DrawNonSubdiv):
- pushl %ebp // preserve caller stack frame pointer
- movl C(r_affinetridesc)+atd_numtriangles,%ebp
- pushl %ebx
- shll $(mtri_shift),%ebp // %ebp = byte offset just past the last triangle; the loop walks it down to 0
- pushl %esi // preserve register variables
- movl C(r_affinetridesc)+atd_ptriangles,%esi
- pushl %edi
-
-// mtriangle_t *ptri;
-// finalvert_t *pfv, *index0, *index1, *index2;
-// int i;
-// int lnumtriangles;
-
-// pfv = r_affinetridesc.pfinalverts;
-// ptri = r_affinetridesc.ptriangles;
-// lnumtriangles = r_affinetridesc.numtriangles;
-
-LNDLoop:
-
-// for (i=0 ; i<lnumtriangles ; i++, ptri++)
-// {
-// index0 = pfv + ptri->vertindex[0];
-// index1 = pfv + ptri->vertindex[1];
-// index2 = pfv + ptri->vertindex[2];
- movl C(r_affinetridesc)+atd_pfinalverts,%edi
- movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx // -mtri_size: %ebp points one triangle past the current one (last triangle is processed first)
- shll $(fv_shift),%ecx
- movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
- shll $(fv_shift),%edx
- movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
- shll $(fv_shift),%ebx
- addl %edi,%ecx
- addl %edi,%edx
- addl %edi,%ebx
-
-// d_xdenom = (index0->v[1]-index1->v[1]) *
-// (index0->v[0]-index2->v[0]) -
-// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
- movl fv_v+4(%ecx),%eax
- movl fv_v+0(%ecx),%esi // %esi (ptriangles) becomes scratch from here on; it is reloaded at LNextTri
- subl fv_v+4(%edx),%eax
- subl fv_v+0(%ebx),%esi
- imull %esi,%eax
- movl fv_v+0(%ecx),%esi
- movl fv_v+4(%ecx),%edi
- subl fv_v+0(%edx),%esi
- subl fv_v+4(%ebx),%edi
- imull %esi,%edi
- subl %edi,%eax
-
-// if (d_xdenom >= 0)
-// {
-// continue;
- jns LNextTri // sign flag from the subl above
-
-// }
-
- movl %eax,C(d_xdenom)
- fildl C(d_xdenom) // st(0) = (float)d_xdenom; stays on the FP stack until LFacesFront
-
-// r_p0[0] = index0->v[0]; // u
-// r_p0[1] = index0->v[1]; // v
-// r_p0[2] = index0->v[2]; // s
-// r_p0[3] = index0->v[3]; // t
-// r_p0[4] = index0->v[4]; // light
-// r_p0[5] = index0->v[5]; // iz
- movl fv_v+0(%ecx),%eax
- movl fv_v+4(%ecx),%esi
- movl %eax,C(r_p0)+0
- movl %esi,C(r_p0)+4
- movl fv_v+8(%ecx),%eax
- movl fv_v+12(%ecx),%esi
- movl %eax,C(r_p0)+8
- movl %esi,C(r_p0)+12
- movl fv_v+16(%ecx),%eax
- movl fv_v+20(%ecx),%esi
- movl %eax,C(r_p0)+16
- movl %esi,C(r_p0)+20
-
- fdivrs float_1 // st(0) = float_1 / d_xdenom, issued here so the integer copies below run while it completes
-
-// r_p1[0] = index1->v[0];
-// r_p1[1] = index1->v[1];
-// r_p1[2] = index1->v[2];
-// r_p1[3] = index1->v[3];
-// r_p1[4] = index1->v[4];
-// r_p1[5] = index1->v[5];
- movl fv_v+0(%edx),%eax
- movl fv_v+4(%edx),%esi
- movl %eax,C(r_p1)+0
- movl %esi,C(r_p1)+4
- movl fv_v+8(%edx),%eax
- movl fv_v+12(%edx),%esi
- movl %eax,C(r_p1)+8
- movl %esi,C(r_p1)+12
- movl fv_v+16(%edx),%eax
- movl fv_v+20(%edx),%esi
- movl %eax,C(r_p1)+16
- movl %esi,C(r_p1)+20
-
-// r_p2[0] = index2->v[0];
-// r_p2[1] = index2->v[1];
-// r_p2[2] = index2->v[2];
-// r_p2[3] = index2->v[3];
-// r_p2[4] = index2->v[4];
-// r_p2[5] = index2->v[5];
- movl fv_v+0(%ebx),%eax
- movl fv_v+4(%ebx),%esi
- movl %eax,C(r_p2)+0
- movl %esi,C(r_p2)+4
- movl fv_v+8(%ebx),%eax
- movl fv_v+12(%ebx),%esi
- movl %eax,C(r_p2)+8
- movl %esi,C(r_p2)+12
- movl fv_v+16(%ebx),%eax
- movl fv_v+20(%ebx),%esi
- movl %eax,C(r_p2)+16
- movl C(r_affinetridesc)+atd_ptriangles,%edi // reload ptriangles: %esi was reused as scratch above
- movl %esi,C(r_p2)+20
- movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
-
-// if (!ptri->facesfront)
-// {
- testl %eax,%eax
- jnz LFacesFront
-
-// if (index0->flags & ALIAS_ONSEAM)
-// r_p0[2] += r_affinetridesc.seamfixupX16;
- movl fv_flags(%ecx),%eax
- movl fv_flags(%edx),%esi
- movl fv_flags(%ebx),%edi
- testl $(ALIAS_ONSEAM),%eax
- movl C(r_affinetridesc)+atd_seamfixupX16,%eax // movl leaves the testl flags intact for the jz below
- jz LOnseamDone0
- addl %eax,C(r_p0)+8
-LOnseamDone0:
-
-// if (index1->flags & ALIAS_ONSEAM)
-// r_p1[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),%esi
- jz LOnseamDone1
- addl %eax,C(r_p1)+8
-LOnseamDone1:
-
-// if (index2->flags & ALIAS_ONSEAM)
-// r_p2[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),%edi
- jz LOnseamDone2
- addl %eax,C(r_p2)+8
-LOnseamDone2:
-
-// }
-
-LFacesFront:
-
- fstps C(d_xdenom) // overwrite the integer d_xdenom with the single-precision reciprocal and pop it
-
-// D_PolysetSetEdgeTable ();
-// D_RasterizeAliasPolySmooth ();
- call C(D_PolysetSetEdgeTable)
- call C(D_RasterizeAliasPolySmooth)
-
-LNextTri:
- movl C(r_affinetridesc)+atd_ptriangles,%esi
- subl $16,%ebp // step back one triangle; NOTE(review): literal 16 must equal mtri_size (1<<mtri_shift) used above
- jnz LNDLoop // NOTE(review): tested only after the body, so numtriangles == 0 is not handled like the C for-loop
-// }
-
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
-
- addl $(SPAN_SIZE),%esp // drop SPAN_SIZE bytes before ret -- the "stack buffer cleanup" from the header; presumably reserved by the asm caller, not visible here
-
- ret
-
-
-#endif // id386
-
--- a/d_scana.s
+++ /dev/null
@@ -1,70 +1,0 @@
-//
-// d_scana.s
-// x86 assembly-language turbulent texture mapping code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-
- .text
-
-//----------------------------------------------------------------------
-// turbulent texture mapping code: draws r_turb_spancount pixels at r_turb_pdest, offsetting s and t through the r_turb_turb table
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(D_DrawTurbulent8Span)
-C(D_DrawTurbulent8Span):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
- movl C(r_turb_s),%esi
- movl C(r_turb_t),%ecx
- movl C(r_turb_pdest),%edi
- movl C(r_turb_spancount),%ebx
-
-Llp:
- movl %ecx,%eax // %eax = t
- movl %esi,%edx // %edx = s
- sarl $16,%eax // t >> 16
- movl C(r_turb_turb),%ebp
- sarl $16,%edx // s >> 16
- andl $(CYCLE-1),%eax // wrap both table indices to CYCLE entries
- andl $(CYCLE-1),%edx
- movl (%ebp,%eax,4),%eax // %eax = r_turb_turb[(t>>16)&(CYCLE-1)]
- movl (%ebp,%edx,4),%edx // %edx = r_turb_turb[(s>>16)&(CYCLE-1)]
- addl %esi,%eax // s + offset looked up from t
- sarl $16,%eax
- addl %ecx,%edx // t + offset looked up from s
- sarl $16,%edx
- andl $(TURB_TEX_SIZE-1),%eax // texture column
- andl $(TURB_TEX_SIZE-1),%edx // texture row
- shll $6,%edx // row * 64; NOTE(review): literal 6 presumably equals log2(TURB_TEX_SIZE) -- confirm
- movl C(r_turb_pbase),%ebp
- addl %eax,%edx // texel offset = row*64 + column
- incl %edi // advance pdest now; the store below uses -1(%edi)
- addl C(r_turb_sstep),%esi
- addl C(r_turb_tstep),%ecx
- movb (%ebp,%edx,1),%dl
- decl %ebx
- movb %dl,-1(%edi) // movb keeps the decl flags for the jnz
- jnz Llp // count is tested after the body, so r_turb_spancount must be > 0 on entry
-
- movl %edi,C(r_turb_pdest) // only pdest is written back; the advanced s/t in %esi/%ecx are not stored
-
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-#endif // id386
-
--- a/d_spr8.s
+++ /dev/null
@@ -1,881 +1,0 @@
-//
-// d_spr8.s
-// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// 8-bpp horizontal span drawing code for polygons, with transparency.
-//----------------------------------------------------------------------
-
- .text
-
-// out-of-line, rarely-needed clamping code; each stub loads the clamped value and jumps back to its LClampReentryN label
-
-LClampHigh0:
- movl C(bbextents),%esi // clamp s to the upper bound
- jmp LClampReentry0
-LClampHighOrLow0:
- jg LClampHigh0 // flags are still those of the cmpl before the ja that got here: signed-greater means too high
- xorl %esi,%esi // otherwise the value was negative (it only looked large to the unsigned ja): clamp to 0
- jmp LClampReentry0
-
-LClampHigh1:
- movl C(bbextentt),%edx // clamp t to the upper bound
- jmp LClampReentry1
-LClampHighOrLow1:
- jg LClampHigh1 // same flag reuse as LClampHighOrLow0
- xorl %edx,%edx
- jmp LClampReentry1
-
-LClampLow2:
- movl $2048,%ebp // lower bound; same literal as the cmpl $2048 at the branch site
- jmp LClampReentry2
-LClampHigh2:
- movl C(bbextents),%ebp
- jmp LClampReentry2
-
-LClampLow3:
- movl $2048,%ecx
- jmp LClampReentry3
-LClampHigh3:
- movl C(bbextentt),%ecx
- jmp LClampReentry3
-
-LClampLow4:
- movl $2048,%eax
- jmp LClampReentry4
-LClampHigh4:
- movl C(bbextents),%eax
- jmp LClampReentry4
-
-LClampLow5:
- movl $2048,%ebx
- jmp LClampReentry5
-LClampHigh5:
- movl C(bbextentt),%ebx
- jmp LClampReentry5
-
-
-#define pspans 4+16
-
- .align 4
-.globl C(D_SpriteDrawSpans)
-C(D_SpriteDrawSpans):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-//
-// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
-// and span list pointers, and 1/z step in 0.32 fixed-point
-//
-// FIXME: any overlap from rearranging?
- flds C(d_sdivzstepu)
- fmuls fp_8 // sdivz*8
- movl C(cacheblock),%edx
- flds C(d_tdivzstepu)
- fmuls fp_8 // tdivz*8 | sdivz*8
- movl pspans(%esp),%ebx // point to the first span descriptor
- flds C(d_zistepu)
- fmuls fp_8 // zi*8 | tdivz*8 | sdivz*8
- movl %edx,pbase // pbase = cacheblock
- flds C(d_zistepu)
- fmuls fp_64kx64k // zi*fp_64kx64k | zi*8 | tdivz*8 | sdivz*8
- fxch %st(3) // sdivz*8 | zi*8 | tdivz*8 | zi*fp_64kx64k
- fstps sdivz8stepu
- fstps zi8stepu
- fstps tdivz8stepu
- fistpl izistep // integer 1/z step (0.32 fixed-point per the header comment above)
- movl izistep,%eax
- rorl $16,%eax // put upper 16 bits in low word
- movl sspan_t_count(%ebx),%ecx
- movl %eax,izistep
-
- cmpl $0,%ecx
- jle LNextSpan // NOTE(review): a first span with count < 0 is also stepped past here rather than ending the list as it does at LNextSpan
-
-LSpanLoop:
-
-//
-// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
-// initial s and t values
-//
-// FIXME: pipeline FILD?
- fildl sspan_t_v(%ebx)
- fildl sspan_t_u(%ebx)
-
- fld %st(1) // dv | du | dv
- fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
- fld %st(1) // du | dv*d_sdivzstepv | du | dv
- fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
- fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
- // dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
- // dv*d_sdivzstepv | du | dv
- faddp %st(0),%st(2) // du*d_tdivzstepu |
- // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
- // du*d_sdivzstepu + dv*d_sdivzstepv |
- // du*d_tdivzstepu | du | dv
- fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
- // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
- fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
- // du*d_sdivzstepu; stays in %st(2) at end
- fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
- // s/z
- fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
- // du*d_tdivzstepu | du | s/z
- fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
- // du*d_tdivzstepu | du | s/z
- faddp %st(0),%st(2) // dv*d_zistepv |
- // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
- fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fmuls C(d_zistepu) // du*d_zistepu |
- // dv*d_tdivzstepv + du*d_tdivzstepu |
- // dv*d_zistepv | s/z
- fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
- // du*d_zistepu | dv*d_zistepv | s/z
- fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
- // du*d_tdivzstepu; stays in %st(1) at end
- fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
- faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
-
- flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
- fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
- fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
- // du*d_zistepu; stays in %st(0) at end
- // 1/z | fp_64k | t/z | s/z
-
- fld %st(0) // FIXME: get rid of stall on FMUL?
- fmuls fp_64kx64k // 1/z*fp_64kx64k | 1/z | fp_64k | t/z | s/z
- fxch %st(1) // 1/z | 1/z*fp_64kx64k | fp_64k | t/z | s/z
-
-//
-// calculate and clamp s & t
-//
- fdivr %st(0),%st(2) // 1/z | 1/z*fp_64kx64k | z*64k | t/z | s/z (the scaled copy is popped by the fistpl below)
- fxch %st(1)
-
- fistpl izi // 0.32 fixed-point 1/z; leaves 1/z | z*64k | t/z | s/z
- movl izi,%ebp
-
-//
-// set pz to point to the first z-buffer pixel in the span
-//
- rorl $16,%ebp // put upper 16 bits in low word
- movl sspan_t_v(%ebx),%eax
- movl %ebp,izi
- movl sspan_t_u(%ebx),%ebp
- imull C(d_zrowbytes) // %eax = v * d_zrowbytes (the one-operand form also overwrites %edx)
- shll $1,%ebp // a word per pixel
- addl C(d_pzbuffer),%eax
- addl %ebp,%eax
- movl %eax,pz
-
-//
-// point %edi to the first pixel in the span
-//
- movl C(d_viewbuffer),%ebp
- movl sspan_t_v(%ebx),%eax
- pushl %ebx // preserve spans pointer
- movl C(tadjust),%edx
- movl C(sadjust),%esi
- movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
- addl %ebp,%edi
- movl sspan_t_u(%ebx),%ebp
- addl %ebp,%edi // pdest = &pdestspan[scans->u];
-
-//
-// now start the FDIV for the end of the span
-//
- cmpl $8,%ecx
- ja LSetupNotLast1
-
- decl %ecx // from here on %ecx = pixels in the span minus one
- jz LCleanup1 // if only one pixel, no need to start an FDIV
- movl %ecx,spancountminus1
-
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fildl spancountminus1
-
- flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
- flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
- fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
- fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
- fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
- fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
- fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
- // _d_tdivzstepu*scm1
- fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
- // _d_tdivzstepu*scm1
- faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
- fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
- faddp %st(0),%st(3) // _d_sdivzstepu*scm1
- faddp %st(0),%st(3) // stack is back to 1/z | t/z | s/z, each advanced by scm1 steps
-
- flds fp_64k
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight1
-
-LCleanup1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
- jmp LFDIVInFlight1 // single-pixel span: no end-of-span z is pushed on this path
-
- .align 4
-LSetupNotLast1:
-// finish up the s and t calcs
- fxch %st(1) // z*64k | 1/z | t/z | s/z
-
- fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
- fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
- fxch %st(1) // z*64k | s | 1/z | t/z | s/z
- fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
- fxch %st(1) // s | t | 1/z | t/z | s/z
- fistpl s // 1/z | t | t/z | s/z
- fistpl t // 1/z | t/z | s/z
-
- fadds zi8stepu
- fxch %st(2)
- fadds sdivz8stepu
- fxch %st(2)
- flds tdivz8stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = fp_64k / (1/z), for the end of the next 8 pixels
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight1:
-
- addl s,%esi // %esi = sadjust + s (sadjust/tadjust were loaded into %esi/%edx before the FDIV setup)
- addl t,%edx // %edx = tadjust + t
- movl C(bbextents),%ebx
- movl C(bbextentt),%ebp
- cmpl %ebx,%esi
- ja LClampHighOrLow0 // unsigned compare: taken for values above bbextents and for negative values
-LClampReentry0:
- movl %esi,s
- movl pbase,%ebx
- shll $16,%esi // left-justify the fractional 16 bits of s
- cmpl %ebp,%edx
- movl %esi,sfracf // movl keeps the cmpl flags for the ja below
- ja LClampHighOrLow1
-LClampReentry1:
- movl %edx,t
- movl s,%esi // sfrac = scans->sfrac;
- shll $16,%edx // left-justify the fractional 16 bits of t
- movl t,%eax // tfrac = scans->tfrac;
- sarl $16,%esi
- movl %edx,tfracf
-
-//
-// calculate the texture starting address
-//
- sarl $16,%eax
- addl %ebx,%esi
- imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
- addl %eax,%esi // psource = pbase + (sfrac >> 16) +
- // ((tfrac >> 16) * cachewidth);
-
-//
-// determine whether this is the last segment of the span or not
-//
- cmpl $8,%ecx
- jna LLastSegment // %ecx <= 8 here means it was already decremented to (pixels - 1)
-
-//
-// not the last segment; do full 8-wide segment
-//
-LNotLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there
-//
-
-// pick up after the FDIV that was left in flight previously
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
- movl snext,%eax
- movl tnext,%edx
-
- subl $8,%ecx // count off this segment's pixels
- movl C(sadjust),%ebp
- pushl %ecx // remember count of remaining pixels
- movl C(tadjust),%ecx
-
- addl %eax,%ebp
- addl %edx,%ecx
-
- movl C(bbextents),%eax
- movl C(bbextentt),%edx
-
- cmpl $2048,%ebp
- jl LClampLow2
- cmpl %eax,%ebp
- ja LClampHigh2
-LClampReentry2:
-
- cmpl $2048,%ecx
- jl LClampLow3
- cmpl %edx,%ecx
- ja LClampHigh3
-LClampReentry3:
-
- movl %ebp,snext
- movl %ecx,tnext
-
- subl s,%ebp // %ebp = s delta across the 8-pixel segment
- subl t,%ecx // %ecx = t delta across the 8-pixel segment
-
-//
-// set up advancetable
-//
- movl %ecx,%eax
- movl %ebp,%edx
- sarl $19,%edx // integer part of the per-pixel sstep: >>3 divides the 8-pixel delta by 8, >>16 drops the fraction
- movl C(cachewidth),%ebx
- sarl $19,%eax // integer part of the per-pixel tstep (same >>3 and >>16)
- jz LIsZero
- imull %ebx,%eax // (tstep >> 16) * cachewidth;
-LIsZero:
- addl %edx,%eax // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%edx
- movl %eax,advancetable+4 // advance base in t
- addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $13,%ebp // left-justify sstep fractional part (<<16 for the fraction, >>3 for the divide by 8)
- movl %ebp,sstep
- movl sfracf,%ebx
- shll $13,%ecx // left-justify tstep fractional part
- movl %eax,advancetable // advance extra in t
- movl %ecx,tstep
-
- movl pz,%ecx
- movl izi,%ebp
-
- cmpw (%ecx),%bp // %bp = upper 16 bits of 1/z (izi is kept rotated by 16)
- jl Lp1 // skip the pixel when 1/z is below the z-buffer value (signed 16-bit compare)
- movb (%esi),%al // get first source texel
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp1
- movw %bp,(%ecx)
- movb %al,(%edi) // store first dest pixel
-Lp1:
- addl izistep,%ebp // both values are rotated by 16, so the fraction lives in the high word...
- adcl $0,%ebp // ...and its carry out is wrapped around into the low (integer) word
- addl tstep,%edx // advance tfrac fractional part by tstep frac
-
- sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
- addl sstep,%ebx // advance sfrac fractional part by sstep frac
- adcl advancetable+4(,%eax,4),%esi // point to next source texel
-
- cmpw 2(%ecx),%bp
- jl Lp2
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp2
- movw %bp,2(%ecx)
- movb %al,1(%edi)
-Lp2:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- cmpw 4(%ecx),%bp
- jl Lp3
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp3
- movw %bp,4(%ecx)
- movb %al,2(%edi)
-Lp3:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- cmpw 6(%ecx),%bp
- jl Lp4
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp4
- movw %bp,6(%ecx)
- movb %al,3(%edi)
-Lp4:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- cmpw 8(%ecx),%bp
- jl Lp5
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp5
- movw %bp,8(%ecx)
- movb %al,4(%edi)
-Lp5:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-//
-// start FDIV for end of next segment in flight, so it can overlap
-//
- popl %eax // remaining pixel count pushed at the top of the segment
- cmpl $8,%eax // more than one segment after this?
- ja LSetupNotLast2 // yes
-
- decl %eax // final segment: keep (pixels - 1), i.e. the number of steps
- jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
- movl %eax,spancountminus1
- fildl spancountminus1
-
- flds C(d_zistepu) // _d_zistepu | spancountminus1
- fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
- flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
- fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
- fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
- faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
- fxch %st(1) // scm1 | _d_tdivzstepu*scm1
- fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
- fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
- faddp %st(0),%st(3) // _d_sdivzstepu*scm1
- flds fp_64k // 64k | _d_sdivzstepu*scm1
- fxch %st(1) // _d_sdivzstepu*scm1 | 64k
- faddp %st(0),%st(4) // 64k
-
- fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
- // overlap
- jmp LFDIVInFlight2
-
- .align 4
-LSetupNotLast2:
- fadds zi8stepu
- fxch %st(2)
- fadds sdivz8stepu
- fxch %st(2)
- flds tdivz8stepu
- faddp %st(0),%st(2)
- flds fp_64k
- fdiv %st(1),%st(0) // z*64k = fp_64k / (1/z), for the end of the next 8 pixels
- // this is what we've gone to all this trouble to
- // overlap
-LFDIVInFlight2:
- pushl %eax // remaining count if more than 8 are left, otherwise (pixels - 1) for the final segment
-
- cmpw 10(%ecx),%bp
- jl Lp6
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp6
- movw %bp,10(%ecx)
- movb %al,5(%edi)
-Lp6:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- cmpw 12(%ecx),%bp
- jl Lp7
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp7
- movw %bp,12(%ecx)
- movb %al,6(%edi)
-Lp7:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- cmpw 14(%ecx),%bp
- jl Lp8
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp8
- movw %bp,14(%ecx)
- movb %al,7(%edi)
-Lp8:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
- addl $8,%edi // 8 pixels drawn: advance pdest by 8 bytes and pz by 8 words
- addl $16,%ecx
- movl %edx,tfracf
- movl snext,%edx
- movl %ebx,sfracf
- movl tnext,%ebx
- movl %edx,s
- movl %ebx,t
-
- movl %ecx,pz
- movl %ebp,izi
-
- popl %ecx // retrieve count
-
-//
-// determine whether this is the last segment of the span or not
-//
- cmpl $8,%ecx // are there multiple segments remaining?
- ja LNotLastSegment // yes
-
-//
-// last segment of scan
-//
-LLastSegment:
-
-//
-// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
-// get there. The number of pixels left is variable, and we want to land on the
-// last pixel, not step one past it, so we can't run into arithmetic problems
-//
- testl %ecx,%ecx // %ecx = pixels left minus one (it was decremented before arriving here)
- jz LNoSteps // just draw the last pixel and we're done
-
-// pick up after the FDIV that was left in flight previously
-
-
- fld %st(0) // duplicate it
- fmul %st(4),%st(0) // s = s/z * z
- fxch %st(1)
- fmul %st(3),%st(0) // t = t/z * z
- fxch %st(1)
- fistpl snext
- fistpl tnext
-
- movl C(tadjust),%ebx
- movl C(sadjust),%eax
-
- addl snext,%eax
- addl tnext,%ebx
-
- movl C(bbextents),%ebp
- movl C(bbextentt),%edx
-
- cmpl $2048,%eax
- jl LClampLow4
- cmpl %ebp,%eax
- ja LClampHigh4
-LClampReentry4:
- movl %eax,snext
-
- cmpl $2048,%ebx
- jl LClampLow5
- cmpl %edx,%ebx
- ja LClampHigh5
-LClampReentry5:
-
- cmpl $1,%ecx // don't bother
- je LOnlyOneStep // if two pixels in segment, there's only one step,
- // of the segment length
- subl s,%eax
- subl t,%ebx
-
- addl %eax,%eax // convert to 15.17 format so multiply by 1.31
- addl %ebx,%ebx // reciprocal yields 16.48
- imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
- movl %edx,%ebp // keep the high dword of the product as sstep
-
- movl %ebx,%eax
- imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
-
-LSetEntryvec:
-//
-// set up advancetable
-//
- movl spr8entryvec_table(,%ecx,4),%ebx // handler address selected by the step count in %ecx
- movl %edx,%eax
- pushl %ebx // entry point into code for RET later
- movl %ebp,%ecx
- sarl $16,%ecx // sstep >>= 16;
- movl C(cachewidth),%ebx
- sarl $16,%edx // tstep >>= 16;
- jz LIsZeroLast
- imull %ebx,%edx // (tstep >> 16) * cachewidth;
-LIsZeroLast:
- addl %ecx,%edx // add in sstep
- // (tstep >> 16) * cachewidth + (sstep >> 16);
- movl tfracf,%ecx
- movl %edx,advancetable+4 // advance base in t
- addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
- // (sstep >> 16);
- shll $16,%ebp // left-justify sstep fractional part
- movl sfracf,%ebx
- shll $16,%eax // left-justify tstep fractional part
- movl %edx,advancetable // advance extra in t
-
- movl %eax,tstep
- movl %ebp,sstep
- movl %ecx,%edx // %edx = tfracf, as the pixel code expects
-
- movl pz,%ecx
- movl izi,%ebp
-
- ret // jump to the number-of-pixels handler
-
-//----------------------------------------
-
-LNoSteps:
- movl pz,%ecx
- subl $7,%edi // adjust for hardwired offset: LEndSpan writes 7(%edi) and 14(%ecx)
- subl $14,%ecx
- jmp LEndSpan
-
-
-LOnlyOneStep:
- subl s,%eax // a single step: the step is the whole s/t delta, no reciprocal multiply needed
- subl t,%ebx
- movl %eax,%ebp // %ebp = sstep, %edx = tstep, as LSetEntryvec expects
- movl %ebx,%edx
- jmp LSetEntryvec
-
-//---------------------------------------- entry stubs: bias %edi/%ecx so the shared pixel code's fixed offsets end on the last pixel
-
-.globl Spr8Entry2_8
-Spr8Entry2_8:
- subl $6,%edi // adjust for hardwired offsets
- subl $12,%ecx
- movb (%esi),%al // NOTE(review): looks redundant -- LLEntry2_8 reloads %al from (%esi) before using it
- jmp LLEntry2_8
-
-//----------------------------------------
-
-.globl Spr8Entry3_8
-Spr8Entry3_8:
- subl $5,%edi // adjust for hardwired offsets
- subl $10,%ecx
- jmp LLEntry3_8
-
-//----------------------------------------
-
-.globl Spr8Entry4_8
-Spr8Entry4_8:
- subl $4,%edi // adjust for hardwired offsets
- subl $8,%ecx
- jmp LLEntry4_8
-
-//----------------------------------------
-
-.globl Spr8Entry5_8
-Spr8Entry5_8:
- subl $3,%edi // adjust for hardwired offsets
- subl $6,%ecx
- jmp LLEntry5_8
-
-//----------------------------------------
-
-.globl Spr8Entry6_8
-Spr8Entry6_8:
- subl $2,%edi // adjust for hardwired offsets
- subl $4,%ecx
- jmp LLEntry6_8
-
-//----------------------------------------
-
-.globl Spr8Entry7_8
-Spr8Entry7_8:
- decl %edi // adjust for hardwired offsets
- subl $2,%ecx
- jmp LLEntry7_8
-
-//---------------------------------------- full 8-pixel final segment; the shorter entries jump into the middle of this chain
-
-.globl Spr8Entry8_8
-Spr8Entry8_8:
- cmpw (%ecx),%bp
- jl Lp9
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp9
- movw %bp,(%ecx)
- movb %al,(%edi)
-Lp9:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry7_8:
- cmpw 2(%ecx),%bp
- jl Lp10
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp10
- movw %bp,2(%ecx)
- movb %al,1(%edi)
-Lp10:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry6_8:
- cmpw 4(%ecx),%bp
- jl Lp11
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp11
- movw %bp,4(%ecx)
- movb %al,2(%edi)
-Lp11:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry5_8:
- cmpw 6(%ecx),%bp
- jl Lp12
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp12
- movw %bp,6(%ecx)
- movb %al,3(%edi)
-Lp12:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry4_8:
- cmpw 8(%ecx),%bp
- jl Lp13
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp13
- movw %bp,8(%ecx)
- movb %al,4(%edi)
-Lp13:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry3_8:
- cmpw 10(%ecx),%bp
- jl Lp14
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp14
- movw %bp,10(%ecx)
- movb %al,5(%edi)
-Lp14:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-LLEntry2_8:
- cmpw 12(%ecx),%bp
- jl Lp15
- movb (%esi),%al
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp15
- movw %bp,12(%ecx)
- movb %al,6(%edi)
-Lp15:
- addl izistep,%ebp
- adcl $0,%ebp
- addl tstep,%edx
- sbbl %eax,%eax
- addl sstep,%ebx
- adcl advancetable+4(,%eax,4),%esi
-
-LEndSpan:
- cmpw 14(%ecx),%bp
- jl Lp16
- movb (%esi),%al // load the texel for the final pixel of the span
- cmpb $(TRANSPARENT_COLOR),%al
- jz Lp16
- movw %bp,14(%ecx)
- movb %al,7(%edi)
-Lp16:
-
-//
-// clear s/z, t/z, 1/z from FP stack
-//
- fstp %st(0)
- fstp %st(0)
- fstp %st(0)
-
- popl %ebx // restore spans pointer
-LNextSpan:
- addl $(sspan_t_size),%ebx // point to next span
- movl sspan_t_count(%ebx),%ecx
- cmpl $0,%ecx // any more spans?
- jg LSpanLoop // yes
- jz LNextSpan // yes, but this one's empty; a negative count falls through and ends the list
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-#endif // id386
--- a/d_varsa.s
+++ /dev/null
@@ -1,186 +1,0 @@
-//
-// d_varsa.s
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-
-//-------------------------------------------------------
-// global refresh variables
-//-------------------------------------------------------
-
-// FIXME: put all refresh variables into one contiguous block. Make into one
-// big structure, like cl or sv?
-
- .align 4
-.globl C(d_sdivzstepu)
-.globl C(d_tdivzstepu)
-.globl C(d_zistepu)
-.globl C(d_sdivzstepv)
-.globl C(d_tdivzstepv)
-.globl C(d_zistepv)
-.globl C(d_sdivzorigin)
-.globl C(d_tdivzorigin)
-.globl C(d_ziorigin)
-C(d_sdivzstepu): .single 0
-C(d_tdivzstepu): .single 0
-C(d_zistepu): .single 0
-C(d_sdivzstepv): .single 0
-C(d_tdivzstepv): .single 0
-C(d_zistepv): .single 0
-C(d_sdivzorigin): .single 0
-C(d_tdivzorigin): .single 0
-C(d_ziorigin): .single 0
-
-.globl C(sadjust)
-.globl C(tadjust)
-.globl C(bbextents)
-.globl C(bbextentt)
-C(sadjust): .long 0
-C(tadjust): .long 0
-C(bbextents): .long 0
-C(bbextentt): .long 0
-
-.globl C(cacheblock)
-.globl C(d_viewbuffer)
-.globl C(cachewidth)
-.globl C(d_pzbuffer)
-.globl C(d_zrowbytes)
-.globl C(d_zwidth)
-C(cacheblock): .long 0
-C(cachewidth): .long 0
-C(d_viewbuffer): .long 0
-C(d_pzbuffer): .long 0
-C(d_zrowbytes): .long 0
-C(d_zwidth): .long 0
-
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl izi
-izi: .long 0
-
-.globl pbase, s, t, sfracf, tfracf, snext, tnext
-.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
-.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
-s: .long 0
-t: .long 0
-snext: .long 0
-tnext: .long 0
-sfracf: .long 0
-tfracf: .long 0
-pbase: .long 0
-zi8stepu: .long 0
-sdivz8stepu: .long 0
-tdivz8stepu: .long 0
-zi16stepu: .long 0
-sdivz16stepu: .long 0
-tdivz16stepu: .long 0
-spancountminus1: .long 0
-pz: .long 0
-
-.globl izistep
-izistep: .long 0
-
-//-------------------------------------------------------
-// local variables for d_draw16.s
-//-------------------------------------------------------
-
-.globl reciprocal_table_16, entryvec_table_16
-// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
-// 1/14, and 1/15 in 0.32 form
-reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
- .long 0x19999999, 0x15555555, 0x12492492
- .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
- .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
-
- .extern Entry2_16
- .extern Entry3_16
- .extern Entry4_16
- .extern Entry5_16
- .extern Entry6_16
- .extern Entry7_16
- .extern Entry8_16
- .extern Entry9_16
- .extern Entry10_16
- .extern Entry11_16
- .extern Entry12_16
- .extern Entry13_16
- .extern Entry14_16
- .extern Entry15_16
- .extern Entry16_16
-
-entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16
- .long Entry5_16, Entry6_16, Entry7_16, Entry8_16
- .long Entry9_16, Entry10_16, Entry11_16, Entry12_16
- .long Entry13_16, Entry14_16, Entry15_16, Entry16_16
-
-//-------------------------------------------------------
-// local variables for d_parta.s
-//-------------------------------------------------------
-.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
-DP_Count: .long 0
-DP_u: .long 0
-DP_v: .long 0
-DP_32768: .single 32768.0
-DP_Color: .long 0
-DP_Pix: .long 0
-
-
- .extern DP_1x1
- .extern DP_2x2
- .extern DP_3x3
- .extern DP_4x4
-
-DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4
-
-//
-// advancetable is 8 bytes, but points to the middle of that range so negative
-// offsets will work
-//
-.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
-advancetable: .long 0, 0
-sstep: .long 0
-tstep: .long 0
-
-pspantemp: .long 0
-counttemp: .long 0
-jumptemp: .long 0
-
-// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
-.globl reciprocal_table, entryvec_table
-reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
- .long 0x19999999, 0x15555555, 0x12492492
-
- .extern Entry2_8
- .extern Entry3_8
- .extern Entry4_8
- .extern Entry5_8
- .extern Entry6_8
- .extern Entry7_8
- .extern Entry8_8
-
-entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8
- .long Entry5_8, Entry6_8, Entry7_8, Entry8_8
-
- .extern Spr8Entry2_8
- .extern Spr8Entry3_8
- .extern Spr8Entry4_8
- .extern Spr8Entry5_8
- .extern Spr8Entry6_8
- .extern Spr8Entry7_8
- .extern Spr8Entry8_8
-
-.globl spr8entryvec_table
-spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
- .long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
-
-#endif // id386
-
--- a/math.s
+++ /dev/null
@@ -1,399 +1,0 @@
-//
-// math.s
-// x86 assembly-language math routines.
-
-#define GLQUAKE 1 // don't include unneeded defs
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-
-#ifdef id386
-
- .data
-
- .align 4
-Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
- .long Lcase4, Lcase5, Lcase6, Lcase7
-
- .text
-
-// TODO: rounding needed?
-// stack parameter offset
-#define val 4
-
-.globl C(Invert24To16)
-C(Invert24To16):
-
- movl val(%esp),%ecx
- movl $0x100,%edx // 0x10000000000 as dividend
- cmpl %edx,%ecx
- jle LOutOfRange
-
- subl %eax,%eax
- divl %ecx
-
- ret
-
-LOutOfRange:
- movl $0xFFFFFFFF,%eax
- ret
-
-#define in 4
-#define out 8
-
- .align 2
-.globl C(TransformVector)
-C(TransformVector):
- movl in(%esp),%eax
- movl out(%esp),%edx
-
- flds (%eax) // in[0]
- fmuls C(vright) // in[0]*vright[0]
- flds (%eax) // in[0] | in[0]*vright[0]
- fmuls C(vup) // in[0]*vup[0] | in[0]*vright[0]
- flds (%eax) // in[0] | in[0]*vup[0] | in[0]*vright[0]
- fmuls C(vpn) // in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
-
- flds 4(%eax) // in[1] | ...
- fmuls C(vright)+4 // in[1]*vright[1] | ...
- flds 4(%eax) // in[1] | in[1]*vright[1] | ...
- fmuls C(vup)+4 // in[1]*vup[1] | in[1]*vright[1] | ...
- flds 4(%eax) // in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
- fmuls C(vpn)+4 // in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
- fxch %st(2) // in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
-
- faddp %st(0),%st(5) // in[1]*vup[1] | in[1]*vpn[1] | ...
- faddp %st(0),%st(3) // in[1]*vpn[1] | ...
- faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
-
- flds 8(%eax) // in[2] | ...
- fmuls C(vright)+8 // in[2]*vright[2] | ...
- flds 8(%eax) // in[2] | in[2]*vright[2] | ...
- fmuls C(vup)+8 // in[2]*vup[2] | in[2]*vright[2] | ...
- flds 8(%eax) // in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
- fmuls C(vpn)+8 // in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
- fxch %st(2) // in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
-
- faddp %st(0),%st(5) // in[2]*vup[2] | in[2]*vpn[2] | ...
- faddp %st(0),%st(3) // in[2]*vpn[2] | ...
- faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
-
- fstps 8(%edx) // out[2]
- fstps 4(%edx) // out[1]
- fstps (%edx) // out[0]
-
- ret
-
-
-#define EMINS 4+4
-#define EMAXS 4+8
-#define P 4+12
-
- .align 2
-.globl C(BoxOnPlaneSide)
-C(BoxOnPlaneSide):
- pushl %ebx
-
- movl P(%esp),%edx
- movl EMINS(%esp),%ecx
- xorl %eax,%eax
- movl EMAXS(%esp),%ebx
- movb pl_signbits(%edx),%al
- cmpb $8,%al
- jge Lerror
- flds pl_normal(%edx) // p->normal[0]
- fld %st(0) // p->normal[0] | p->normal[0]
- jmp Ljmptab(,%eax,4)
-
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase0:
- fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0]
- flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]
- fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] |
- // p->normal[1]
- fmuls (%ecx) // p->normal[0]*emins[0] |
- // p->normal[0]*emaxs[0] | p->normal[1]
- fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fld %st(0) // p->normal[1] | p->normal[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] |
- // p->normal[1] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] |
- // p->normal[2] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 4(%ecx) // p->normal[1]*emins[1] |
- // p->normal[1]*emaxs[1] |
- // p->normal[2] | p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fld %st(0) // p->normal[2] | p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fmuls 8(%ebx) // p->normal[2]*emaxs[2] |
- // p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[0]*emins[0]
- fxch %st(5) // p->normal[0]*emins[0] |
- // p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1] |
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- faddp %st(0),%st(3) //p->normal[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- fmuls 8(%ecx) //p->normal[2]*emins[2] |
- // p->normal[1]*emaxs[1] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- fxch %st(1) //p->normal[1]*emaxs[1] |
- // p->normal[2]*emins[2] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0] |
- // p->normal[2]*emaxs[2]
- faddp %st(0),%st(3) //p->normal[2]*emins[2] |
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
- // p->normal[2]*emaxs[2]
- fxch %st(3) //p->normal[2]*emaxs[2] +
- // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
- // p->normal[2]*emins[2]
- faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]|
- // dist1 | p->normal[2]*emins[2]
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-Lcase1:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase2:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-Lcase3:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ecx) // emins[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase4:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
-Lcase5:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ebx) // emaxs[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ecx) // emins[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase6:
- fmuls (%ebx) // emaxs[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ecx) // emins[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
- jmp LSetSides
-
-//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
-//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
-Lcase7:
- fmuls (%ecx) // emins[0]
- flds pl_normal+4(%edx)
- fxch %st(2)
- fmuls (%ebx) // emaxs[0]
- fxch %st(2)
- fld %st(0)
- fmuls 4(%ecx) // emins[1]
- flds pl_normal+8(%edx)
- fxch %st(2)
- fmuls 4(%ebx) // emaxs[1]
- fxch %st(2)
- fld %st(0)
- fmuls 8(%ecx) // emins[2]
- fxch %st(5)
- faddp %st(0),%st(3)
- fmuls 8(%ebx) // emaxs[2]
- fxch %st(1)
- faddp %st(0),%st(3)
- fxch %st(3)
- faddp %st(0),%st(2)
-
-LSetSides:
-
-// sides = 0;
-// if (dist1 >= p->dist)
-// sides = 1;
-// if (dist2 < p->dist)
-// sides |= 2;
-
- faddp %st(0),%st(2) // dist1 | dist2
- fcomps pl_dist(%edx)
- xorl %ecx,%ecx
- fnstsw %ax
- fcomps pl_dist(%edx)
- andb $1,%ah
- xorb $1,%ah
- addb %ah,%cl
-
- fnstsw %ax
- andb $1,%ah
- addb %ah,%ah
- addb %ah,%cl
-
-// return sides;
-
- popl %ebx
- movl %ecx,%eax // return status
-
- ret
-
-
-Lerror:
- call C(BOPS_Error)
-
-#endif // id386
--- a/mkfile
+++ b/mkfile
@@ -80,31 +80,10 @@
snd_mem.o\
snd_mix.o\
snd_linux.o\
- d_draw.o\
- d_draw16.o\
- d_parta.o\
- d_polysa.o\
- d_scana.o\
- d_spr8.o\
- d_varsa.o\
- math.o\
- r_aliasa.o\
- r_drawa.o\
- r_edgea.o\
- r_varsa.o\
- surf16.o\
- surf8.o\
- worlda.o\
- r_aclipa.o\
- snd_mixa.o\
- #sys_dosa.o\
HFILES=\
adivtab.h\
anorms.h\
- asm_draw.h\
- asm_i386.h\
- block16.h\
bspfile.h\
cdaudio.h\
client.h\
@@ -113,7 +92,6 @@
console.h\
crc.h\
cvar.h\
- d_ifacea.h\
d_iface.h\
d_local.h\
draw.h\
@@ -131,7 +109,6 @@
progdefs.h\
progs.h\
protocol.h\
- quakeasm.h\
quakedef.h\
render.h\
r_local.h\
@@ -149,8 +126,3 @@
zone.h\
<$PLAN9/src/mkone
-
-AS=gcc
-
-%.$O: %.s
- $AS $AFLAGS -o $target -c $stem.s
--- a/quakeasm.h
+++ /dev/null
@@ -1,248 +1,0 @@
-//
-// quakeasm.h: general asm header file
-//
-
-//#define GLQUAKE 1
-
-#ifdef __i386__
-#define id386
-#endif
-
-// !!! must be kept the same as in d_iface.h !!!
-#define TRANSPARENT_COLOR 255
-
-#ifndef GLQUAKE
- .extern C(d_zistepu)
- .extern C(d_pzbuffer)
- .extern C(d_zistepv)
- .extern C(d_zrowbytes)
- .extern C(d_ziorigin)
- .extern C(r_turb_s)
- .extern C(r_turb_t)
- .extern C(r_turb_pdest)
- .extern C(r_turb_spancount)
- .extern C(r_turb_turb)
- .extern C(r_turb_pbase)
- .extern C(r_turb_sstep)
- .extern C(r_turb_tstep)
- .extern C(r_bmodelactive)
- .extern C(d_sdivzstepu)
- .extern C(d_tdivzstepu)
- .extern C(d_sdivzstepv)
- .extern C(d_tdivzstepv)
- .extern C(d_sdivzorigin)
- .extern C(d_tdivzorigin)
- .extern C(sadjust)
- .extern C(tadjust)
- .extern C(bbextents)
- .extern C(bbextentt)
- .extern C(cacheblock)
- .extern C(d_viewbuffer)
- .extern C(cachewidth)
- .extern C(d_pzbuffer)
- .extern C(d_zrowbytes)
- .extern C(d_zwidth)
- .extern C(d_scantable)
- .extern C(r_lightptr)
- .extern C(r_numvblocks)
- .extern C(prowdestbase)
- .extern C(pbasesource)
- .extern C(r_lightwidth)
- .extern C(lightright)
- .extern C(lightrightstep)
- .extern C(lightdeltastep)
- .extern C(lightdelta)
- .extern C(lightright)
- .extern C(lightdelta)
- .extern C(sourcetstep)
- .extern C(surfrowbytes)
- .extern C(lightrightstep)
- .extern C(lightdeltastep)
- .extern C(r_sourcemax)
- .extern C(r_stepback)
- .extern C(colormap)
- .extern C(blocksize)
- .extern C(sourcesstep)
- .extern C(lightleft)
- .extern C(blockdivshift)
- .extern C(blockdivmask)
- .extern C(lightleftstep)
- .extern C(r_origin)
- .extern C(r_ppn)
- .extern C(r_pup)
- .extern C(r_pright)
- .extern C(ycenter)
- .extern C(xcenter)
- .extern C(d_vrectbottom_particle)
- .extern C(d_vrectright_particle)
- .extern C(d_vrecty)
- .extern C(d_vrectx)
- .extern C(d_pix_shift)
- .extern C(d_pix_min)
- .extern C(d_pix_max)
- .extern C(d_y_aspect_shift)
- .extern C(screenwidth)
- .extern C(r_leftclipped)
- .extern C(r_leftenter)
- .extern C(r_rightclipped)
- .extern C(r_rightenter)
- .extern C(modelorg)
- .extern C(xscale)
- .extern C(r_refdef)
- .extern C(yscale)
- .extern C(r_leftexit)
- .extern C(r_rightexit)
- .extern C(r_lastvertvalid)
- .extern C(cacheoffset)
- .extern C(newedges)
- .extern C(removeedges)
- .extern C(r_pedge)
- .extern C(r_framecount)
- .extern C(r_u1)
- .extern C(r_emitted)
- .extern C(edge_p)
- .extern C(surface_p)
- .extern C(surfaces)
- .extern C(r_lzi1)
- .extern C(r_v1)
- .extern C(r_ceilv1)
- .extern C(r_nearzi)
- .extern C(r_nearzionly)
- .extern C(edge_aftertail)
- .extern C(edge_tail)
- .extern C(current_iv)
- .extern C(edge_head_u_shift20)
- .extern C(span_p)
- .extern C(edge_head)
- .extern C(fv)
- .extern C(edge_tail_u_shift20)
- .extern C(r_apverts)
- .extern C(r_anumverts)
- .extern C(aliastransform)
- .extern C(r_avertexnormals)
- .extern C(r_plightvec)
- .extern C(r_ambientlight)
- .extern C(r_shadelight)
- .extern C(aliasxcenter)
- .extern C(aliasycenter)
- .extern C(a_sstepxfrac)
- .extern C(r_affinetridesc)
- .extern C(acolormap)
- .extern C(d_pcolormap)
- .extern C(r_affinetridesc)
- .extern C(d_sfrac)
- .extern C(d_ptex)
- .extern C(d_pedgespanpackage)
- .extern C(d_tfrac)
- .extern C(d_light)
- .extern C(d_zi)
- .extern C(d_pdest)
- .extern C(d_pz)
- .extern C(d_aspancount)
- .extern C(erroradjustup)
- .extern C(errorterm)
- .extern C(d_xdenom)
- .extern C(r_p0)
- .extern C(r_p1)
- .extern C(r_p2)
- .extern C(a_tstepxfrac)
- .extern C(r_sstepx)
- .extern C(r_tstepx)
- .extern C(a_ststepxwhole)
- .extern C(zspantable)
- .extern C(skintable)
- .extern C(r_zistepx)
- .extern C(erroradjustdown)
- .extern C(d_countextrastep)
- .extern C(ubasestep)
- .extern C(a_ststepxwhole)
- .extern C(a_tstepxfrac)
- .extern C(r_lstepx)
- .extern C(a_spans)
- .extern C(erroradjustdown)
- .extern C(d_pdestextrastep)
- .extern C(d_pzextrastep)
- .extern C(d_sfracextrastep)
- .extern C(d_ptexextrastep)
- .extern C(d_countextrastep)
- .extern C(d_tfracextrastep)
- .extern C(d_lightextrastep)
- .extern C(d_ziextrastep)
- .extern C(d_pdestbasestep)
- .extern C(d_pzbasestep)
- .extern C(d_sfracbasestep)
- .extern C(d_ptexbasestep)
- .extern C(ubasestep)
- .extern C(d_tfracbasestep)
- .extern C(d_lightbasestep)
- .extern C(d_zibasestep)
- .extern C(zspantable)
- .extern C(r_lstepy)
- .extern C(r_sstepy)
- .extern C(r_tstepy)
- .extern C(r_zistepy)
- .extern C(D_PolysetSetEdgeTable)
- .extern C(D_RasterizeAliasPolySmooth)
-
- .extern float_point5
- .extern Float2ToThe31nd
- .extern izistep
- .extern izi
- .extern FloatMinus2ToThe31nd
- .extern float_1
- .extern float_particle_z_clip
- .extern float_minus_1
- .extern float_0
- .extern fp_16
- .extern fp_64k
- .extern fp_1m
- .extern fp_1m_minus_1
- .extern fp_8
- .extern entryvec_table
- .extern advancetable
- .extern sstep
- .extern tstep
- .extern pspantemp
- .extern counttemp
- .extern jumptemp
- .extern reciprocal_table
- .extern DP_Count
- .extern DP_u
- .extern DP_v
- .extern DP_32768
- .extern DP_Color
- .extern DP_Pix
- .extern DP_EntryTable
- .extern pbase
- .extern s
- .extern t
- .extern sfracf
- .extern tfracf
- .extern snext
- .extern tnext
- .extern spancountminus1
- .extern zi16stepu
- .extern sdivz16stepu
- .extern tdivz16stepu
- .extern zi8stepu
- .extern sdivz8stepu
- .extern tdivz8stepu
- .extern reciprocal_table_16
- .extern entryvec_table_16
- .extern ceil_cw
- .extern single_cw
- .extern fp_64kx64k
- .extern pz
- .extern spr8entryvec_table
-#endif
-
- .extern C(snd_scaletable)
- .extern C(paintbuffer)
- .extern C(snd_linear_count)
- .extern C(snd_p)
- .extern C(snd_vol)
- .extern C(snd_out)
- .extern C(vright)
- .extern C(vup)
- .extern C(vpn)
- .extern C(BOPS_Error)
--- a/r_aclipa.s
+++ /dev/null
@@ -1,197 +1,0 @@
-//
-// r_aliasa.s
-// x86 assembly-language Alias model transform and project code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-Ltemp0: .long 0
-Ltemp1: .long 0
-
- .text
-
-#define pfv0 8+4
-#define pfv1 8+8
-#define out 8+12
-
-.globl C(R_Alias_clip_bottom)
-C(R_Alias_clip_bottom):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrectbottom,%eax
-
-LDoForwardOrBackward:
-
- movl fv_v+4(%esi),%edx
- movl fv_v+4(%edi),%ecx
-
- cmpl %ecx,%edx
- jl LDoForward
-
- movl fv_v+4(%esi),%ecx
- movl fv_v+4(%edi),%edx
- movl pfv0(%esp),%edi
- movl pfv1(%esp),%esi
-
-LDoForward:
-
- subl %edx,%ecx
- subl %edx,%eax
- movl %ecx,Ltemp1
- movl %eax,Ltemp0
- fildl Ltemp1
- fildl Ltemp0
- movl out(%esp),%edx
- movl $2,%eax
-
- fdivp %st(0),%st(1) // scale
-
-LDo3Forward:
- fildl fv_v+0(%esi) // fv0v0 | scale
- fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale
- fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
- fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
- // scale
- fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
- // scale
- fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
- // fv1v2 | scale
- fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
- // fv1v2 | scale
- fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2 | scale
- fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2 | scale
- fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2-fv0v2 | scale
- fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
- fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
- // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
- addl $12,%edi
- fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
- // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
- addl $12,%esi
- addl $12,%edx
- fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
- // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
- // fv0v2
- fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
- // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
- // fv0v2
- faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale |
- // fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v2
- faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v2
- fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
- // (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
- faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v1+(fv1v1-fv0v1)*scale
- fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v1+(fv1v1-fv0v1)*scale
- fadds float_point5
- fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale |
- // fv0v0+(fv1v0-fv0v0)*scale
- fadds float_point5
- fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale |
- // fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v0+(fv1v0-fv0v0)*scale
- fadds float_point5
- fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale |
- // fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale
- fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale |
- // fv0v2+(fv1v2-fv0v2)*scale
- fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
- // fv0v2+(fv1v2-fv0v2)*scale | scale
- fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale
- fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale
- fistpl fv_v+8-12(%edx) // scale
-
- decl %eax
- jnz LDo3Forward
-
- fstp %st(0)
-
- popl %edi
- popl %esi
-
- ret
-
-
-.globl C(R_Alias_clip_top)
-C(R_Alias_clip_top):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrect+4,%eax
- jmp LDoForwardOrBackward
-
-
-
-.globl C(R_Alias_clip_right)
-C(R_Alias_clip_right):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrectright,%eax
-
-LRightLeftEntry:
-
-
- movl fv_v+4(%esi),%edx
- movl fv_v+4(%edi),%ecx
-
- cmpl %ecx,%edx
- movl fv_v+0(%esi),%edx
-
- movl fv_v+0(%edi),%ecx
- jl LDoForward2
-
- movl fv_v+0(%esi),%ecx
- movl fv_v+0(%edi),%edx
- movl pfv0(%esp),%edi
- movl pfv1(%esp),%esi
-
-LDoForward2:
-
- jmp LDoForward
-
-
-.globl C(R_Alias_clip_left)
-C(R_Alias_clip_left):
- pushl %esi
- pushl %edi
-
- movl pfv0(%esp),%esi
- movl pfv1(%esp),%edi
-
- movl C(r_refdef)+rd_aliasvrect+0,%eax
- jmp LRightLeftEntry
-
-
-#endif // id386
-
--- a/r_aliasa.s
+++ /dev/null
@@ -1,218 +1,0 @@
-//
-// r_aliasa.s
-// x86 assembly-language Alias model transform and project code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-
-Lfloat_1: .single 1.0
-Ltemp: .long 0
-Lcoords: .long 0, 0, 0
-
- .text
-
-#define fv 12+4
-#define pstverts 12+8
-
-.globl C(R_AliasTransformAndProjectFinalVerts)
-C(R_AliasTransformAndProjectFinalVerts):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
-
-// int i, temp;
-// float lightcos, *plightnormal, zi;
-// trivertx_t *pverts;
-
-// pverts = r_apverts;
- movl C(r_apverts),%esi
-
-// for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
-// {
- movl pstverts(%esp),%ebp
- movl fv(%esp),%edi
- movl C(r_anumverts),%ecx
- subl %edx,%edx
-
-Lloop:
-
-// // transform and project
-// zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
-// aliastransform[2][3]);
- movb (%esi),%dl
- movb %dl,Lcoords
- fildl Lcoords // v[0]
- movb 1(%esi),%dl
- movb %dl,Lcoords+4
- fildl Lcoords+4 // v[1] | v[0]
- movb 2(%esi),%dl
- movb %dl,Lcoords+8
- fildl Lcoords+8 // v[2] | v[1] | v[0]
-
- fld %st(2) // v[0] | v[2] | v[1] | v[0]
- fmuls C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
- fld %st(2) // v[1] | accum | v[2] | v[1] | v[0]
- fmuls C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
- fxch %st(1) // accum | accum2 | v[2] | v[1] | v[0]
- fadds C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
- fld %st(2) // v[2] | accum | accum2 | v[2] | v[1] | v[0]
- fmuls C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
- // v[0]
- fxch %st(1) // accum | accum3 | accum2 | v[2] | v[1] | v[0]
- faddp %st(0),%st(2) // accum3 | accum | v[2] | v[1] | v[0]
- movb tv_lightnormalindex(%esi),%dl
- movl stv_s(%ebp),%eax
- movl %eax,fv_v+8(%edi)
- faddp %st(0),%st(1) // z | v[2] | v[1] | v[0]
-
- movl stv_t(%ebp),%eax
- movl %eax,fv_v+12(%edi)
-
-// // lighting
-// plightnormal = r_avertexnormals[pverts->lightnormalindex];
-
- fdivrs Lfloat_1 // zi | v[2] | v[1] | v[0]
-
-// fv->v[2] = pstverts->s;
-// fv->v[3] = pstverts->t;
-// fv->flags = pstverts->onseam;
- movl stv_onseam(%ebp),%eax
- movl %eax,fv_flags(%edi)
-
- movl fv_size(%edi),%eax
- movl stv_size(%ebp),%eax
- movl 4(%esi),%eax
-
- leal (%edx,%edx,2),%eax // index*3
-
- fxch %st(3) // v[0] | v[2] | v[1] | zi
-
-// lightcos = DotProduct (plightnormal, r_plightvec);
- flds C(r_avertexnormals)(,%eax,4)
- fmuls C(r_plightvec)
- flds C(r_avertexnormals)+4(,%eax,4)
- fmuls C(r_plightvec)+4
- flds C(r_avertexnormals)+8(,%eax,4)
- fmuls C(r_plightvec)+8
- fxch %st(1)
- faddp %st(0),%st(2)
- fld %st(2) // v[0] | laccum | laccum2 | v[0] | v[2] |
- // v[1] | zi
- fmuls C(aliastransform)+0 // xaccum | laccum | laccum2 | v[0] | v[2] |
- // v[1] | zi
- fxch %st(2) // laccum2 | laccum | xaccum | v[0] | v[2] |
- // v[1] | zi
- faddp %st(0),%st(1) // laccum | xaccum | v[0] | v[2] | v[1] | zi
-
-// temp = r_ambientlight;
-// if (lightcos < 0)
-// {
- fsts Ltemp
- movl C(r_ambientlight),%eax
- movb Ltemp+3,%dl
- testb $0x80,%dl
- jz Lsavelight // no need to clamp if only ambient lit, because
- // r_ambientlight is preclamped
-
-// temp += (int)(r_shadelight * lightcos);
- fmuls C(r_shadelight)
-// FIXME: fast float->int conversion?
- fistpl Ltemp
- addl Ltemp,%eax
-
-// // clamp; because we limited the minimum ambient and shading light, we
-// // don't have to clamp low light, just bright
-// if (temp < 0)
-// temp = 0;
- jns Lp1
- subl %eax,%eax
-
-// }
-
-Lp1:
-
-// fv->v[4] = temp;
-//
-// // x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
-// // scaled up by 1/2**31, and the scaling cancels out for x and y in the
-// // projection
-// fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
-// aliastransform[0][3]) * zi) + aliasxcenter;
-// fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
-// aliastransform[1][3]) * zi) + aliasycenter;
-// fv->v[5] = zi;
- fxch %st(1) // v[0] | xaccum | v[2] | v[1] | zi
- fmuls C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
- fxch %st(3) // v[1] | xaccum | v[2] | yaccum | zi
- fld %st(0) // v[1] | v[1] | xaccum | v[2] | yaccum | zi
- fmuls C(aliastransform)+4 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
- fxch %st(1) // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
- movl %eax,fv_v+16(%edi)
- fmuls C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
- // zi
- fxch %st(2) // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
- // zi
- fadds C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
- // zi
- fxch %st(4) // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
- // zi
- fadds C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
- // zi
- fxch %st(3) // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
- // zi
- fld %st(0) // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
- // xaccum | zi
- fmuls C(aliastransform)+8 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
- // xaccum | zi
- fxch %st(1) // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
- // xaccum | zi
- fmuls C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
- // yaccum | xaccum | zi
- fxch %st(5) // xaccum | xaccum3 | xaccum2 | yaccum2 |
- // yaccum | yaccum3 | zi
- faddp %st(0),%st(2) // xaccum3 | xaccum | yaccum2 | yaccum |
- // yaccum3 | zi
- fxch %st(3) // yaccum | xaccum | yaccum2 | xaccum3 |
- // yaccum3 | zi
- faddp %st(0),%st(2) // xaccum | yaccum | xaccum3 | yaccum3 | zi
- addl $(tv_size),%esi
- faddp %st(0),%st(2) // yaccum | x | yaccum3 | zi
- faddp %st(0),%st(2) // x | y | zi
- addl $(stv_size),%ebp
- fmul %st(2),%st(0) // x/z | y | zi
- fxch %st(1) // y | x/z | zi
- fmul %st(2),%st(0) // y/z | x/z | zi
- fxch %st(1) // x/z | y/z | zi
- fadds C(aliasxcenter) // u | y/z | zi
- fxch %st(1) // y/z | u | zi
- fadds C(aliasycenter) // v | u | zi
- fxch %st(2) // zi | u | v
-// FIXME: fast float->int conversion?
- fistpl fv_v+20(%edi) // u | v
- fistpl fv_v+0(%edi) // v
- fistpl fv_v+4(%edi)
-
-// }
-
- addl $(fv_size),%edi
- decl %ecx
- jnz Lloop
-
- popl %esi // restore register variables
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-Lsavelight:
- fstp %st(0)
- jmp Lp1
-
-#endif // id386
-
--- a/r_drawa.s
+++ /dev/null
@@ -1,819 +1,0 @@
-//
-// r_drawa.s
-// x86 assembly-language edge clipping and emission code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
-// !!! if these are changed, they must be changed in r_draw.c too !!!
-#define FULLY_CLIPPED_CACHED 0x80000000
-#define FRAMECOUNT_MASK 0x7FFFFFFF
-
- .data
-
-Ld0: .single 0.0 // clip-plane distance of pv0; also read back as an integer to test its sign
-Ld1: .single 0.0 // clip-plane distance of pv1; also read back as an integer to test its sign
-Lstack: .long 0 // %esp saved on entry to R_ClipEdge, restored at Ldone
-Lfp_near_clip: .single NEAR_CLIP // lower bound applied to transformed z in LTransformAndProject
-Lceilv0: .long 0 // integer ceilv0 of the first projected vertex
-Lv: .long 0 // integer scratch so the first scanline can be loaded with fildl
-Lu0: .long 0 // projected u of the first vertex
-Lv0: .long 0 // projected v of the first vertex
-Lzi0: .long 0 // 1/z of the first vertex
-
- .text
-
-//----------------------------------------------------------------------
-// edge clipping code
-//----------------------------------------------------------------------
-
-#define pv0 4+12 // argument offsets from %esp once the three pushes below are done
-#define pv1 8+12
-#define clip 12+12
-
- .align 4
-.globl C(R_ClipEdge)
-C(R_ClipEdge):
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
- movl %esp,Lstack // for clearing the stack later
-
-// float d0, d1, f;
-// mvertex_t clipvert;
-
- movl clip(%esp),%ebx // %ebx = clip (current clip plane)
- movl pv0(%esp),%esi // %esi = pv0
- movl pv1(%esp),%edx // %edx = pv1
-
-// if (clip)
-// {
- testl %ebx,%ebx
- jz Lemit
-
-// do
-// {
-
-Lcliploop:
-
-// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
-// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
- flds mv_position+0(%esi)
- fmuls cp_normal+0(%ebx)
- flds mv_position+4(%esi)
- fmuls cp_normal+4(%ebx)
- flds mv_position+8(%esi)
- fmuls cp_normal+8(%ebx)
- fxch %st(1)
- faddp %st(0),%st(2) // d0mul2 | d0add0
-
- flds mv_position+0(%edx)
- fmuls cp_normal+0(%ebx)
- flds mv_position+4(%edx)
- fmuls cp_normal+4(%ebx)
- flds mv_position+8(%edx)
- fmuls cp_normal+8(%ebx)
- fxch %st(1)
- faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
- fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
-
- faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
- faddp %st(0),%st(2) // dot0 | dot1
-
- fsubs cp_dist(%ebx) // d0 | dot1
- fxch %st(1) // dot1 | d0
- fsubs cp_dist(%ebx) // d1 | d0
- fxch %st(1)
- fstps Ld0
- fstps Ld1
-
-// if (d0 >= 0)
-// {
- movl Ld0,%eax // raw float bits: the sign bit is all that is examined
- movl Ld1,%ecx
- orl %eax,%ecx // sign flag set if d0 or d1 has its sign bit set
- js Lp2 // at least one point is clipped
-
-// both points are unclipped
-
-Lcontinue:
-
-//
-// R_ClipEdge (&clipvert, pv1, clip->next);
-// return;
-// }
-// } while ((clip = clip->next) != NULL);
- movl cp_next(%ebx),%ebx
- testl %ebx,%ebx
- jnz Lcliploop
-
-// }
-
-//// add the edge
-// R_EmitEdge (pv0, pv1);
-Lemit:
-
-//
-// set integer rounding to ceil mode, set to single precision
-//
-// FIXME: do away with by manually extracting integers from floats?
-// FIXME: set less often
- fldcw ceil_cw
-
-// edge_t *edge, *pcheck;
-// int u_check;
-// float u, u_step;
-// vec3_t local, transformed;
-// float *world;
-// int v, v2, ceilv0;
-// float scale, lzi0, u0, v0;
-// int side;
-
-// if (r_lastvertvalid)
-// {
- cmpl $0,C(r_lastvertvalid)
- jz LCalcFirst
-
-// u0 = r_u1;
-// v0 = r_v1;
-// lzi0 = r_lzi1;
-// ceilv0 = r_ceilv1;
- movl C(r_lzi1),%eax
- movl C(r_u1),%ecx
- movl %eax,Lzi0
- movl %ecx,Lu0
- movl C(r_v1),%ecx
- movl C(r_ceilv1),%eax
- movl %ecx,Lv0
- movl %eax,Lceilv0
- jmp LCalcSecond
-
-// }
-
-LCalcFirst:
-
-// else
-// {
-// world = &pv0->position[0];
-
- call LTransformAndProject // v0 | lzi0 | u0
-
- fsts Lv0
- fxch %st(2) // u0 | lzi0 | v0
- fstps Lu0 // lzi0 | v0
- fstps Lzi0 // v0
-
-// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
- fistpl Lceilv0 // converted under the control word loaded from ceil_cw above
-
-// }
-
-LCalcSecond:
-
-// world = &pv1->position[0];
- movl %edx,%esi
-
- call LTransformAndProject // v1 | lzi1 | u1
-
- flds Lu0 // u0 | v1 | lzi1 | u1
- fxch %st(3) // u1 | v1 | lzi1 | u0
- flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
- fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
- flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
- fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
-
-// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
- fistl C(r_ceilv1)
-
- fldcw single_cw // put back normal floating-point state
-
- fsts C(r_v1)
- fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
-
-// if (r_lzi1 > lzi0)
-// lzi0 = r_lzi1;
- fcom %st(1)
- fnstsw %ax
- testb $1,%ah // C0 set => lzi0 < lzi1
- jz LP0
- fstp %st(0) // drop lzi0 ...
- fld %st(0) // ... and duplicate lzi1 in its place
-LP0:
-
- fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
- fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
- fxch %st(1)
- fsts C(r_u1)
- fxch %st(1)
-
-// if (lzi0 > r_nearzi) // for mipmap finding
-// r_nearzi = lzi0;
- fcoms C(r_nearzi)
- fnstsw %ax
- testb $0x45,%ah // C3, C2 and C0 all clear => lzi0 > r_nearzi
- jnz LP1
- fsts C(r_nearzi)
-LP1:
-
-// // for right edges, all we want is the effect on 1/z
-// if (r_nearzionly)
-// return;
- movl C(r_nearzionly),%eax
- testl %eax,%eax
- jz LP2
-LPop5AndDone: // entered with five values still on the FPU stack
- movl C(cacheoffset),%eax
- movl C(r_framecount),%edx
- cmpl $0x7FFFFFFF,%eax // 0x7FFFFFFF is the value stored for partially clipped edges
- jz LDoPop // leave that marker alone
- andl $(FRAMECOUNT_MASK),%edx
- orl $(FULLY_CLIPPED_CACHED),%edx
- movl %edx,C(cacheoffset) // cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK)
-
-LDoPop:
- fstp %st(0) // u1 | v0 | v1 | u0
- fstp %st(0) // v0 | v1 | u0
- fstp %st(0) // v1 | u0
- fstp %st(0) // u0
- fstp %st(0)
- jmp Ldone
-
-LP2:
-
-// // create the edge
-// if (ceilv0 == r_ceilv1)
-// return; // horizontal edge
- movl Lceilv0,%ebx // %ebx = ceilv0
- movl C(edge_p),%edi // %edi = edge being built
- movl C(r_ceilv1),%ecx // %ecx = r_ceilv1
- movl %edi,%edx
- movl C(r_pedge),%esi // NOTE(review): %esi looks unused before it is reloaded with surfaces below
- addl $(et_size),%edx // %edx = edge_p + 1
- cmpl %ecx,%ebx // ZF used just below, CF used by jc LSide0
- jz LPop5AndDone
-
- movl C(r_pedge),%eax
- movl %eax,et_owner(%edi)
-
-// side = ceilv0 > r_ceilv1;
-//
-// edge->nearzi = lzi0;
- fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
-
-// if (side == 1)
-// {
- jc LSide0 // carry from the cmpl above: ceilv0 < r_ceilv1 (unsigned compare)
-
-LSide1:
-
-// // leading edge (go from p2 to p1)
-
-// u_step = ((u0 - r_u1) / (v0 - r_v1));
- fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
- fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
- fdivrp %st(0),%st(2) // v1 | ustep
-
-// r_emitted = 1;
- movl $1,C(r_emitted)
-
-// edge = edge_p++;
- movl %edx,C(edge_p)
-
-// pretouch next edge
- movl (%edx),%eax
-
-// v2 = ceilv0 - 1;
-// v = r_ceilv1;
- movl %ecx,%eax
- leal -1(%ebx),%ecx // %ecx = v2
- movl %eax,%ebx // %ebx = v
-
-// edge->surfs[0] = 0;
-// edge->surfs[1] = surface_p - surfaces;
- movl C(surface_p),%eax
- movl C(surfaces),%esi
- subl %edx,%edx
- subl %esi,%eax
- shrl $(SURF_T_SHIFT),%eax // byte offset -> surface index
- movl %edx,et_surfs(%edi)
- movl %eax,et_surfs+2(%edi) // 32-bit store at +2 also covers the start of et_nextremove, rewritten at LSetRemove
-
- subl %esi,%esi // %esi = 0: no u_check adjustment for this side
-
-// u = r_u1 + ((float)v - r_v1) * u_step;
- movl %ebx,Lv
- fildl Lv // v | v1 | ustep
- fsubp %st(0),%st(1) // v-v1 | ustep
- fmul %st(1),%st(0) // (v-v1)*ustep | ustep
- fadds C(r_u1) // u | ustep
-
- jmp LSideDone
-
-// }
-
-LSide0:
-
-// else
-// {
-// // trailing edge (go from p1 to p2)
-
-// u_step = ((r_u1 - u0) / (r_v1 - v0));
- fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
- fxch %st(2) // v1 | v0 | u1-u0 | u0
- fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
- fdivrp %st(0),%st(2) // v0 | ustep | u0
-
-// r_emitted = 1;
- movl $1,C(r_emitted)
-
-// edge = edge_p++;
- movl %edx,C(edge_p)
-
-// pretouch next edge
- movl (%edx),%eax
-
-// v = ceilv0;
-// v2 = r_ceilv1 - 1;
- decl %ecx // %ecx = v2; %ebx already holds v = ceilv0
-
-// edge->surfs[0] = surface_p - surfaces;
-// edge->surfs[1] = 0;
- movl C(surface_p),%eax
- movl C(surfaces),%esi
- subl %edx,%edx
- subl %esi,%eax
- shrl $(SURF_T_SHIFT),%eax // byte offset -> surface index
- movl %edx,et_surfs+2(%edi)
- movl %eax,et_surfs(%edi) // 32-bit store; presumably the index fits in 16 bits so surfs[1] stays 0 - TODO confirm
-
- movl $1,%esi // %esi = 1: added to u_check so trailers sort after leaders
-
-// u = u0 + ((float)v - v0) * u_step;
- movl %ebx,Lv
- fildl Lv // v | v0 | ustep | u0
- fsubp %st(0),%st(1) // v-v0 | ustep | u0
- fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
- faddp %st(0),%st(2) // ustep | u
- fxch %st(1) // u | ustep
-
-// }
-
-LSideDone:
-
-// edge->u_step = u_step*0x100000;
-// edge->u = u*0x100000 + 0xFFFFF;
-
- fmuls fp_1m // u*0x100000 | ustep
- fxch %st(1) // ustep | u*0x100000
- fmuls fp_1m // ustep*0x100000 | u*0x100000
- fxch %st(1) // u*0x100000 | ustep*0x100000
- fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
- fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
- fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
- fistpl et_u(%edi)
-
-// // we need to do this to avoid stepping off the edges if a very nearly
-// // horizontal edge is less than epsilon above a scan, and numeric error
-// // causes it to incorrectly extend to the scan, and the extension of the
-// // line goes off the edge of the screen
-// // FIXME: is this actually needed?
-// if (edge->u < r_refdef.vrect_x_adj_shift20)
-// edge->u = r_refdef.vrect_x_adj_shift20;
-// if (edge->u > r_refdef.vrectright_adj_shift20)
-// edge->u = r_refdef.vrectright_adj_shift20;
- movl et_u(%edi),%eax
- movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
- cmpl %edx,%eax
- jl LP4 // below the left limit: clamp to it
- movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
- cmpl %edx,%eax
- jng LP5 // within range: keep edge->u
-LP4:
- movl %edx,et_u(%edi) // %edx holds whichever limit was exceeded
- movl %edx,%eax
-LP5:
-
-// // sort the edge in normally
-// u_check = edge->u;
-//
-// if (edge->surfs[0])
-// u_check++; // sort trailers after leaders
- addl %esi,%eax // %esi is 0 or 1, set in LSide1 / LSide0
-
-// if (!newedges[v] || newedges[v]->u >= u_check)
-// {
- movl C(newedges)(,%ebx,4),%esi // %ebx = v
- testl %esi,%esi
- jz LDoFirst
- cmpl %eax,et_u(%esi)
- jl LNotFirst
-LDoFirst:
-
-// edge->next = newedges[v];
-// newedges[v] = edge;
- movl %esi,et_next(%edi)
- movl %edi,C(newedges)(,%ebx,4)
-
- jmp LSetRemove
-
-// }
-
-LNotFirst:
-
-// else
-// {
-// pcheck = newedges[v];
-//
-// while (pcheck->next && pcheck->next->u < u_check)
-// pcheck = pcheck->next;
-LFindInsertLoop:
- movl %esi,%edx // %edx = pcheck
- movl et_next(%esi),%esi // %esi = pcheck->next
- testl %esi,%esi
- jz LInsertFound
- cmpl %eax,et_u(%esi)
- jl LFindInsertLoop
-
-LInsertFound:
-
-// edge->next = pcheck->next;
-// pcheck->next = edge;
- movl %esi,et_next(%edi)
- movl %edi,et_next(%edx)
-
-// }
-
-LSetRemove:
-
-// edge->nextremove = removeedges[v2];
-// removeedges[v2] = edge;
- movl C(removeedges)(,%ecx,4),%eax // %ecx = v2
- movl %edi,C(removeedges)(,%ecx,4)
- movl %eax,et_nextremove(%edi)
-
-Ldone:
- movl Lstack,%esp // clear temporary variables from stack
-
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- ret
-
-// at least one point is clipped
-
-Lp2:
- testl %eax,%eax // %eax still holds the bits of d0
- jns Lp1 // d0 is not negative, so only point 1 is clipped
-
-// else
-// {
-// // point 0 is clipped
-
-// if (d1 < 0)
-// {
- movl Ld1,%eax
- testl %eax,%eax
- jns Lp3
-
-// // both points are clipped
-// // we do cache fully clipped edges
-// if (!leftclipped)
- movl C(r_leftclipped),%eax
- movl C(r_pedge),%ecx // NOTE(review): %ecx does not appear to be read on this path
- testl %eax,%eax
- jnz Ldone
-
-// cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK);
- movl C(r_framecount),%eax
- andl $(FRAMECOUNT_MASK),%eax
- orl $(FULLY_CLIPPED_CACHED),%eax
- movl %eax,C(cacheoffset)
-
-// return;
- jmp Ldone
-
-// }
-
-Lp1:
-
-// // point 0 is unclipped
-// if (d1 >= 0)
-// {
-// // both points are unclipped
-// continue;
-
-// // only point 1 is clipped
-
-// f = d0 / (d0 - d1);
- flds Ld0 // d0
- flds Ld1 // d1 | d0
- fsubr %st(1),%st(0) // d0-d1 | d0
-
-// // we don't cache partially clipped edges
- movl $0x7FFFFFFF,C(cacheoffset)
-
- fdivrp %st(0),%st(1) // f
-
- subl $(mv_size),%esp // allocate space for clipvert
-
-// clipvert.position[0] = pv0->position[0] +
-// f * (pv1->position[0] - pv0->position[0]);
-// clipvert.position[1] = pv0->position[1] +
-// f * (pv1->position[1] - pv0->position[1]);
-// clipvert.position[2] = pv0->position[2] +
-// f * (pv1->position[2] - pv0->position[2]);
- flds mv_position+8(%edx)
- fsubs mv_position+8(%esi)
- flds mv_position+4(%edx)
- fsubs mv_position+4(%esi)
- flds mv_position+0(%edx)
- fsubs mv_position+0(%esi) // 0 | 1 | 2
-
-// replace pv1 with the clip point
- movl %esp,%edx
- movl cp_leftedge(%ebx),%eax // %al is tested as leftedge here, %ah as rightedge at Ltestright
- testb %al,%al // result is consumed by jz Ltestright after the FPU sequence below
-
- fmul %st(3),%st(0)
- fxch %st(1) // 1 | 0 | 2
- fmul %st(3),%st(0)
- fxch %st(2) // 2 | 0 | 1
- fmulp %st(0),%st(3) // 0 | 1 | 2
- fadds mv_position+0(%esi)
- fxch %st(1) // 1 | 0 | 2
- fadds mv_position+4(%esi)
- fxch %st(2) // 2 | 0 | 1
- fadds mv_position+8(%esi)
- fxch %st(1) // 0 | 2 | 1
- fstps mv_position+0(%esp) // 2 | 1
- fstps mv_position+8(%esp) // 1
- fstps mv_position+4(%esp)
-
-// if (clip->leftedge)
-// {
- jz Ltestright
-
-// r_leftclipped = true;
-// r_leftexit = clipvert;
- movl $1,C(r_leftclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_leftexit)+mv_position+8
-
- jmp Lcontinue
-
-// }
-
-Ltestright:
-// else if (clip->rightedge)
-// {
- testb %ah,%ah
- jz Lcontinue
-
-// r_rightclipped = true;
-// r_rightexit = clipvert;
- movl $1,C(r_rightclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_rightexit)+mv_position+8
-
-// }
-//
-// R_ClipEdge (pv0, &clipvert, clip->next);
-// return;
-// }
- jmp Lcontinue // the recursive call in the C comment is done by looping with %edx = &clipvert
-
-// }
-
-Lp3:
-
-// // only point 0 is clipped
-// r_lastvertvalid = false;
-
- movl $0,C(r_lastvertvalid)
-
-// f = d0 / (d0 - d1);
- flds Ld0 // d0
- flds Ld1 // d1 | d0
- fsubr %st(1),%st(0) // d0-d1 | d0
-
-// // we don't cache partially clipped edges
- movl $0x7FFFFFFF,C(cacheoffset)
-
- fdivrp %st(0),%st(1) // f
-
- subl $(mv_size),%esp // allocate space for clipvert
-
-// clipvert.position[0] = pv0->position[0] +
-// f * (pv1->position[0] - pv0->position[0]);
-// clipvert.position[1] = pv0->position[1] +
-// f * (pv1->position[1] - pv0->position[1]);
-// clipvert.position[2] = pv0->position[2] +
-// f * (pv1->position[2] - pv0->position[2]);
- flds mv_position+8(%edx)
- fsubs mv_position+8(%esi)
- flds mv_position+4(%edx)
- fsubs mv_position+4(%esi)
- flds mv_position+0(%edx)
- fsubs mv_position+0(%esi) // 0 | 1 | 2
-
- movl cp_leftedge(%ebx),%eax // %al is tested as leftedge here, %ah as rightedge at Ltestright2
- testb %al,%al // result is consumed by jz Ltestright2 after the FPU sequence below
-
- fmul %st(3),%st(0)
- fxch %st(1) // 1 | 0 | 2
- fmul %st(3),%st(0)
- fxch %st(2) // 2 | 0 | 1
- fmulp %st(0),%st(3) // 0 | 1 | 2
- fadds mv_position+0(%esi)
- fxch %st(1) // 1 | 0 | 2
- fadds mv_position+4(%esi)
- fxch %st(2) // 2 | 0 | 1
- fadds mv_position+8(%esi)
- fxch %st(1) // 0 | 2 | 1
- fstps mv_position+0(%esp) // 2 | 1
- fstps mv_position+8(%esp) // 1
- fstps mv_position+4(%esp)
-
-// replace pv0 with the clip point
- movl %esp,%esi
-
-// if (clip->leftedge)
-// {
- jz Ltestright2
-
-// r_leftclipped = true;
-// r_leftenter = clipvert;
- movl $1,C(r_leftclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_leftenter)+mv_position+8
-
- jmp Lcontinue
-
-// }
-
-Ltestright2:
-// else if (clip->rightedge)
-// {
- testb %ah,%ah
- jz Lcontinue
-
-// r_rightclipped = true;
-// r_rightenter = clipvert;
- movl $1,C(r_rightclipped)
- movl mv_position+0(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+0
- movl mv_position+4(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+4
- movl mv_position+8(%esp),%eax
- movl %eax,C(r_rightenter)+mv_position+8
-
-// }
- jmp Lcontinue // keep clipping against the remaining planes with %esi = &clipvert
-
-// %esi = vec3_t point to transform and project
-// %edx preserved; %eax is clobbered by fnstsw; result is left on the FPU stack as v | lzi | u
-LTransformAndProject:
-
-// // transform and project
-// VectorSubtract (world, modelorg, local);
- flds mv_position+0(%esi)
- fsubs C(modelorg)+0
- flds mv_position+4(%esi)
- fsubs C(modelorg)+4
- flds mv_position+8(%esi)
- fsubs C(modelorg)+8
- fxch %st(2) // local[0] | local[1] | local[2]
-
-// TransformVector (local, transformed);
-//
-// if (transformed[2] < NEAR_CLIP)
-// transformed[2] = NEAR_CLIP;
-//
-// lzi0 = 1.0 / transformed[2];
- fld %st(0) // local[0] | local[0] | local[1] | local[2]
- fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2]
- fld %st(1) // local[0] | zm0 | local[0] | local[1] |
- // local[2]
- fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
- fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
- fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
- fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
- // local[2]
- fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
- // local[2]
- fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
- // local[1] | local[2]
- fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
- // local[1] | local[2]
- fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
- // xm1 | local[2]
- faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
- fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
- faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
- faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
- fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
- fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
- fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
- fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
- fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
- fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
- fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
- faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
- fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
- faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
- fxch %st(1) // ym2 | zm4 | xm4 | ym3
- faddp %st(0),%st(3) // zm4 | xm4 | ym4
-
- fcoms Lfp_near_clip
- fnstsw %ax
- testb $1,%ah // C0 set => transformed z < NEAR_CLIP
- jz LNoClip
- fstp %st(0)
- flds Lfp_near_clip // replace z with NEAR_CLIP
-
-LNoClip:
-
- fdivrs float_1 // lzi0 | x | y
- fxch %st(1) // x | lzi0 | y
-
-// // FIXME: build x/yscale into transform?
-// scale = xscale * lzi0;
-// u0 = (xcenter + scale*transformed[0]);
- flds C(xscale) // xscale | x | lzi0 | y
- fmul %st(2),%st(0) // scale | x | lzi0 | y
- fmulp %st(0),%st(1) // scale*x | lzi0 | y
- fadds C(xcenter) // u0 | lzi0 | y
-
-// if (u0 < r_refdef.fvrectx_adj)
-// u0 = r_refdef.fvrectx_adj;
-// if (u0 > r_refdef.fvrectright_adj)
-// u0 = r_refdef.fvrectright_adj;
-// FIXME: use integer compares of floats?
- fcoms C(r_refdef)+rd_fvrectx_adj
- fnstsw %ax
- testb $1,%ah // C0 set => u0 below the left limit
- jz LClampP0
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectx_adj
-LClampP0:
- fcoms C(r_refdef)+rd_fvrectright_adj
- fnstsw %ax
- testb $0x45,%ah // C3, C2 and C0 all clear => u0 above the right limit
- jnz LClampP1
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectright_adj
-LClampP1:
-
- fld %st(1) // lzi0 | u0 | lzi0 | y
-
-// scale = yscale * lzi0;
-// v0 = (ycenter - scale*transformed[1]);
- fmuls C(yscale) // scale | u0 | lzi0 | y
- fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
- fxch %st(2) // scale*y | lzi0 | u0
- fsubrs C(ycenter) // v0 | lzi0 | u0
-
-// if (v0 < r_refdef.fvrecty_adj)
-// v0 = r_refdef.fvrecty_adj;
-// if (v0 > r_refdef.fvrectbottom_adj)
-// v0 = r_refdef.fvrectbottom_adj;
-// FIXME: use integer compares of floats?
- fcoms C(r_refdef)+rd_fvrecty_adj
- fnstsw %ax
- testb $1,%ah // C0 set => v0 below the top limit
- jz LClampP2
- fstp %st(0)
- flds C(r_refdef)+rd_fvrecty_adj
-LClampP2:
- fcoms C(r_refdef)+rd_fvrectbottom_adj
- fnstsw %ax
- testb $0x45,%ah // C3, C2 and C0 all clear => v0 above the bottom limit
- jnz LClampP3
- fstp %st(0)
- flds C(r_refdef)+rd_fvrectbottom_adj
-LClampP3:
- ret
-
-#endif // id386
-
--- a/r_edgea.s
+++ /dev/null
@@ -1,731 +1,0 @@
-//
-// r_edgea.s
-// x86 assembly-language edge-processing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
- .data
-Ltemp: .long 0 // integer scratch so edge->u - 0xFFFFF can be loaded with fildl
-float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000
-float_point_999: .single 0.999 // multiplier that turns newzi into newzibottom
-float_1_point_001: .single 1.001 // multiplier that turns newzi into newzitop
-
- .text
-
-//--------------------------------------------------------------------
-
-#define edgestoadd 4+8 // note odd stack offsets because of interleaving
-#define edgelist 8+12 // with pushes
-
-.globl C(R_EdgeCodeStart)
-C(R_EdgeCodeStart):
-
-.globl C(R_InsertNewEdges)
-C(R_InsertNewEdges): // links each edge on the edgestoadd chain into edgelist, ordered by et_u
- pushl %edi
- pushl %esi // preserve register variables
- movl edgestoadd(%esp),%edx // %edx = edge to add
- pushl %ebx
- movl edgelist(%esp),%ecx // %ecx = current search position in the list
-
-LDoNextEdge:
- movl et_u(%edx),%eax // %eax = u of the edge being inserted
- movl %edx,%edi // %edi = edge being inserted; %edx moves on to its successor
-
-LContinueSearch: // search unrolled four times, alternating %ecx and %esi as the candidate
- movl et_u(%ecx),%ebx
- movl et_next(%ecx),%esi
- cmpl %ebx,%eax
- jle LAddedge // insert before %ecx
- movl et_u(%esi),%ebx
- movl et_next(%esi),%ecx
- cmpl %ebx,%eax
- jle LAddedge2 // insert before %esi
- movl et_u(%ecx),%ebx
- movl et_next(%ecx),%esi
- cmpl %ebx,%eax
- jle LAddedge
- movl et_u(%esi),%ebx
- movl et_next(%esi),%ecx
- cmpl %ebx,%eax
- jg LContinueSearch
-
-LAddedge2: // insert %edi in front of %esi
- movl et_next(%edx),%edx // next edge to add
- movl et_prev(%esi),%ebx
- movl %esi,et_next(%edi)
- movl %ebx,et_prev(%edi)
- movl %edi,et_next(%ebx)
- movl %edi,et_prev(%esi)
- movl %esi,%ecx // resume the next search from the edge just inserted before
-
- testl %edx,%edx // zero test, written the same way as elsewhere in these files
- jnz LDoNextEdge
- jmp LDone
-
- .align 4
-LAddedge: // insert %edi in front of %ecx
- movl et_next(%edx),%edx // next edge to add
- movl et_prev(%ecx),%ebx
- movl %ecx,et_next(%edi)
- movl %ebx,et_prev(%edi)
- movl %edi,et_next(%ebx)
- movl %edi,et_prev(%ecx)
-
- testl %edx,%edx // zero test, written the same way as elsewhere in these files
- jnz LDoNextEdge
-
-LDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
-
- ret
-
-//--------------------------------------------------------------------
-
-#define predge 4+4 // argument offset from %esp after the single push below
-
-.globl C(R_RemoveEdges)
-C(R_RemoveEdges): // unlinks every edge on the et_nextremove chain from the prev/next list
- pushl %ebx
- movl predge(%esp),%eax // %eax = first edge to remove
-
-Lre_loop: // unrolled twice: %eax is removed first, then %ebx
- movl et_next(%eax),%ecx
- movl et_nextremove(%eax),%ebx
- movl et_prev(%eax),%edx
- testl %ebx,%ebx // flags survive the movl below and feed jz Lre_done
- movl %edx,et_prev(%ecx) // next->prev = prev
- jz Lre_done
- movl %ecx,et_next(%edx) // prev->next = next
-
- movl et_next(%ebx),%ecx
- movl et_prev(%ebx),%edx
- movl et_nextremove(%ebx),%eax
- movl %edx,et_prev(%ecx) // next->prev = prev
- testl %eax,%eax // flags survive the movl below and feed jnz Lre_loop
- movl %ecx,et_next(%edx) // prev->next = next
- jnz Lre_loop
-
- popl %ebx
- ret
-
-Lre_done:
- movl %ecx,et_next(%edx) // finish unlinking the last edge: prev->next = next
- popl %ebx
-
- ret
-
-//--------------------------------------------------------------------
-
-#define pedgelist 4+4 // note odd stack offset because of interleaving
- // with pushes
-
-.globl C(R_StepActiveU)
-C(R_StepActiveU): // adds et_u_step to et_u for each edge and re-sorts edges that fall behind their predecessor
- pushl %edi
- movl pedgelist(%esp),%edx // %edx = current edge
- pushl %esi // preserve register variables
- pushl %ebx
-
- movl et_prev(%edx),%esi
-
-LNewEdge:
- movl et_u(%esi),%edi // %edi = u of the preceding edge
-
-LNextEdge: // unrolled four times, alternating (%edx,%eax) and (%esi,%edi) as the current edge and its u
- movl et_u(%edx),%eax
- movl et_u_step(%edx),%ebx
- addl %ebx,%eax
- movl et_next(%edx),%esi
- movl %eax,et_u(%edx)
- cmpl %edi,%eax
- jl LPushBack // stepped u is below the previous edge's u
-
- movl et_u(%esi),%edi
- movl et_u_step(%esi),%ebx
- addl %ebx,%edi
- movl et_next(%esi),%edx
- movl %edi,et_u(%esi)
- cmpl %eax,%edi
- jl LPushBack2
-
- movl et_u(%edx),%eax
- movl et_u_step(%edx),%ebx
- addl %ebx,%eax
- movl et_next(%edx),%esi
- movl %eax,et_u(%edx)
- cmpl %edi,%eax
- jl LPushBack
-
- movl et_u(%esi),%edi
- movl et_u_step(%esi),%ebx
- addl %ebx,%edi
- movl et_next(%esi),%edx
- movl %edi,et_u(%esi)
- cmpl %eax,%edi
- jnl LNextEdge
-
-LPushBack2: // swap register roles so LPushBack sees the edge in %edx, its u in %eax and its next in %esi
- movl %edx,%ebx
- movl %edi,%eax
- movl %esi,%edx
- movl %ebx,%esi
-
-LPushBack:
-// push it back to keep it sorted
- movl et_prev(%edx),%ecx
- movl et_next(%edx),%ebx
-
-// done if the -1 in edge_aftertail triggered this
- cmpl $(C(edge_aftertail)),%edx
- jz LUDone
-
-// pull the edge out of the edge list
- movl et_prev(%ecx),%edi
- movl %ecx,et_prev(%esi)
- movl %ebx,et_next(%ecx)
-
-// find out where the edge goes in the edge list
-LPushBackLoop: // walks backwards two edges per iteration
- movl et_prev(%edi),%ecx
- movl et_u(%edi),%ebx
- cmpl %ebx,%eax
- jnl LPushBackFound
-
- movl et_prev(%ecx),%edi
- movl et_u(%ecx),%ebx
- cmpl %ebx,%eax
- jl LPushBackLoop
-
- movl %ecx,%edi
-
-// put the edge back into the edge list
-LPushBackFound: // %edi = edge to insert after
- movl et_next(%edi),%ebx
- movl %edi,et_prev(%edx)
- movl %ebx,et_next(%edx)
- movl %edx,et_next(%edi)
- movl %edx,et_prev(%ebx)
-
- movl %esi,%edx // continue with the edge that followed the one just moved
- movl et_prev(%esi),%esi
-
- cmpl $(C(edge_tail)),%edx
- jnz LNewEdge
-
-LUDone:
- popl %ebx // restore register variables
- popl %esi
- popl %edi
-
- ret
-
-//--------------------------------------------------------------------
-
-#define surf 4 // note this is loaded before any pushes
-
- .align 4
-TrailingEdge: // in: %esi = surf, %ebx = edge, %ebp = span_p; writes %eax, %ecx, %edx, %esi
- movl st_spanstate(%esi),%eax // check for edge inversion
- decl %eax
- jnz LInverted
-
- movl %eax,st_spanstate(%esi)
- movl st_insubmodel(%esi),%ecx
- movl 0x12345678,%edx // surfaces[1].st_next
-LPatch0: // the address operand just above this label is filled in by R_SurfacePatch
- movl C(r_bmodelactive),%eax
- subl %ecx,%eax
- cmpl %esi,%edx // flags survive the movl below and feed jnz LNoEmit
- movl %eax,C(r_bmodelactive)
- jnz LNoEmit // surface isn't on top, just remove
-
-// emit a span (current top going away)
- movl et_u(%ebx),%eax
- shrl $20,%eax // iu = integral pixel u
- movl st_last_u(%esi),%edx
- movl st_next(%esi),%ecx
- cmpl %edx,%eax
- jle LNoEmit2 // iu <= surf->last_u, so nothing to emit
-
- movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
- subl %edx,%eax
- movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
-
- movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp) // span->v = current_iv;
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
- movl %ebp,st_spans(%esi) // surf->spans = span;
- addl $(espan_t_size),%ebp
-
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LNoEmit2:
- movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LNoEmit:
- movl st_next(%esi),%edx // remove the surface from the surface
- movl st_prev(%esi),%esi // stack
-
- movl %edx,st_next(%esi)
- movl %esi,st_prev(%edx)
- ret
-
-LInverted:
- movl %eax,st_spanstate(%esi) // store the decremented, non-zero spanstate and leave the stack alone
- ret
-
-//--------------------------------------------------------------------
-
-// trailing edge only
-Lgs_trailing:
- pushl $Lgs_nextedge // TrailingEdge's ret will continue at Lgs_nextedge
- jmp TrailingEdge
-
-
-.globl C(R_GenerateSpans)
-C(R_GenerateSpans):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// clear active surfaces to just the background surface
- movl C(surfaces),%eax
- movl C(edge_head_u_shift20),%edx
- addl $(st_size),%eax // %eax = &surfaces[1]
-// %ebp = span_p throughout
- movl C(span_p),%ebp
-
- movl $0,C(r_bmodelactive)
-
- movl %eax,st_next(%eax)
- movl %eax,st_prev(%eax)
- movl %edx,st_last_u(%eax)
- movl C(edge_head)+et_next,%ebx // edge=edge_head.next
-
-// generate spans
- cmpl $(C(edge_tail)),%ebx // done if empty list
- jz Lgs_lastspan
-
-Lgs_edgeloop:
-
- movl et_surfs(%ebx),%edi // both 16-bit surfs[] entries in one load
- movl C(surfaces),%eax
- movl %edi,%esi
- andl $0xFFFF0000,%edi // high half: surfs[1], still shifted left by 16
- andl $0xFFFF,%esi // low half: surfs[0]
- jz Lgs_leading // not a trailing edge
-
-// it has a left surface, so a surface is going away for this span
- shll $(SURF_T_SHIFT),%esi // index -> byte offset
- addl %eax,%esi // %esi = &surfaces[surfs[0]]
- testl %edi,%edi
- jz Lgs_trailing
-
-// both leading and trailing
- call TrailingEdge
- movl C(surfaces),%eax // NOTE(review): loaded again at Lgs_leading before %eax is read
-
-// ---------------------------------------------------------------
-// handle a leading edge
-// ---------------------------------------------------------------
-
-Lgs_leading:
- shrl $16-SURF_T_SHIFT,%edi // (surfs[1] << 16) -> byte offset into surfaces
- movl C(surfaces),%eax
- addl %eax,%edi // %edi = surf
- movl 0x12345678,%esi // surf2 = surfaces[1].next;
-LPatch2: // the address operand just above this label is filled in by R_SurfacePatch
- movl st_spanstate(%edi),%edx
- movl st_insubmodel(%edi),%eax
- testl %eax,%eax
- jnz Lbmodel_leading
-
-// handle a leading non-bmodel edge
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
- testl %edx,%edx
- jnz Lxl_done
-
-
-// if (surf->key < surf2->key)
-// goto newtop;
- incl %edx
- movl st_key(%edi),%eax
- movl %edx,st_spanstate(%edi)
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jl Lnewtop
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// surf2 = surf2->next;
-// } while (surf->key >= surf2->key);
-Lsortloopnb:
- movl st_next(%esi),%esi
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jge Lsortloopnb
-
- jmp LInsertAndExit
-
-
-// handle a leading bmodel edge
- .align 4
-Lbmodel_leading:
-
-// don't start a span if this is an inverted span, with the end edge preceding
-// the start edge (that is, we've already seen the end edge)
- testl %edx,%edx
- jnz Lxl_done
-
- movl C(r_bmodelactive),%ecx
- incl %edx
- incl %ecx
- movl %edx,st_spanstate(%edi)
- movl %ecx,C(r_bmodelactive)
-
-// if (surf->key < surf2->key)
-// goto newtop;
- movl st_key(%edi),%eax
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jl Lnewtop
-
-// if ((surf->key == surf2->key) && surf->insubmodel)
-// {
- jz Lzcheck_for_newtop // still the flags of the cmpl above
-
-// main sorting loop to search through surface stack until insertion point
-// found. Always terminates because background surface is sentinel
-// do
-// {
-// surf2 = surf2->next;
-// } while (surf->key > surf2->key);
-Lsortloop:
- movl st_next(%esi),%esi
- movl st_key(%esi),%ecx
- cmpl %ecx,%eax
- jg Lsortloop
-
- jne LInsertAndExit // keys differ: insertion point found; equal keys fall into the 1/z test
-
-// Do 1/z sorting to see if we've arrived in the right position
- movl et_u(%ebx),%eax
- subl $0xFFFFF,%eax
- movl %eax,Ltemp
- fildl Ltemp
-
- fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
- // (1.0 / 0x100000);
-
- fld %st(0) // fu | fu
- fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
- flds C(fv) // fv | fu*surf->d_zistepu | fu
- fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
- fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
- fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
-
- flds st_d_zistepu(%esi) // surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fmul %st(3),%st(0) // fu*surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fu*surf2->d_zistepu |
- // fv*surf->d_zistepv | fu
- faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
-
- flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
- fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fld %st(2) // newzi | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
-
- fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
- // newzibottom | newzi | fu
- fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
- // fv*surf2->d_zistepv | newzibottom | newzi |
- // fu
- faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
- fxch %st(1) // newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-// goto Lgotposition;
-
- fcomp %st(1) // testzi | newzi | fu
-
- fxch %st(1) // newzi | testzi | fu
- fmuls float_1_point_001 // newzitop | testzi | fu
- fxch %st(1) // testzi | newzitop | fu
-
- fnstsw %ax // status word still holds the result of the fcomp above
- testb $0x01,%ah // C0 clear => newzibottom >= testzi
- jz Lgotposition_fpop3
-
-// if (newzitop >= testzi)
-// {
-
- fcomp %st(1) // newzitop | fu
- fnstsw %ax
- testb $0x45,%ah // C3, C2 and C0 all clear => testzi > newzitop
- jz Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-// goto Lgotposition;
-
- flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop| fu
- fcomps st_d_zistepu(%esi) // newzitop | fu
- fnstsw %ax
- testb $0x01,%ah // C0 clear => surf->d_zistepu >= surf2->d_zistepu
- jz Lgotposition_fpop2
-
- fstp %st(0) // clear the FPstack
- fstp %st(0)
- movl st_key(%edi),%eax // Lsortloop compares against the key in %eax
- jmp Lsortloop
-
-
-Lgotposition_fpop3: // pop three FPU values, then insert
- fstp %st(0)
-Lgotposition_fpop2: // pop two FPU values, then insert
- fstp %st(0)
- fstp %st(0)
- jmp LInsertAndExit
-
-
-// emit a span (obscures current top)
-
-Lnewtop_fpop3: // pop three FPU values first
- fstp %st(0)
-Lnewtop_fpop2: // pop two FPU values first
- fstp %st(0)
- fstp %st(0)
- movl st_key(%edi),%eax // reload the sorting key (NOTE(review): %eax is overwritten on the next instruction)
-
-Lnewtop:
- movl et_u(%ebx),%eax
- movl st_last_u(%esi),%edx
- shrl $20,%eax // iu = integral pixel u
- movl %eax,st_last_u(%edi) // surf->last_u = iu;
- cmpl %edx,%eax
- jle LInsertAndExit // iu <= surf->last_u, so nothing to emit
-
- subl %edx,%eax
- movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
-
- movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp) // span->v = current_iv;
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
- movl %ebp,st_spans(%esi) // surf->spans = span;
- addl $(espan_t_size),%ebp
-
-LInsertAndExit:
-// insert before surf2
- movl %esi,st_next(%edi) // surf->next = surf2;
- movl st_prev(%esi),%eax
- movl %eax,st_prev(%edi) // surf->prev = surf2->prev;
- movl %edi,st_prev(%esi) // surf2->prev = surf;
- movl %edi,st_next(%eax) // surf2->prev->next = surf;
-
-// ---------------------------------------------------------------
-// leading edge done
-// ---------------------------------------------------------------
-
-// ---------------------------------------------------------------
-// see if there are any more edges
-// ---------------------------------------------------------------
-
-Lgs_nextedge:
- movl et_next(%ebx),%ebx
- cmpl $(C(edge_tail)),%ebx
- jnz Lgs_edgeloop
-
-// clean up at the right edge
-Lgs_lastspan:
-
-// now that we've reached the right edge of the screen, we're done with any
-// unfinished surfaces, so emit a span for whatever's on top
- movl 0x12345678,%esi // surfaces[1].st_next
-LPatch3: // the address operand just above this label is filled in by R_SurfacePatch
- movl C(edge_tail_u_shift20),%eax
- xorl %ecx,%ecx // %ecx = 0, stored into every spanstate below
- movl st_last_u(%esi),%edx
- subl %edx,%eax // span count = edge_tail_u_shift20 - last_u
- jle Lgs_resetspanstate // nothing to emit
-
- movl %edx,espan_t_u(%ebp)
- movl %eax,espan_t_count(%ebp)
- movl C(current_iv),%eax
- movl %eax,espan_t_v(%ebp)
- movl st_spans(%esi),%eax
- movl %eax,espan_t_pnext(%ebp)
- movl %ebp,st_spans(%esi)
- addl $(espan_t_size),%ebp
-
-// reset spanstate for all surfaces in the surface stack
-Lgs_resetspanstate:
- movl %ecx,st_spanstate(%esi)
- movl st_next(%esi),%esi
- cmpl $0x12345678,%esi // &surfaces[1]
-LPatch4: // the immediate just above this label is filled in by R_SurfacePatch
- jnz Lgs_resetspanstate
-
-// store the final span_p
- movl %ebp,C(span_p)
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-// ---------------------------------------------------------------
-// 1/z sorting for bmodels in the same leaf
-// ---------------------------------------------------------------
- .align 4
-Lxl_done: // inverted span: just count the leading edge in spanstate
- incl %edx
- movl %edx,st_spanstate(%edi)
-
- jmp Lgs_nextedge
-
-
- .align 4
-Lzcheck_for_newtop: // entered from Lbmodel_leading when surf->key == surf2->key
- movl et_u(%ebx),%eax
- subl $0xFFFFF,%eax
- movl %eax,Ltemp
- fildl Ltemp
-
- fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
- // (1.0 / 0x100000);
-
- fld %st(0) // fu | fu
- fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
- flds C(fv) // fv | fu*surf->d_zistepu | fu
- fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
- fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
- fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
-
- flds st_d_zistepu(%esi) // surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fmul %st(3),%st(0) // fu*surf2->d_zistepu |
- // fu*surf->d_zistepu + surf->d_ziorigin |
- // fv*surf->d_zistepv | fu
- fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
- // fu*surf2->d_zistepu |
- // fv*surf->d_zistepv | fu
- faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
-
- flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
- fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fld %st(2) // newzi | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
- fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
- // fu*surf2->d_zistepu | newzi | fu
-
- fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
- // newzibottom | newzi | fu
- fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
- // fv*surf2->d_zistepv | newzibottom | newzi |
- // fu
- faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
- fxch %st(1) // newzibottom | testzi | newzi | fu
-
-// if (newzibottom >= testzi)
-// goto newtop;
-
- fcomp %st(1) // testzi | newzi | fu
-
- fxch %st(1) // newzi | testzi | fu
- fmuls float_1_point_001 // newzitop | testzi | fu
- fxch %st(1) // testzi | newzitop | fu
-
- fnstsw %ax // status word still holds the result of the fcomp above
- testb $0x01,%ah // C0 clear => newzibottom >= testzi
- jz Lnewtop_fpop3
-
-// if (newzitop >= testzi)
-// {
-
- fcomp %st(1) // newzitop | fu
- fnstsw %ax
- testb $0x45,%ah // C3, C2 and C0 all clear => testzi > newzitop
- jz Lsortloop_fpop2
-
-// if (surf->d_zistepu >= surf2->d_zistepu)
-// goto newtop;
-
- flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop | fu
- fcomps st_d_zistepu(%esi) // newzitop | fu
- fnstsw %ax
- testb $0x01,%ah // C0 clear => surf->d_zistepu >= surf2->d_zistepu
- jz Lnewtop_fpop2
-
-Lsortloop_fpop2: // pop two FPU values and resume the key search
- fstp %st(0) // clear the FP stack
- fstp %st(0)
- movl st_key(%edi),%eax // Lsortloop compares against the key in %eax
- jmp Lsortloop
-
-
-.globl C(R_EdgeCodeEnd)
-C(R_EdgeCodeEnd):
-
-
-//----------------------------------------------------------------------
-// Surface array address code patching routine
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_SurfacePatch)
-C(R_SurfacePatch): // stores surfaces-derived addresses into the 0x12345678 placeholders that precede the LPatch labels
-
- movl C(surfaces),%eax
- addl $(st_size),%eax // %eax = &surfaces[1]
- movl %eax,LPatch4-4 // immediate of the cmpl before LPatch4
-
- addl $(st_next),%eax // %eax = &surfaces[1].next
- movl %eax,LPatch0-4 // the 4 bytes before each label are the placeholder operand
- movl %eax,LPatch2-4
- movl %eax,LPatch3-4
-
- ret
-
-#endif // id386
-
--- a/r_varsa.s
+++ /dev/null
@@ -1,45 +1,0 @@
-//
-// r_varsa.s
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-
-//-------------------------------------------------------
-// ASM-only variables
-//-------------------------------------------------------
-.globl float_1, float_particle_z_clip, float_point5
-.globl float_minus_1, float_0
-float_0: .single 0.0
-float_1: .single 1.0
-float_minus_1: .single -1.0
-float_particle_z_clip: .single PARTICLE_Z_CLIP
-float_point5: .single 0.5
-
-.globl fp_16, fp_64k, fp_1m, fp_64kx64k
-.globl fp_1m_minus_1
-.globl fp_8
-fp_1m: .single 1048576.0
-fp_1m_minus_1: .single 1048575.0
-fp_64k: .single 65536.0
-fp_8: .single 8.0
-fp_16: .single 16.0
-fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000
-
-
-.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
-FloatZero: .long 0
-Float2ToThe31nd: .long 0x4f000000
-FloatMinus2ToThe31nd: .long 0xcf000000
-
-.globl C(r_bmodelactive)
-C(r_bmodelactive): .long 0
-
-#endif // id386
-
--- a/snd_mixa.s
+++ /dev/null
@@ -1,199 +1,0 @@
-//
-// snd_mixa.s
-// x86 assembly-language sound code
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-#ifdef id386
-
- .text
-
-//----------------------------------------------------------------------
-// 8-bit sound-mixing code
-//----------------------------------------------------------------------
-
-#define ch 4+16
-#define sc 8+16
-#define count 12+16
-
-.globl C(SND_PaintChannelFrom8)
-C(SND_PaintChannelFrom8):
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
- pushl %ebp
-
-// int data;
-// short *lscale, *rscale;
-// unsigned char *sfx;
-// int i;
-
- movl ch(%esp),%ebx
- movl sc(%esp),%esi
-
-// if (ch->leftvol > 255)
-// ch->leftvol = 255;
-// if (ch->rightvol > 255)
-// ch->rightvol = 255;
- movl ch_leftvol(%ebx),%eax
- movl ch_rightvol(%ebx),%edx
- cmpl $255,%eax
- jna LLeftSet
- movl $255,%eax
-LLeftSet:
- cmpl $255,%edx
- jna LRightSet
- movl $255,%edx
-LRightSet:
-
-// lscale = snd_scaletable[ch->leftvol >> 3];
-// rscale = snd_scaletable[ch->rightvol >> 3];
-// sfx = (signed char *)sc->data + ch->pos;
-// ch->pos += count;
- andl $0xF8,%eax
- addl $(sfxc_data),%esi
- andl $0xF8,%edx
- movl ch_pos(%ebx),%edi
- movl count(%esp),%ecx
- addl %edi,%esi
- shll $7,%eax
- addl %ecx,%edi
- shll $7,%edx
- movl %edi,ch_pos(%ebx)
- addl $(C(snd_scaletable)),%eax
- addl $(C(snd_scaletable)),%edx
- subl %ebx,%ebx
- movb -1(%esi,%ecx,1),%bl
-
- testl $1,%ecx
- jz LMix8Loop
-
- movl (%eax,%ebx,4),%edi
- movl (%edx,%ebx,4),%ebp
- addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
- addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
- movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
- movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
- movb -2(%esi,%ecx,1),%bl
-
- decl %ecx
- jz LDone
-
-// for (i=0 ; i<count ; i++)
-// {
-LMix8Loop:
-
-// data = sfx[i];
-// paintbuffer[i].left += lscale[data];
-// paintbuffer[i].right += rscale[data];
- movl (%eax,%ebx,4),%edi
- movl (%edx,%ebx,4),%ebp
- addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
- addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
- movb -2(%esi,%ecx,1),%bl
- movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
- movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
-
- movl (%eax,%ebx,4),%edi
- movl (%edx,%ebx,4),%ebp
- movb -3(%esi,%ecx,1),%bl
- addl C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
- addl C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
- movl %edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
- movl %ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
-
-// }
- subl $2,%ecx
- jnz LMix8Loop
-
-LDone:
- popl %ebp
- popl %ebx
- popl %edi
- popl %esi
-
- ret
-
-
-//----------------------------------------------------------------------
-// Transfer of stereo buffer to 16-bit DMA buffer code
-//----------------------------------------------------------------------
-
-.globl C(Snd_WriteLinearBlastStereo16)
-C(Snd_WriteLinearBlastStereo16):
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-// int i;
-// int val;
- movl C(snd_linear_count),%ecx
- movl C(snd_p),%ebx
- movl C(snd_vol),%esi
- movl C(snd_out),%edi
-
-// for (i=0 ; i<snd_linear_count ; i+=2)
-// {
-LWLBLoopTop:
-
-// val = (snd_p[i]*snd_vol)>>8;
-// if (val > 0x7fff)
-// snd_out[i] = 0x7fff;
-// else if (val < (short)0x8000)
-// snd_out[i] = (short)0x8000;
-// else
-// snd_out[i] = val;
- movl -8(%ebx,%ecx,4),%eax
- imull %esi,%eax
- sarl $8,%eax
- cmpl $0x7FFF,%eax
- jg LClampHigh
- cmpl $0xFFFF8000,%eax
- jnl LClampDone
- movl $0xFFFF8000,%eax
- jmp LClampDone
-LClampHigh:
- movl $0x7FFF,%eax
-LClampDone:
-
-// val = (snd_p[i+1]*snd_vol)>>8;
-// if (val > 0x7fff)
-// snd_out[i+1] = 0x7fff;
-// else if (val < (short)0x8000)
-// snd_out[i+1] = (short)0x8000;
-// else
-// snd_out[i+1] = val;
- movl -4(%ebx,%ecx,4),%edx
- imull %esi,%edx
- sarl $8,%edx
- cmpl $0x7FFF,%edx
- jg LClampHigh2
- cmpl $0xFFFF8000,%edx
- jnl LClampDone2
- movl $0xFFFF8000,%edx
- jmp LClampDone2
-LClampHigh2:
- movl $0x7FFF,%edx
-LClampDone2:
- shll $16,%edx
- andl $0xFFFF,%eax
- orl %eax,%edx
- movl %edx,-4(%edi,%ecx,2)
-
-// }
- subl $2,%ecx
- jnz LWLBLoopTop
-
-// snd_p += snd_linear_count;
-
- popl %ebx
- popl %edi
- popl %esi
-
- ret
-
-
-#endif // id386
-
--- a/surf16.s
+++ /dev/null
@@ -1,153 +1,0 @@
-//
-// surf16.s
-// x86 assembly-language 16 bpp surface block drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
-//----------------------------------------------------------------------
-// Surface block drawer
-//----------------------------------------------------------------------
-
- .data
-
-k: .long 0
-loopentry: .long 0
-
- .align 4
-blockjumptable16:
- .long LEnter2_16
- .long LEnter4_16
- .long 0, LEnter8_16
- .long 0, 0, 0, LEnter16_16
-
-
- .text
-
- .align 4
-.globl C(R_Surf16Start)
-C(R_Surf16Start):
-
- .align 4
-.globl C(R_DrawSurfaceBlock16)
-C(R_DrawSurfaceBlock16):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
- movl C(blocksize),%eax
- movl C(prowdestbase),%edi
- movl C(pbasesource),%esi
- movl C(sourcesstep),%ebx
- movl blockjumptable16-4(,%eax,2),%ecx
- movl %eax,k
- movl %ecx,loopentry
- movl C(lightleft),%edx
- movl C(lightright),%ebp
-
-Lblockloop16:
-
- subl %edx,%ebp
- movb C(blockdivshift),%cl
- sarl %cl,%ebp
- jns Lp1_16
- testl C(blockdivmask),%ebp
- jz Lp1_16
- incl %ebp
-Lp1_16:
-
- subl %eax,%eax
- subl %ecx,%ecx // high words must be 0 in loop for addressing
-
- jmp *loopentry
-
- .align 4
-
-#include "block16.h"
-
- movl C(pbasesource),%esi
- movl C(lightleft),%edx
- movl C(lightright),%ebp
- movl C(sourcetstep),%eax
- movl C(lightrightstep),%ecx
- movl C(prowdestbase),%edi
-
- addl %eax,%esi
- addl %ecx,%ebp
-
- movl C(lightleftstep),%eax
- movl C(surfrowbytes),%ecx
-
- addl %eax,%edx
- addl %ecx,%edi
-
- movl %esi,C(pbasesource)
- movl %ebp,C(lightright)
- movl k,%eax
- movl %edx,C(lightleft)
- decl %eax
- movl %edi,C(prowdestbase)
- movl %eax,k
- jnz Lblockloop16
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-.globl C(R_Surf16End)
-C(R_Surf16End):
-
-//----------------------------------------------------------------------
-// Code patching routines
-//----------------------------------------------------------------------
- .data
-
- .align 4
-LPatchTable16:
- .long LBPatch0-4
- .long LBPatch1-4
- .long LBPatch2-4
- .long LBPatch3-4
- .long LBPatch4-4
- .long LBPatch5-4
- .long LBPatch6-4
- .long LBPatch7-4
- .long LBPatch8-4
- .long LBPatch9-4
- .long LBPatch10-4
- .long LBPatch11-4
- .long LBPatch12-4
- .long LBPatch13-4
- .long LBPatch14-4
- .long LBPatch15-4
-
- .text
-
- .align 4
-.globl C(R_Surf16Patch)
-C(R_Surf16Patch):
- pushl %ebx
-
- movl C(colormap),%eax
- movl $LPatchTable16,%ebx
- movl $16,%ecx
-LPatchLoop16:
- movl (%ebx),%edx
- addl $4,%ebx
- movl %eax,(%edx)
- decl %ecx
- jnz LPatchLoop16
-
- popl %ebx
-
- ret
-
-
-#endif // id386
--- a/surf8.s
+++ /dev/null
@@ -1,764 +1,0 @@
-//
-// surf8.s
-// x86 assembly-language 8 bpp surface block drawing code.
-//
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "asm_draw.h"
-
-#ifdef id386
-
- .data
-
-sb_v: .long 0
-
- .text
-
- .align 4
-.globl C(R_Surf8Start)
-C(R_Surf8Start):
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 0
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip0)
-C(R_DrawSurfaceBlock8_mip0):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip0:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $4,%ecx
- orl $0xF0000000,%ebp
-
- sarl $4,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip0:
- movl %ebp,C(lightdelta)
- movb 14(%esi),%cl
-
- sarl $4,%ebp
- movb %dh,%bh
-
- movb 15(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch0:
- movb 13(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch1:
- movb 12(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch2:
-
- movb 11(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch3:
-
- movb 10(%esi),%cl
- movl %eax,12(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch4:
- movb 9(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch5:
- movb 8(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch6:
-
- movb 7(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch7:
-
- movb 6(%esi),%cl
- movl %eax,8(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch8:
- movb 5(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch9:
- movb 4(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch10:
-
- movb 3(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch11:
-
- movb 2(%esi),%cl
- movl %eax,4(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch12:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch13:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch14:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch15:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
-
- addl C(sourcetstep),%esi
- addl C(surfrowbytes),%edi
-
- addl C(lightrightstep),%edx
- addl C(lightdeltastep),%ebp
-
- movl %edx,C(lightright)
- jc Lblockloop8_mip0
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip0
- subl C(r_stepback),%esi
-LSkip_mip0:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip0
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 1
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip1)
-C(R_DrawSurfaceBlock8_mip1):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip1:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $3,%ecx
- orl $0x70000000,%ebp
-
- sarl $3,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip1:
- movl %ebp,C(lightdelta)
- movb 6(%esi),%cl
-
- sarl $3,%ebp
- movb %dh,%bh
-
- movb 7(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch22:
- movb 5(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch23:
- movb 4(%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- addl %ebp,%edx
- movb 0x12345678(%ebx),%ah
-LBPatch24:
-
- movb 3(%esi),%bl
- movb 0x12345678(%ecx),%al
-LBPatch25:
-
- movb 2(%esi),%cl
- movl %eax,4(%edi)
-
- movb %dh,%bh
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch26:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch27:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch28:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch29:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightrightstep),%eax
-
- addl %eax,%edx
- movl C(lightdeltastep),%eax
-
- addl %eax,%ebp
- movl %edx,C(lightright)
-
- jc Lblockloop8_mip1
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip1
- subl C(r_stepback),%esi
-LSkip_mip1:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip1
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 2
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip2)
-C(R_DrawSurfaceBlock8_mip2):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip2:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $2,%ecx
- orl $0x30000000,%ebp
-
- sarl $2,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
-
- andl $0xFFFFF,%ebx
-
- orl $0xF0000000,%ebx
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
-Lblockloop8_mip2:
- movl %ebp,C(lightdelta)
- movb 2(%esi),%cl
-
- sarl $2,%ebp
- movb %dh,%bh
-
- movb 3(%esi),%bl
- addl %ebp,%edx
-
- movb %dh,%ch
- addl %ebp,%edx
-
- movb 0x12345678(%ebx),%ah
-LBPatch18:
- movb 1(%esi),%bl
-
- movb 0x12345678(%ecx),%al
-LBPatch19:
- movb (%esi),%cl
-
- movb %dh,%bh
- addl %ebp,%edx
-
- rorl $16,%eax
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%ah
-LBPatch20:
- movl C(lightright),%edx
-
- movb 0x12345678(%ecx),%al
-LBPatch21:
- movl C(lightdelta),%ebp
-
- movl %eax,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightrightstep),%eax
-
- addl %eax,%edx
- movl C(lightdeltastep),%eax
-
- addl %eax,%ebp
- movl %edx,C(lightright)
-
- jc Lblockloop8_mip2
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip2
- subl C(r_stepback),%esi
-LSkip_mip2:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip2
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-//----------------------------------------------------------------------
-// Surface block drawer for mip level 3
-//----------------------------------------------------------------------
-
- .align 4
-.globl C(R_DrawSurfaceBlock8_mip3)
-C(R_DrawSurfaceBlock8_mip3):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
-// for (v=0 ; v<numvblocks ; v++)
-// {
- movl C(r_lightptr),%ebx
- movl C(r_numvblocks),%eax
-
- movl %eax,sb_v
- movl C(prowdestbase),%edi
-
- movl C(pbasesource),%esi
-
-Lv_loop_mip3:
-
-// lightleft = lightptr[0];
-// lightright = lightptr[1];
-// lightdelta = (lightleft - lightright) & 0xFFFFF;
- movl (%ebx),%eax // lightleft
- movl 4(%ebx),%edx // lightright
-
- movl %eax,%ebp
- movl C(r_lightwidth),%ecx
-
- movl %edx,C(lightright)
- subl %edx,%ebp
-
- andl $0xFFFFF,%ebp
- leal (%ebx,%ecx,4),%ebx
-
- movl %ebp,C(lightdelta)
-// lightptr += lightwidth;
- movl %ebx,C(r_lightptr)
-
-// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
-// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
-// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
-// 0xF0000000;
- movl 4(%ebx),%ecx // lightptr[1]
- movl (%ebx),%ebx // lightptr[0]
-
- subl %eax,%ebx
- subl %edx,%ecx
-
- sarl $1,%ecx
-
- sarl $1,%ebx
- movl %ecx,C(lightrightstep)
-
- subl %ecx,%ebx
- andl $0xFFFFF,%ebx
-
- sarl $1,%ebp
- orl $0xF0000000,%ebx
-
- movl %ebx,C(lightdeltastep)
- subl %ebx,%ebx // high word must be 0 in loop for addressing
-
- movb 1(%esi),%bl
- subl %ecx,%ecx // high word must be 0 in loop for addressing
-
- movb %dh,%bh
- movb (%esi),%cl
-
- addl %ebp,%edx
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%al
-LBPatch16:
- movl C(lightright),%edx
-
- movb %al,1(%edi)
- movb 0x12345678(%ecx),%al
-LBPatch17:
-
- movb %al,(%edi)
- movl C(sourcetstep),%eax
-
- addl %eax,%esi
- movl C(surfrowbytes),%eax
-
- addl %eax,%edi
- movl C(lightdeltastep),%eax
-
- movl C(lightdelta),%ebp
- movb (%esi),%cl
-
- addl %eax,%ebp
- movl C(lightrightstep),%eax
-
- sarl $1,%ebp
- addl %eax,%edx
-
- movb %dh,%bh
- movb 1(%esi),%bl
-
- addl %ebp,%edx
- movb %dh,%ch
-
- movb 0x12345678(%ebx),%al
-LBPatch30:
- movl C(sourcetstep),%edx
-
- movb %al,1(%edi)
- movb 0x12345678(%ecx),%al
-LBPatch31:
-
- movb %al,(%edi)
- movl C(surfrowbytes),%ebp
-
- addl %edx,%esi
- addl %ebp,%edi
-
-// if (pbasesource >= r_sourcemax)
-// pbasesource -= stepback;
-
- cmpl C(r_sourcemax),%esi
- jb LSkip_mip3
- subl C(r_stepback),%esi
-LSkip_mip3:
-
- movl C(r_lightptr),%ebx
- decl sb_v
-
- jnz Lv_loop_mip3
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
- ret
-
-
-.globl C(R_Surf8End)
-C(R_Surf8End):
-
-//----------------------------------------------------------------------
-// Code patching routines
-//----------------------------------------------------------------------
- .data
-
- .align 4
-LPatchTable8:
- .long LBPatch0-4
- .long LBPatch1-4
- .long LBPatch2-4
- .long LBPatch3-4
- .long LBPatch4-4
- .long LBPatch5-4
- .long LBPatch6-4
- .long LBPatch7-4
- .long LBPatch8-4
- .long LBPatch9-4
- .long LBPatch10-4
- .long LBPatch11-4
- .long LBPatch12-4
- .long LBPatch13-4
- .long LBPatch14-4
- .long LBPatch15-4
- .long LBPatch16-4
- .long LBPatch17-4
- .long LBPatch18-4
- .long LBPatch19-4
- .long LBPatch20-4
- .long LBPatch21-4
- .long LBPatch22-4
- .long LBPatch23-4
- .long LBPatch24-4
- .long LBPatch25-4
- .long LBPatch26-4
- .long LBPatch27-4
- .long LBPatch28-4
- .long LBPatch29-4
- .long LBPatch30-4
- .long LBPatch31-4
-
- .text
-
- .align 4
-.globl C(R_Surf8Patch)
-C(R_Surf8Patch):
- pushl %ebx
-
- movl C(colormap),%eax
- movl $LPatchTable8,%ebx
- movl $32,%ecx
-LPatchLoop8:
- movl (%ebx),%edx
- addl $4,%ebx
- movl %eax,(%edx)
- decl %ecx
- jnz LPatchLoop8
-
- popl %ebx
-
- ret
-
-#endif // id386
--- a/sys_dosa.s
+++ /dev/null
@@ -1,95 +1,0 @@
-//
-// sys_dosa.s
-// x86 assembly-language DOS-dependent routines.
-
-#include "asm_i386.h"
-#include "quakeasm.h"
-
-
- .data
-
- .align 4
-fpenv:
- .long 0, 0, 0, 0, 0, 0, 0, 0
-
- .text
-
-.globl C(MaskExceptions)
-C(MaskExceptions):
- fnstenv fpenv
- orl $0x3F,fpenv
- fldenv fpenv
-
- ret
-
-/*
-.globl C(unmaskexceptions)
-C(unmaskexceptions):
- fnstenv fpenv
- andl $0xFFFFFFE0,fpenv
- fldenv fpenv
-
- ret
-*/
-
- .data
-
- .align 4
-.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
-ceil_cw: .long 0
-single_cw: .long 0
-full_cw: .long 0
-cw: .long 0
-pushed_cw: .long 0
-
- .text
-
-.globl C(Sys_LowFPPrecision)
-C(Sys_LowFPPrecision):
- fldcw single_cw
-
- ret
-
-.globl C(Sys_HighFPPrecision)
-C(Sys_HighFPPrecision):
- fldcw full_cw
-
- ret
-
-.globl C(Sys_PushFPCW_SetHigh)
-C(Sys_PushFPCW_SetHigh):
- fnstcw pushed_cw
- fldcw full_cw
-
- ret
-
-.globl C(Sys_PopFPCW)
-C(Sys_PopFPCW):
- fldcw pushed_cw
-
- ret
-
-.globl C(Sys_SetFPCW)
-C(Sys_SetFPCW):
- fnstcw cw
- movl cw,%eax
-#ifdef id386
- andb $0xF0,%ah
- orb $0x03,%ah // round mode, 64-bit precision
-#endif
- movl %eax,full_cw
-
-#ifdef id386
- andb $0xF0,%ah
- orb $0x0C,%ah // chop mode, single precision
-#endif
- movl %eax,single_cw
-
-#ifdef id386
- andb $0xF0,%ah
- orb $0x08,%ah // ceil mode, single precision
-#endif
- movl %eax,ceil_cw
-
- ret
-
--- /dev/null
+++ b/u/asm_draw.h
@@ -1,0 +1,132 @@
+//
+// asm_draw.h
+//
+// Include file for asm drawing routines.
+//
+
+//
+// !!! note that this file must match the corresponding C structures at all
+// times !!!
+//
+
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define NEAR_CLIP 0.01
+
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define CYCLE 128
+
+// espan_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define espan_t_u 0
+#define espan_t_v 4
+#define espan_t_count 8
+#define espan_t_pnext 12
+#define espan_t_size 16
+
+// sspan_t structure
+// !!! if this is changed, it must be changed in d_local.h too !!!
+#define sspan_t_u 0
+#define sspan_t_v 4
+#define sspan_t_count 8
+#define sspan_t_size 12
+
+// spanpackage_t structure
+// !!! if this is changed, it must be changed in d_polyset.c too !!!
+#define spanpackage_t_pdest 0
+#define spanpackage_t_pz 4
+#define spanpackage_t_count 8
+#define spanpackage_t_ptex 12
+#define spanpackage_t_sfrac 16
+#define spanpackage_t_tfrac 20
+#define spanpackage_t_light 24
+#define spanpackage_t_zi 28
+#define spanpackage_t_size 32
+
+// edge_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define et_u 0
+#define et_u_step 4
+#define et_prev 8
+#define et_next 12
+#define et_surfs 16
+#define et_nextremove 20
+#define et_nearzi 24
+#define et_owner 28
+#define et_size 32
+
+// surf_t structure
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define SURF_T_SHIFT 6
+#define st_next 0
+#define st_prev 4
+#define st_spans 8
+#define st_key 12
+#define st_last_u 16
+#define st_spanstate 20
+#define st_flags 24
+#define st_data 28
+#define st_entity 32
+#define st_nearzi 36
+#define st_insubmodel 40
+#define st_d_ziorigin 44
+#define st_d_zistepu 48
+#define st_d_zistepv 52
+#define st_pad 56
+#define st_size 64
+
+// clipplane_t structure
+// !!! if this is changed, it must be changed in r_local.h too !!!
+#define cp_normal 0
+#define cp_dist 12
+#define cp_next 16
+#define cp_leftedge 20
+#define cp_rightedge 21
+#define cp_reserved 22
+#define cp_size 24
+
+// medge_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define me_v 0
+#define me_cachededgeoffset 4
+#define me_size 8
+
+// mvertex_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define mv_position 0
+#define mv_size 12
+
+// refdef_t structure
+// !!! if this is changed, it must be changed in render.h too !!!
+#define rd_vrect 0
+#define rd_aliasvrect 20
+#define rd_vrectright 40
+#define rd_vrectbottom 44
+#define rd_aliasvrectright 48
+#define rd_aliasvrectbottom 52
+#define rd_vrectrightedge 56
+#define rd_fvrectx 60
+#define rd_fvrecty 64
+#define rd_fvrectx_adj 68
+#define rd_fvrecty_adj 72
+#define rd_vrect_x_adj_shift20 76
+#define rd_vrectright_adj_shift20 80
+#define rd_fvrectright_adj 84
+#define rd_fvrectbottom_adj 88
+#define rd_fvrectright 92
+#define rd_fvrectbottom 96
+#define rd_horizontalFieldOfView 100
+#define rd_xOrigin 104
+#define rd_yOrigin 108
+#define rd_vieworg 112
+#define rd_viewangles 124
+#define rd_ambientlight 136
+#define rd_size 140
+
+// mtriangle_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define mtri_facesfront 0
+#define mtri_vertindex 4
+#define mtri_size 16 // !!! if this changes, array indexing in !!!
+ // !!! d_polysa.s must be changed to match !!!
+#define mtri_shift 4
+
--- /dev/null
+++ b/u/asm_i386.h
@@ -1,0 +1,78 @@
+#ifndef __ASM_I386__
+#define __ASM_I386__
+
+#ifdef ELF
+#define C(label) label
+#endif
+#ifndef ELF
+#define C(label) _##label
+#endif
+
+//
+// !!! note that this file must match the corresponding C structures at all
+// times !!!
+//
+
+// plane_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+// !!! if the size of this is changed, the array lookup in SV_HullPointContents
+// must be changed too !!!
+#define pl_normal 0
+#define pl_dist 12
+#define pl_type 16
+#define pl_signbits 17
+#define pl_pad 18
+#define pl_size 20
+
+// hull_t structure
+// !!! if this is changed, it must be changed in model.h too !!!
+#define hu_clipnodes 0
+#define hu_planes 4
+#define hu_firstclipnode 8
+#define hu_lastclipnode 12
+#define hu_clip_mins 16
+#define hu_clip_maxs 28
+#define hu_size 40
+
+// dnode_t structure
+// !!! if this is changed, it must be changed in bspfile.h too !!!
+#define nd_planenum 0
+#define nd_children 4
+#define nd_mins 8
+#define nd_maxs 20
+#define nd_firstface 32
+#define nd_numfaces 36
+#define nd_size 40
+
+// sfxcache_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define sfxc_length 0
+#define sfxc_loopstart 4
+#define sfxc_speed 8
+#define sfxc_width 12
+#define sfxc_stereo 16
+#define sfxc_data 20
+
+// channel_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define ch_sfx 0
+#define ch_leftvol 4
+#define ch_rightvol 8
+#define ch_end 12
+#define ch_pos 16
+#define ch_looping 20
+#define ch_entnum 24
+#define ch_entchannel 28
+#define ch_origin 32
+#define ch_dist_mult 44
+#define ch_master_vol 48
+#define ch_size 52
+
+// portable_samplepair_t structure
+// !!! if this is changed, it must be changed in sound.h too !!!
+#define psp_left 0
+#define psp_right 4
+#define psp_size 8
+
+#endif
+
--- /dev/null
+++ b/u/block16.h
@@ -1,0 +1,123 @@
+LEnter16_16: // entry for blocksize 16 (via blockjumptable16 in surf16.s); 4 pixel pairs here, then falls through into LEnter8_16
+ movb (%esi),%al // al = source byte at esi (first texel of the pair)
+ movb (%esi,%ebx,),%cl // cl = source byte at esi+ebx (second texel of the pair)
+ movb %dh,%ah // ah = bits 8-15 of edx; ax is now the table index (upper half of eax is assumed 0 - caller clears it)
+ addl %ebp,%edx // advance edx by the per-pixel step held in ebp
+ movb %dh,%ch // ch = bits 8-15 of the stepped edx; cx is the second table index
+ leal (%esi,%ebx,2),%esi // esi += 2*ebx: both source bytes consumed
+ movw 0x12345678(,%eax,2),%ax // ax = 16-bit table entry [eax]; 0x12345678 is a placeholder displacement patched at run time
+LBPatch0: // must directly follow the lookup: the patch table in surf16.s addresses its displacement as LBPatch0-4
+ addl %ebp,%edx // step edx again, ready for the next pair
+ movw %ax,(%edi) // store first 16-bit pixel
+ movw 0x12345678(,%ecx,2),%cx // cx = 16-bit table entry [ecx] (placeholder displacement, patched)
+LBPatch1: // patch site for the lookup above (LBPatch1-4)
+ movw %cx,2(%edi) // store second 16-bit pixel
+ addl $0x4,%edi // edi += 4: two 16-bit pixels written
+
+ movb (%esi),%al // second pair: same sequence as above
+ movb (%esi,%ebx,),%cl
+ movb %dh,%ah
+ addl %ebp,%edx
+ movb %dh,%ch
+ leal (%esi,%ebx,2),%esi
+ movw 0x12345678(,%eax,2),%ax // placeholder displacement, patched via LBPatch2-4
+LBPatch2:
+ addl %ebp,%edx
+ movw %ax,(%edi)
+ movw 0x12345678(,%ecx,2),%cx // placeholder displacement, patched via LBPatch3-4
+LBPatch3:
+ movw %cx,2(%edi)
+ addl $0x4,%edi
+
+ movb (%esi),%al // third pair
+ movb (%esi,%ebx,),%cl
+ movb %dh,%ah
+ addl %ebp,%edx
+ movb %dh,%ch
+ leal (%esi,%ebx,2),%esi
+ movw 0x12345678(,%eax,2),%ax // placeholder displacement, patched via LBPatch4-4
+LBPatch4:
+ addl %ebp,%edx
+ movw %ax,(%edi)
+ movw 0x12345678(,%ecx,2),%cx // placeholder displacement, patched via LBPatch5-4
+LBPatch5:
+ movw %cx,2(%edi)
+ addl $0x4,%edi
+
+ movb (%esi),%al // fourth pair; execution then continues into LEnter8_16
+ movb (%esi,%ebx,),%cl
+ movb %dh,%ah
+ addl %ebp,%edx
+ movb %dh,%ch
+ leal (%esi,%ebx,2),%esi
+ movw 0x12345678(,%eax,2),%ax // placeholder displacement, patched via LBPatch6-4
+LBPatch6:
+ addl %ebp,%edx
+ movw %ax,(%edi)
+ movw 0x12345678(,%ecx,2),%cx // placeholder displacement, patched via LBPatch7-4
+LBPatch7:
+ movw %cx,2(%edi)
+ addl $0x4,%edi
+
+LEnter8_16: // entry for blocksize 8 (via blockjumptable16 in surf16.s); 2 pixel pairs here, then falls through into LEnter4_16
+ movb (%esi),%al // al = source byte at esi (first texel of the pair)
+ movb (%esi,%ebx,),%cl // cl = source byte at esi+ebx (second texel of the pair)
+ movb %dh,%ah // ah = bits 8-15 of edx; ax is now the table index (upper half of eax is assumed 0 - caller clears it)
+ addl %ebp,%edx // advance edx by the per-pixel step held in ebp
+ movb %dh,%ch // ch = bits 8-15 of the stepped edx; cx is the second table index
+ leal (%esi,%ebx,2),%esi // esi += 2*ebx: both source bytes consumed
+ movw 0x12345678(,%eax,2),%ax // ax = 16-bit table entry [eax]; placeholder displacement, patched via LBPatch8-4
+LBPatch8: // must directly follow the lookup it patches
+ addl %ebp,%edx // step edx again, ready for the next pair
+ movw %ax,(%edi) // store first 16-bit pixel
+ movw 0x12345678(,%ecx,2),%cx // cx = 16-bit table entry [ecx]; placeholder displacement, patched via LBPatch9-4
+LBPatch9:
+ movw %cx,2(%edi) // store second 16-bit pixel
+ addl $0x4,%edi // edi += 4: two 16-bit pixels written
+
+ movb (%esi),%al // second pair: same sequence; execution then continues into LEnter4_16
+ movb (%esi,%ebx,),%cl
+ movb %dh,%ah
+ addl %ebp,%edx
+ movb %dh,%ch
+ leal (%esi,%ebx,2),%esi
+ movw 0x12345678(,%eax,2),%ax // placeholder displacement, patched via LBPatch10-4
+LBPatch10:
+ addl %ebp,%edx
+ movw %ax,(%edi)
+ movw 0x12345678(,%ecx,2),%cx // placeholder displacement, patched via LBPatch11-4
+LBPatch11:
+ movw %cx,2(%edi)
+ addl $0x4,%edi
+
+LEnter4_16: // entry for blocksize 4 (via blockjumptable16 in surf16.s); 1 pixel pair here, then falls through into LEnter2_16
+ movb (%esi),%al // al = source byte at esi (first texel of the pair)
+ movb (%esi,%ebx,),%cl // cl = source byte at esi+ebx (second texel of the pair)
+ movb %dh,%ah // ah = bits 8-15 of edx; ax is now the table index (upper half of eax is assumed 0 - caller clears it)
+ addl %ebp,%edx // advance edx by the per-pixel step held in ebp
+ movb %dh,%ch // ch = bits 8-15 of the stepped edx; cx is the second table index
+ leal (%esi,%ebx,2),%esi // esi += 2*ebx: both source bytes consumed
+ movw 0x12345678(,%eax,2),%ax // ax = 16-bit table entry [eax]; placeholder displacement, patched via LBPatch12-4
+LBPatch12: // must directly follow the lookup it patches
+ addl %ebp,%edx // step edx again, ready for the next pair
+ movw %ax,(%edi) // store first 16-bit pixel
+ movw 0x12345678(,%ecx,2),%cx // cx = 16-bit table entry [ecx]; placeholder displacement, patched via LBPatch13-4
+LBPatch13:
+ movw %cx,2(%edi) // store second 16-bit pixel
+ addl $0x4,%edi // edi += 4: two 16-bit pixels written
+
+LEnter2_16: // entry for blocksize 2 (via blockjumptable16 in surf16.s); final pixel pair of every row
+ movb (%esi),%al // al = source byte at esi (first texel of the pair)
+ movb (%esi,%ebx,),%cl // cl = source byte at esi+ebx (second texel of the pair)
+ movb %dh,%ah // ah = bits 8-15 of edx; ax is now the table index (upper half of eax is assumed 0 - caller clears it)
+ addl %ebp,%edx // advance edx by the per-pixel step held in ebp
+ movb %dh,%ch // ch = bits 8-15 of the stepped edx; cx is the second table index
+ leal (%esi,%ebx,2),%esi // esi += 2*ebx: both source bytes consumed
+ movw 0x12345678(,%eax,2),%ax // ax = 16-bit table entry [eax]; placeholder displacement, patched via LBPatch14-4
+LBPatch14: // must directly follow the lookup it patches
+ addl %ebp,%edx // step edx once more (the including code reloads edx afterwards - see surf16.s)
+ movw %ax,(%edi) // store first 16-bit pixel
+ movw 0x12345678(,%ecx,2),%cx // cx = 16-bit table entry [ecx]; placeholder displacement, patched via LBPatch15-4
+LBPatch15:
+ movw %cx,2(%edi) // store second 16-bit pixel
+ addl $0x4,%edi // edi += 4: two 16-bit pixels written
--- /dev/null
+++ b/u/d_draw.s
@@ -1,0 +1,1018 @@
+//
+// d_draw.s
+// x86 assembly-language horizontal 8-bpp span-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with no transparency.
+//
+// Assumes there is at least one span in pspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+ .text
+
+// out-of-line, rarely-needed clamping code: each stub loads a limit into the
+// out-of-range register and jumps back to the matching LClampReentryN label
+LClampHigh0:
+ movl C(bbextents),%esi // s is above bbextents: clamp to bbextents
+ jmp LClampReentry0
+LClampHighOrLow0:
+ jg LClampHigh0 // flags come from cmpl bbextents,s; signed > means too high
+ xorl %esi,%esi // only the unsigned test fired (s looks negative): use 0
+ jmp LClampReentry0
+
+LClampHigh1:
+ movl C(bbextentt),%edx // t is above bbextentt: clamp to bbextentt
+ jmp LClampReentry1
+LClampHighOrLow1:
+ jg LClampHigh1 // flags come from cmpl bbextentt,t; signed > means too high
+ xorl %edx,%edx // only the unsigned test fired (t looks negative): use 0
+ jmp LClampReentry1
+
+LClampLow2:
+ movl $2048,%ebp // segment-end s was below 2048: raise it to 2048
+ jmp LClampReentry2
+LClampHigh2:
+ movl C(bbextents),%ebp // segment-end s was above bbextents
+ jmp LClampReentry2
+
+LClampLow3:
+ movl $2048,%ecx // segment-end t was below 2048: raise it to 2048
+ jmp LClampReentry3
+LClampHigh3:
+ movl C(bbextentt),%ecx // segment-end t was above bbextentt
+ jmp LClampReentry3
+
+LClampLow4:
+ movl $2048,%eax // last-segment end s was below 2048
+ jmp LClampReentry4
+LClampHigh4:
+ movl C(bbextents),%eax // last-segment end s was above bbextents
+ jmp LClampReentry4
+
+LClampLow5:
+ movl $2048,%ebx // last-segment end t was below 2048
+ jmp LClampReentry5
+LClampHigh5:
+ movl C(bbextentt),%ebx // last-segment end t was above bbextentt
+ jmp LClampReentry5
+
+
+#define pspans 4+16
+// pspans = return address (4) + the four registers pushed below (16)
+ .align 4
+.globl C(D_DrawSpans8)
+C(D_DrawSpans8):
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+//
+// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
+// and span list pointers
+//
+// TODO: any overlap from rearranging?
+ flds C(d_sdivzstepu)
+ fmuls fp_8 // sdivzstepu*8
+ movl C(cacheblock),%edx
+ flds C(d_tdivzstepu)
+ fmuls fp_8 // tdivzstepu*8 | sdivzstepu*8
+ movl pspans(%esp),%ebx // point to the first span descriptor
+ flds C(d_zistepu)
+ fmuls fp_8 // zistepu*8 | tdivzstepu*8 | sdivzstepu*8
+ movl %edx,pbase // pbase = cacheblock
+ fstps zi8stepu // pop in reverse order of the loads above
+ fstps tdivz8stepu
+ fstps sdivz8stepu // FP stack is empty again
+
+LSpanLoop: // once per span; %ebx = current span descriptor
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+ fildl espan_t_v(%ebx) // dv
+ fildl espan_t_u(%ebx) // du | dv
+
+ fld %st(1) // dv | du | dv
+ fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
+ fld %st(1) // du | dv*d_sdivzstepv | du | dv
+ fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ faddp %st(0),%st(2) // du*d_tdivzstepu |
+ // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
+ // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+ fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+ // du*d_sdivzstepu; stays in %st(2) at end
+ fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+ // s/z
+ fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
+ // du*d_tdivzstepu | du | s/z
+ fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
+ // du*d_tdivzstepu | du | s/z
+ faddp %st(0),%st(2) // dv*d_zistepv |
+ // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+ fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fmuls C(d_zistepu) // du*d_zistepu |
+ // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // du*d_zistepu | dv*d_zistepv | s/z
+ fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+ // du*d_tdivzstepu; stays in %st(1) at end
+ fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
+ faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+ flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+ fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+ fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
+ // du*d_zistepu; stays in %st(0) at end
+ // 1/z | fp_64k | t/z | s/z
+//
+// calculate and clamp s & t
+//
+ fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
+
+//
+// point %edi to the first pixel in the span
+//
+ movl C(d_viewbuffer),%ecx
+ movl espan_t_v(%ebx),%eax
+ movl %ebx,pspantemp // preserve spans pointer
+
+ movl C(tadjust),%edx // s and t are added to these at LFDIVInFlight1
+ movl C(sadjust),%esi
+ movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
+ addl %ecx,%edi
+ movl espan_t_u(%ebx),%ecx
+ addl %ecx,%edi // pdest = &pdestspan[scans->u];
+ movl espan_t_count(%ebx),%ecx
+
+//
+// now start the FDIV for the end of the span
+//
+ cmpl $8,%ecx // more than one 8-pixel segment in this span?
+ ja LSetupNotLast1
+
+ decl %ecx // single-segment path: %ecx = count - 1 from here on
+ jz LCleanup1 // if only one pixel, no need to start an FDIV
+ movl %ecx,spancountminus1
+
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fildl spancountminus1
+
+ flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
+ flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
+ fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
+ fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+ fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+ fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
+ fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
+ // C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
+ // C(d_tdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3) // 1/z | t/z | s/z, each advanced by scm1 steps
+
+ flds fp_64k // fp_64k | 1/z | t/z | s/z
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight1
+
+LCleanup1: // one-pixel span: only the starting s and t are needed
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+ jmp LFDIVInFlight1 // no FDIV was started; %ecx is 0 here
+
+ .align 4
+LSetupNotLast1: // span is longer than 8 pixels: aim at the end of segment 1
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fadds zi8stepu // 1/z+8 | t/z | s/z
+ fxch %st(2) // s/z | t/z | 1/z+8
+ fadds sdivz8stepu // s/z+8 | t/z | 1/z+8
+ fxch %st(2) // 1/z+8 | t/z | s/z+8
+ flds tdivz8stepu // tdivz8stepu | 1/z+8 | t/z | s/z+8
+ faddp %st(0),%st(2) // 1/z+8 | t/z+8 | s/z+8
+ flds fp_64k // fp_64k | 1/z+8 | t/z+8 | s/z+8
+ fdiv %st(1),%st(0) // z = 1/1/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight1:
+// integer work from here on overlaps the FDIV above (when one was started)
+ addl s,%esi // %esi = sadjust + s
+ addl t,%edx // %edx = tadjust + t
+ movl C(bbextents),%ebx
+ movl C(bbextentt),%ebp
+ cmpl %ebx,%esi // unsigned test: fires for s > bbextents and for s < 0
+ ja LClampHighOrLow0
+LClampReentry0:
+ movl %esi,s // store the clamped s
+ movl pbase,%ebx
+ shll $16,%esi // keep the low 16 (fractional) bits, left-justified
+ cmpl %ebp,%edx // same unsigned range test for t
+ movl %esi,sfracf // (mov leaves the cmpl flags intact for the ja)
+ ja LClampHighOrLow1
+LClampReentry1:
+ movl %edx,t // store the clamped t
+ movl s,%esi // sfrac = scans->sfrac;
+ shll $16,%edx // fractional bits of t, left-justified
+ movl t,%eax // tfrac = scans->tfrac;
+ sarl $16,%esi // integer part of s
+ movl %edx,tfracf
+
+//
+// calculate the texture starting address
+//
+ sarl $16,%eax // integer part of t
+ movl C(cachewidth),%edx
+ imull %edx,%eax // (tfrac >> 16) * cachewidth
+ addl %ebx,%esi
+ addl %eax,%esi // psource = pbase + (sfrac >> 16) +
+ // ((tfrac >> 16) * cachewidth);
+
+//
+// determine whether this is the last segment or not
+//
+ cmpl $8,%ecx // %ecx = count if count > 8, else count - 1
+ jna LLastSegment
+
+//
+// not the last segment; do full 8-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+ fld %st(0) // duplicate it
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1)
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1)
+ fistpl snext // t | 1/z | t/z | s/z
+ fistpl tnext // 1/z | t/z | s/z
+ movl snext,%eax
+ movl tnext,%edx
+
+ movb (%esi),%bl // get first source texel
+ subl $8,%ecx // count off this segment's pixels
+ movl C(sadjust),%ebp
+ movl %ecx,counttemp // remember count of remaining pixels
+
+ movl C(tadjust),%ecx
+ movb %bl,(%edi) // store first dest pixel
+
+ addl %eax,%ebp // %ebp = sadjust + snext
+ addl %edx,%ecx // %ecx = tadjust + tnext
+
+ movl C(bbextents),%eax
+ movl C(bbextentt),%edx
+
+ cmpl $2048,%ebp
+ jl LClampLow2 // signed: also catches negative values
+ cmpl %eax,%ebp
+ ja LClampHigh2
+LClampReentry2:
+
+ cmpl $2048,%ecx
+ jl LClampLow3 // signed: also catches negative values
+ cmpl %edx,%ecx
+ ja LClampHigh3
+LClampReentry3:
+
+ movl %ebp,snext // clamped end-of-segment s
+ movl %ecx,tnext // clamped end-of-segment t
+
+ subl s,%ebp // s delta across the 8-pixel segment
+ subl t,%ecx // t delta across the 8-pixel segment
+
+//
+// set up advancetable
+//
+ movl %ecx,%eax
+ movl %ebp,%edx
+ sarl $19,%eax // integer part of per-pixel tstep: >>16, then >>3 for /8
+ jz LZero // integer tstep is 0: skip the multiply
+ sarl $19,%edx // integer part of per-pixel sstep: >>16, then >>3 for /8
+ movl C(cachewidth),%ebx
+ imull %ebx,%eax
+ jmp LSetUp1
+
+LZero: // integer tstep is 0, so %eax (0) already is the t contribution
+ sarl $19,%edx // integer part of per-pixel sstep: >>16, then >>3 for /8
+ movl C(cachewidth),%ebx
+
+LSetUp1:
+
+ addl %edx,%eax // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%edx
+ movl %eax,advancetable+4 // advance base in t
+ addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $13,%ebp // left-justify sstep fractional part
+ movl sfracf,%ebx
+ shll $13,%ecx // left-justify tstep fractional part
+ movl %eax,advancetable // advance extra in t
+
+ movl %ecx,tstep
+ addl %ecx,%edx // advance tfrac fractional part by tstep frac
+
+ sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
+ addl %ebp,%ebx // advance sfrac fractional part by sstep frac
+ adcl advancetable+4(,%ecx,4),%esi // point to next source texel
+// %ecx = -1 picks advancetable (extra row), 0 picks advancetable+4; CF adds s carry
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb (%esi),%al // texel for pixel 1
+ addl %ebp,%ebx
+ movb %al,1(%edi) // pixel 1
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 2
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,2(%edi) // pixel 2
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 3
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,3(%edi) // pixel 3
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 4 (stored after the FDIV setup)
+ adcl advancetable+4(,%ecx,4),%esi
+
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+ movl counttemp,%ecx
+ cmpl $8,%ecx // more than one segment after this?
+ ja LSetupNotLast2 // yes
+
+ decl %ecx // last segment follows: %ecx = its pixel count - 1
+ jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
+ movl %ecx,spancountminus1
+ fildl spancountminus1 // scm1 | 1/z | t/z | s/z
+
+ flds C(d_zistepu) // C(d_zistepu) | spancountminus1
+ fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
+ flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+ fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+ fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
+ faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
+ fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
+ fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
+ flds fp_64k // 64k | C(d_sdivzstepu)*scm1
+ fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
+ faddp %st(0),%st(4) // 64k
+
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight2
+
+ .align 4
+LSetupNotLast2: // another full segment follows: step the gradients by 8
+ fadds zi8stepu // 1/z+8 | t/z | s/z
+ fxch %st(2) // s/z | t/z | 1/z+8
+ fadds sdivz8stepu // s/z+8 | t/z | 1/z+8
+ fxch %st(2) // 1/z+8 | t/z | s/z+8
+ flds tdivz8stepu // tdivz8stepu | 1/z+8 | t/z | s/z+8
+ faddp %st(0),%st(2) // 1/z+8 | t/z+8 | s/z+8
+ flds fp_64k // fp_64k | 1/z+8 | t/z+8 | s/z+8
+ fdiv %st(1),%st(0) // z = 1/1/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight2:
+ movl %ecx,counttemp // count if > 8 remain, else count - 1
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,4(%edi) // pixel 4
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 5
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,5(%edi) // pixel 5
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 6
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,6(%edi) // pixel 6
+ addl %ebp,%ebx
+ movb (%esi),%al // texel for pixel 7
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl $8,%edi // pdest now points at the next segment
+ movl %edx,tfracf
+ movl snext,%edx
+ movl %ebx,sfracf
+ movl tnext,%ebx
+ movl %edx,s // s = snext
+ movl %ebx,t // t = tnext
+
+ movl counttemp,%ecx // retrieve count
+
+//
+// determine whether this is the last segment or not
+//
+ cmpl $8,%ecx // are there multiple segments remaining?
+ movb %al,-1(%edi) // pixel 7 of the segment just finished (flags untouched)
+ ja LNotLastSegment // yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+ testl %ecx,%ecx // %ecx = pixels in this segment - 1
+ jz LNoSteps // just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+ fld %st(0) // duplicate it
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1)
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1)
+ fistpl snext // t | 1/z | t/z | s/z
+ fistpl tnext // 1/z | t/z | s/z
+
+ movb (%esi),%al // load first texel in segment
+ movl C(tadjust),%ebx
+ movb %al,(%edi) // store first pixel in segment
+ movl C(sadjust),%eax
+
+ addl snext,%eax // %eax = sadjust + snext
+ addl tnext,%ebx // %ebx = tadjust + tnext
+
+ movl C(bbextents),%ebp
+ movl C(bbextentt),%edx
+
+ cmpl $2048,%eax
+ jl LClampLow4 // signed: also catches negative values
+ cmpl %ebp,%eax
+ ja LClampHigh4
+LClampReentry4:
+ movl %eax,snext
+
+ cmpl $2048,%ebx
+ jl LClampLow5 // signed: also catches negative values
+ cmpl %edx,%ebx
+ ja LClampHigh5
+LClampReentry5:
+
+ cmpl $1,%ecx // don't bother
+ je LOnlyOneStep // if two pixels in segment, there's only one step,
+ // of the segment length
+ subl s,%eax // s delta across the segment
+ subl t,%ebx // t delta across the segment
+
+ addl %eax,%eax // convert to 15.17 format so multiply by 1.31
+ addl %ebx,%ebx // reciprocal yields 16.48
+
+ imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
+ movl %edx,%ebp // keep the high dword of the product (sstep) in %ebp
+// the -8 bias makes %ecx = 2 select the first reciprocal_table entry
+ movl %ebx,%eax
+ imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
+
+LSetEntryvec: // in: %ebp = sstep, %edx = tstep, %ecx = pixels - 1
+//
+// set up advancetable
+//
+ movl entryvec_table(,%ecx,4),%ebx
+ movl %edx,%eax // %eax = tstep, kept for its fractional part
+ movl %ebx,jumptemp // entry point for the indirect JMP at the end
+ movl %ebp,%ecx
+ sarl $16,%edx // tstep >>= 16;
+ movl C(cachewidth),%ebx
+ sarl $16,%ecx // sstep >>= 16;
+ imull %ebx,%edx
+
+ addl %ecx,%edx // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%ecx
+ movl %edx,advancetable+4 // advance base in t
+ addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $16,%ebp // left-justify sstep fractional part
+ movl sfracf,%ebx
+ shll $16,%eax // left-justify tstep fractional part
+ movl %edx,advancetable // advance extra in t
+
+ movl %eax,tstep
+ movl %ecx,%edx // %edx = tfracf
+ addl %eax,%edx // first t step; CF = carry out of the t fraction
+ sbbl %ecx,%ecx // -1 on carry, else 0
+ addl %ebp,%ebx // first s step; CF = carry out of the s fraction
+ adcl advancetable+4(,%ecx,4),%esi // %esi -> texel for the second pixel
+
+ jmp *jumptemp // jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps: // one pixel left: LEndSpan's store to 7(%edi) draws it
+ movb (%esi),%al // load first texel in segment
+ subl $7,%edi // adjust for hardwired offset
+ jmp LEndSpan
+
+
+LOnlyOneStep: // two pixels left: the single step is the whole delta
+ subl s,%eax // %eax = snext - s
+ subl t,%ebx // %ebx = tnext - t
+ movl %eax,%ebp // sstep, where LSetEntryvec expects it
+ movl %ebx,%edx // tstep, where LSetEntryvec expects it
+ jmp LSetEntryvec
+
+//----------------------------------------
+
+.globl Entry2_8
+Entry2_8: // reached through jmp *jumptemp; 2 pixels in the last segment
+ subl $6,%edi // adjust for hardwired offsets
+ movb (%esi),%al // texel for the second pixel, stored at LEndSpan
+ jmp LLEntry2_8
+
+//----------------------------------------
+
+.globl Entry3_8
+Entry3_8: // 3 pixels in the last segment
+ subl $5,%edi // adjust for hardwired offsets
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ jmp LLEntry3_8
+
+//----------------------------------------
+
+.globl Entry4_8
+Entry4_8: // 4 pixels in the last segment
+ subl $4,%edi // adjust for hardwired offsets
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx // carry is consumed by the sbbl at the shared-tail label
+ jmp LLEntry4_8
+
+//----------------------------------------
+
+.globl Entry5_8
+Entry5_8: // 5 pixels in the last segment
+ subl $3,%edi // adjust for hardwired offsets
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx // carry is consumed by the sbbl at the shared-tail label
+ jmp LLEntry5_8
+
+//----------------------------------------
+
+.globl Entry6_8
+Entry6_8: // 6 pixels in the last segment
+ subl $2,%edi // adjust for hardwired offsets
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx // carry is consumed by the sbbl at the shared-tail label
+ jmp LLEntry6_8
+
+//----------------------------------------
+
+.globl Entry7_8
+Entry7_8: // 7 pixels in the last segment
+ decl %edi // adjust for hardwired offsets
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx // carry is consumed by the sbbl at the shared-tail label
+ jmp LLEntry7_8
+
+//----------------------------------------
+
+.globl Entry8_8
+Entry8_8: // 8 pixels in the last segment; no %edi bias needed
+ addl %eax,%edx // %eax still holds tstep; must precede the load into %al
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,1(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LLEntry7_8: // each LLEntryN_8 expects CF from a preceding addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,2(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LLEntry6_8:
+ sbbl %ecx,%ecx
+ movb %al,3(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LLEntry5_8:
+ sbbl %ecx,%ecx
+ movb %al,4(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LLEntry4_8:
+ sbbl %ecx,%ecx
+ movb %al,5(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+LLEntry3_8: // no further stepping: only the last two stores remain
+ movb %al,6(%edi)
+ movb (%esi),%al
+LLEntry2_8:
+
+LEndSpan: // %al = final texel; it is stored at 7(%edi) below
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0)
+
+ movl pspantemp,%ebx // restore spans pointer
+ movl espan_t_pnext(%ebx),%ebx // point to next span
+ testl %ebx,%ebx // any more spans?
+ movb %al,7(%edi) // last pixel of the span (mov keeps the testl flags)
+ jnz LSpanLoop // more spans
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span z drawing code for polygons, with no transparency.
+//
+// Assumes there is at least one span in pzspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+ .text
+
+// z-clamp on a non-negative gradient span
+LClamp:
+ movl $0x40000000,%edx // izi = 0.5 in the 2^31-scaled form used for izi
+ xorl %ebx,%ebx // step 0 for this span; overwrites the izistep held in %ebx
+ fstp %st(0) // discard the unconverted 1/z
+ jmp LZDraw
+
+// z-clamp on a negative gradient span
+LClampNeg:
+ movl $0x40000000,%edx // izi = 0.5 in the 2^31-scaled form used for izi
+ xorl %ebx,%ebx // step 0 for this span; overwrites the izistep held in %ebx
+ fstp %st(0) // discard the unconverted 1/z
+ jmp LZDrawNeg
+
+
+#define pzspans 4+16
+// pzspans = return address (4) + the four registers pushed below (16)
+.globl C(D_DrawZSpans)
+C(D_DrawZSpans):
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+ flds C(d_zistepu)
+ movl C(d_zistepu),%eax // raw bits of the same float, for the integer test
+ movl pzspans(%esp),%esi // %esi = first span descriptor
+ testl %eax,%eax
+ jz LFNegSpan // taken only for all-zero bits (+0.0); NOTE(review): js intended?
+
+ fmuls Float2ToThe31nd
+ fistpl izistep // note: we are relying on FP exceptions being turned
+ // off here to avoid range problems
+ movl izistep,%ebx // meant to stay loaded for all spans (LClamp zeroes it)
+
+LFSpanLoop: // once per span; %esi = current span descriptor
+// set up the initial 1/z value
+ fildl espan_t_v(%esi) // v
+ fildl espan_t_u(%esi) // u | v
+ movl espan_t_v(%esi),%ecx
+ movl C(d_pzbuffer),%edi
+ fmuls C(d_zistepu) // u*d_zistepu | v
+ fxch %st(1) // v | u*d_zistepu
+ fmuls C(d_zistepv) // v*d_zistepv | u*d_zistepu
+ fxch %st(1) // u*d_zistepu | v*d_zistepv
+ fadds C(d_ziorigin) // u*d_zistepu + d_ziorigin | v*d_zistepv
+ imull C(d_zrowbytes),%ecx // v * d_zrowbytes
+ faddp %st(0),%st(1) // 1/z at the start of the span
+
+// clamp if z is nearer than 2 (1/z > 0.5)
+ fcoms float_point5
+ addl %ecx,%edi
+ movl espan_t_u(%esi),%edx
+ addl %edx,%edx // u * 2: z entries are written as 16-bit words
+ movl espan_t_count(%esi),%ecx
+ addl %edx,%edi // pdest = &pdestspan[scans->u];
+ pushl %esi // preserve spans pointer
+ fnstsw %ax // x87 status word; condition codes land in %ah
+ testb $0x45,%ah // C3|C2|C0 all clear means ST(0) > 0.5
+ jz LClamp
+
+ fmuls Float2ToThe31nd
+ fistpl izi // note: we are relying on FP exceptions being turned
+ // off here to avoid problems when the span is closer
+ // than 1/(2**31)
+ movl izi,%edx
+
+// at this point:
+// %ebx = izistep
+// %ecx = count
+// %edx = izi
+// %edi = pdest
+
+LZDraw: // also entered from LClamp with %edx = 0x40000000, %ebx = 0
+
+// do a single pixel up front, if necessary to dword align the destination
+ testl $2,%edi
+ jz LFMiddle
+ movl %edx,%eax
+ addl %ebx,%edx // step izi to the next pixel
+ shrl $16,%eax // the high 16 bits of izi are the stored z value
+ decl %ecx
+ movw %ax,(%edi)
+ addl $2,%edi
+
+// do middle a pair of aligned dwords at a time
+LFMiddle:
+ pushl %ecx // saved so LFLast can test for an odd trailing pixel
+ shrl $1,%ecx // count / 2
+ jz LFLast // no aligned dwords to do
+ shrl $1,%ecx // (count / 2) / 2
+ jnc LFMiddleLoop // even number of aligned dwords to do
+// odd number of dwords: write one dword (two pixels) before the paired loop
+ movl %edx,%eax
+ addl %ebx,%edx
+ shrl $16,%eax // first pixel's z in the low word
+ movl %edx,%esi
+ addl %ebx,%edx
+ andl $0xFFFF0000,%esi // second pixel's z in the high word
+ orl %esi,%eax
+ movl %eax,(%edi)
+ addl $4,%edi
+ andl %ecx,%ecx // any dword pairs left?
+ jz LFLast
+
+LFMiddleLoop: // four pixels (two dwords) per iteration
+ movl %edx,%eax
+ addl %ebx,%edx
+ shrl $16,%eax
+ movl %edx,%esi
+ addl %ebx,%edx
+ andl $0xFFFF0000,%esi
+ orl %esi,%eax
+ movl %edx,%ebp
+ movl %eax,(%edi)
+ addl %ebx,%edx
+ shrl $16,%ebp
+ movl %edx,%esi
+ addl %ebx,%edx
+ andl $0xFFFF0000,%esi
+ orl %esi,%ebp
+ movl %ebp,4(%edi) // FIXME: eliminate register contention
+ addl $8,%edi
+
+ decl %ecx
+ jnz LFMiddleLoop
+
+LFLast: // stack holds the saved pixel count, then the span pointer
+ popl %ecx // retrieve count
+ popl %esi // retrieve span pointer
+
+// do the last, unaligned pixel, if there is one
+ andl $1,%ecx // is there an odd pixel left to do?
+ jz LFSpanDone // no
+ shrl $16,%edx
+ movw %dx,(%edi) // do the final pixel's z
+
+LFSpanDone:
+ movl izistep,%ebx // LClamp zeroes %ebx for its span: restore the real step
+ movl espan_t_pnext(%esi),%esi // point to next span
+ testl %esi,%esi // any more spans?
+ jnz LFSpanLoop
+ jmp LFDone
+
+LFNegSpan: // reached from the jz at function entry; 1/z step is subtracted below
+ fmuls FloatMinus2ToThe31nd
+ fistpl izistep // note: we are relying on FP exceptions being turned
+ // off here to avoid range problems
+ movl izistep,%ebx // remains loaded for all spans
+
+LFNegSpanLoop: // once per span; %esi = current span descriptor
+// set up the initial 1/z value
+ fildl espan_t_v(%esi) // v
+ fildl espan_t_u(%esi) // u | v
+ movl espan_t_v(%esi),%ecx
+ movl C(d_pzbuffer),%edi
+ fmuls C(d_zistepu) // u*d_zistepu | v
+ fxch %st(1) // v | u*d_zistepu
+ fmuls C(d_zistepv) // v*d_zistepv | u*d_zistepu
+ fxch %st(1) // u*d_zistepu | v*d_zistepv
+ fadds C(d_ziorigin) // u*d_zistepu + d_ziorigin | v*d_zistepv
+ imull C(d_zrowbytes),%ecx // v * d_zrowbytes
+ faddp %st(0),%st(1) // 1/z at the start of the span
+
+// clamp if z is nearer than 2 (1/z > 0.5)
+ fcoms float_point5
+ addl %ecx,%edi
+ movl espan_t_u(%esi),%edx
+ addl %edx,%edx // u * 2: z entries are written as 16-bit words
+ movl espan_t_count(%esi),%ecx
+ addl %edx,%edi // pdest = &pdestspan[scans->u];
+ pushl %esi // preserve spans pointer
+ fnstsw %ax // x87 status word; condition codes land in %ah
+ testb $0x45,%ah // C3|C2|C0 all clear means ST(0) > 0.5
+ jz LClampNeg
+
+ fmuls Float2ToThe31nd
+ fistpl izi // note: we are relying on FP exceptions being turned
+ // off here to avoid problems when the span is closer
+ // than 1/(2**31)
+ movl izi,%edx
+
+// at this point:
+// %ebx = izistep
+// %ecx = count
+// %edx = izi
+// %edi = pdest
+
+LZDrawNeg: // same as LZDraw, but %ebx is subtracted from izi at each pixel
+
+// do a single pixel up front, if necessary to dword align the destination
+ testl $2,%edi
+ jz LFNegMiddle
+ movl %edx,%eax
+ subl %ebx,%edx // step izi to the next pixel
+ shrl $16,%eax // the high 16 bits of izi are the stored z value
+ decl %ecx
+ movw %ax,(%edi)
+ addl $2,%edi
+
+// do middle a pair of aligned dwords at a time
+LFNegMiddle:
+ pushl %ecx // saved so LFNegLast can test for an odd trailing pixel
+ shrl $1,%ecx // count / 2
+ jz LFNegLast // no aligned dwords to do
+ shrl $1,%ecx // (count / 2) / 2
+ jnc LFNegMiddleLoop // even number of aligned dwords to do
+// odd number of dwords: write one dword (two pixels) before the paired loop
+ movl %edx,%eax
+ subl %ebx,%edx
+ shrl $16,%eax // first pixel's z in the low word
+ movl %edx,%esi
+ subl %ebx,%edx
+ andl $0xFFFF0000,%esi // second pixel's z in the high word
+ orl %esi,%eax
+ movl %eax,(%edi)
+ addl $4,%edi
+ andl %ecx,%ecx // any dword pairs left?
+ jz LFNegLast
+
+LFNegMiddleLoop: // four pixels (two dwords) per iteration
+ movl %edx,%eax
+ subl %ebx,%edx
+ shrl $16,%eax
+ movl %edx,%esi
+ subl %ebx,%edx
+ andl $0xFFFF0000,%esi
+ orl %esi,%eax
+ movl %edx,%ebp
+ movl %eax,(%edi)
+ subl %ebx,%edx
+ shrl $16,%ebp
+ movl %edx,%esi
+ subl %ebx,%edx
+ andl $0xFFFF0000,%esi
+ orl %esi,%ebp
+ movl %ebp,4(%edi) // FIXME: eliminate register contention
+ addl $8,%edi
+
+ decl %ecx
+ jnz LFNegMiddleLoop
+
+LFNegLast: // stack holds the saved pixel count, then the span pointer
+ popl %ecx // retrieve count
+ popl %esi // retrieve span pointer
+
+// do the last, unaligned pixel, if there is one
+ andl $1,%ecx // is there an odd pixel left to do?
+ jz LFNegSpanDone // no
+ shrl $16,%edx
+ movw %dx,(%edi) // do the final pixel's z
+
+LFNegSpanDone:
+ movl espan_t_pnext(%esi),%esi // point to next span
+ testl %esi,%esi // any more spans?
+ jnz LFNegSpanLoop
+// NOTE(review): %ebx is not reloaded after LClampNeg zeroes it — confirm intent
+LFDone: // shared epilogue for both the add-based and subtract-based loops
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+#endif // id386
--- /dev/null
+++ b/u/d_draw16.s
@@ -1,0 +1,955 @@
+//
+// d_draw16.s
+// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
+// subdivision.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with no transparency and
+// 16-pixel subdivision.
+//
+// Assumes there is at least one span in pspans, and that every span
+// contains at least one pixel
+//----------------------------------------------------------------------
+
+ .data
+
+ .text
+
+// out-of-line, rarely-needed clamping code: each stub loads a limit into the
+// out-of-range register and jumps back to the matching LClampReentryN label
+LClampHigh0:
+ movl C(bbextents),%esi // s is above bbextents: clamp to bbextents
+ jmp LClampReentry0
+LClampHighOrLow0:
+ jg LClampHigh0 // flags come from cmpl bbextents,s; signed > means too high
+ xorl %esi,%esi // only the unsigned test fired (s looks negative): use 0
+ jmp LClampReentry0
+
+LClampHigh1:
+ movl C(bbextentt),%edx // t is above bbextentt: clamp to bbextentt
+ jmp LClampReentry1
+LClampHighOrLow1:
+ jg LClampHigh1 // flags come from cmpl bbextentt,t; signed > means too high
+ xorl %edx,%edx // only the unsigned test fired (t looks negative): use 0
+ jmp LClampReentry1
+
+LClampLow2:
+ movl $4096,%ebp // segment-end s was below 4096: raise it to 4096
+ jmp LClampReentry2
+LClampHigh2:
+ movl C(bbextents),%ebp // segment-end s was above bbextents
+ jmp LClampReentry2
+
+LClampLow3:
+ movl $4096,%ecx // segment-end t was below 4096: raise it to 4096
+ jmp LClampReentry3
+LClampHigh3:
+ movl C(bbextentt),%ecx // segment-end t was above bbextentt
+ jmp LClampReentry3
+
+LClampLow4:
+ movl $4096,%eax // replace %eax with the 4096 floor
+ jmp LClampReentry4
+LClampHigh4:
+ movl C(bbextents),%eax // replace %eax with bbextents
+ jmp LClampReentry4
+
+LClampLow5:
+ movl $4096,%ebx // replace %ebx with the 4096 floor
+ jmp LClampReentry5
+LClampHigh5:
+ movl C(bbextentt),%ebx // replace %ebx with bbextentt
+ jmp LClampReentry5
+
+
+#define pspans 4+16
+
+ .align 4
+.globl C(D_DrawSpans16)
+C(D_DrawSpans16):
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+//
+// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
+// and span list pointers
+//
+// TODO: any overlap from rearranging?
+ flds C(d_sdivzstepu)
+ fmuls fp_16
+ movl C(cacheblock),%edx
+ flds C(d_tdivzstepu)
+ fmuls fp_16
+ movl pspans(%esp),%ebx // point to the first span descriptor
+ flds C(d_zistepu)
+ fmuls fp_16
+ movl %edx,pbase // pbase = cacheblock
+ fstps zi16stepu
+ fstps tdivz16stepu
+ fstps sdivz16stepu
+
+LSpanLoop:
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+ fildl espan_t_v(%ebx)
+ fildl espan_t_u(%ebx)
+
+ fld %st(1) // dv | du | dv
+ fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
+ fld %st(1) // du | dv*d_sdivzstepv | du | dv
+ fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ faddp %st(0),%st(2) // du*d_tdivzstepu |
+ // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
+ // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+ fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+ // du*d_sdivzstepu; stays in %st(2) at end
+ fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+ // s/z
+ fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
+ // du*d_tdivzstepu | du | s/z
+ fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
+ // du*d_tdivzstepu | du | s/z
+ faddp %st(0),%st(2) // dv*d_zistepv |
+ // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+ fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fmuls C(d_zistepu) // du*d_zistepu |
+ // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // du*d_zistepu | dv*d_zistepv | s/z
+ fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+ // du*d_tdivzstepu; stays in %st(1) at end
+ fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
+ faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+ flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+ fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+ fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
+ // du*d_zistepu; stays in %st(0) at end
+ // 1/z | fp_64k | t/z | s/z
+//
+// calculate and clamp s & t
+//
+ fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
+
+//
+// point %edi to the first pixel in the span
+//
+ movl C(d_viewbuffer),%ecx
+ movl espan_t_v(%ebx),%eax
+ movl %ebx,pspantemp // preserve spans pointer
+
+ movl C(tadjust),%edx
+ movl C(sadjust),%esi
+ movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
+ addl %ecx,%edi
+ movl espan_t_u(%ebx),%ecx
+ addl %ecx,%edi // pdest = &pdestspan[scans->u];
+ movl espan_t_count(%ebx),%ecx
+
+//
+// now start the FDIV for the end of the span
+//
+ cmpl $16,%ecx
+ ja LSetupNotLast1
+
+ decl %ecx
+ jz LCleanup1 // if only one pixel, no need to start an FDIV
+ movl %ecx,spancountminus1
+
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fildl spancountminus1
+
+ flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
+ flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
+ fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
+ fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+ fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+ fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
+ fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
+ // C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
+ // C(d_tdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3)
+
+ flds fp_64k
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight1
+
+LCleanup1:
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+ jmp LFDIVInFlight1
+
+ .align 4
+LSetupNotLast1:
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fadds zi16stepu
+ fxch %st(2)
+ fadds sdivz16stepu
+ fxch %st(2)
+ flds tdivz16stepu
+ faddp %st(0),%st(2)
+ flds fp_64k
+ fdiv %st(1),%st(0) // z = 1/1/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight1:
+
+ addl s,%esi
+ addl t,%edx
+ movl C(bbextents),%ebx
+ movl C(bbextentt),%ebp
+ cmpl %ebx,%esi
+ ja LClampHighOrLow0
+LClampReentry0:
+ movl %esi,s
+ movl pbase,%ebx
+ shll $16,%esi
+ cmpl %ebp,%edx
+ movl %esi,sfracf
+ ja LClampHighOrLow1
+LClampReentry1:
+ movl %edx,t
+ movl s,%esi // sfrac = scans->sfrac;
+ shll $16,%edx
+ movl t,%eax // tfrac = scans->tfrac;
+ sarl $16,%esi
+ movl %edx,tfracf
+
+//
+// calculate the texture starting address
+//
+ sarl $16,%eax
+ movl C(cachewidth),%edx
+ imull %edx,%eax // (tfrac >> 16) * cachewidth
+ addl %ebx,%esi
+ addl %eax,%esi // psource = pbase + (sfrac >> 16) +
+ // ((tfrac >> 16) * cachewidth);
+//
+// determine whether last span or not
+//
+ cmpl $16,%ecx
+ jna LLastSegment
+
+//
+// not the last segment; do full 16-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+ fld %st(0) // duplicate it: z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl snext // t | 1/z | t/z | s/z
+ fistpl tnext // 1/z | t/z | s/z
+ movl snext,%eax
+ movl tnext,%edx
+
+ movb (%esi),%bl // get first source texel
+ subl $16,%ecx // count off this segment's pixels
+ movl C(sadjust),%ebp
+ movl %ecx,counttemp // remember count of remaining pixels
+
+ movl C(tadjust),%ecx
+ movb %bl,(%edi) // store first dest pixel
+
+ addl %eax,%ebp // ebp = snext + sadjust
+ addl %edx,%ecx // ecx = tnext + tadjust
+
+ movl C(bbextents),%eax
+ movl C(bbextentt),%edx
+
+ cmpl $4096,%ebp // signed test catches values below the 4096 floor first
+ jl LClampLow2
+ cmpl %eax,%ebp // unsigned test against bbextents for the upper bound
+ ja LClampHigh2
+LClampReentry2:
+
+ cmpl $4096,%ecx
+ jl LClampLow3
+ cmpl %edx,%ecx
+ ja LClampHigh3
+LClampReentry3:
+
+ movl %ebp,snext // store the clamped end-of-segment s
+ movl %ecx,tnext // store the clamped end-of-segment t
+
+ subl s,%ebp // ebp = snext - s, delta across the 16-pixel segment
+ subl t,%ecx // ecx = tnext - t, delta across the 16-pixel segment
+
+//
+// set up advancetable
+//
+ movl %ecx,%eax
+ movl %ebp,%edx
+ sarl $20,%eax // (tnext - t) >> 20: per-pixel tstep (delta / 16) >> 16
+ jz LZero // whole-texel t step is zero: skip the multiply
+ sarl $20,%edx // (snext - s) >> 20: per-pixel sstep (delta / 16) >> 16
+ movl C(cachewidth),%ebx
+ imull %ebx,%eax // (tstep >> 16) * cachewidth
+ jmp LSetUp1
+
+LZero:
+ sarl $20,%edx // (snext - s) >> 20: per-pixel sstep (delta / 16) >> 16
+ movl C(cachewidth),%ebx // still needed by LSetUp1 for the "extra" advancetable entry
+
+LSetUp1:
+
+ addl %edx,%eax // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%edx
+ movl %eax,advancetable+4 // advance base in t
+ addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $12,%ebp // left-justify sstep fractional part
+ movl sfracf,%ebx
+ shll $12,%ecx // left-justify tstep fractional part
+ movl %eax,advancetable // advance extra in t
+
+ movl %ecx,tstep
+ addl %ecx,%edx // advance tfrac fractional part by tstep frac
+
+ sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
+ addl %ebp,%ebx // advance sfrac fractional part by sstep frac
+ adcl advancetable+4(,%ecx,4),%esi // point to next source texel
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb (%esi),%al
+ addl %ebp,%ebx
+ movb %al,1(%edi)
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,2(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,3(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,4(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,5(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,6(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,7(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+ movl counttemp,%ecx
+ cmpl $16,%ecx // more than one segment after this?
+ ja LSetupNotLast2 // yes
+
+ decl %ecx
+ jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
+ movl %ecx,spancountminus1
+ fildl spancountminus1
+
+ flds C(d_zistepu) // C(d_zistepu) | spancountminus1
+ fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
+ flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
+ fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
+ fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
+ faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
+ fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
+ fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
+ fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
+ faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
+ flds fp_64k // 64k | C(d_sdivzstepu)*scm1
+ fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
+ faddp %st(0),%st(4) // 64k
+
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight2
+
+ .align 4
+LSetupNotLast2:
+ fadds zi16stepu // 1/z | t/z | s/z (1/z advanced by zi16stepu)
+ fxch %st(2) // s/z | t/z | 1/z
+ fadds sdivz16stepu // s/z | t/z | 1/z (s/z advanced by sdivz16stepu)
+ fxch %st(2) // 1/z | t/z | s/z
+ flds tdivz16stepu // tdivz16stepu | 1/z | t/z | s/z
+ faddp %st(0),%st(2) // 1/z | t/z | s/z (t/z advanced by tdivz16stepu)
+ flds fp_64k // fp_64k | 1/z | t/z | s/z
+ fdiv %st(1),%st(0) // z*64k = fp_64k / (1/z) | 1/z | t/z | s/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight2:
+ movl %ecx,counttemp
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,8(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,9(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,10(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,11(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,12(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,13(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,14(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl $16,%edi
+ movl %edx,tfracf
+ movl snext,%edx
+ movl %ebx,sfracf
+ movl tnext,%ebx
+ movl %edx,s
+ movl %ebx,t
+
+ movl counttemp,%ecx // retrieve count
+
+//
+// determine whether last span or not
+//
+ cmpl $16,%ecx // are there multiple segments remaining?
+ movb %al,-1(%edi)
+ ja LNotLastSegment // yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+ testl %ecx,%ecx
+ jz LNoSteps // just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+ fld %st(0) // duplicate it
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1)
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1)
+ fistpl snext
+ fistpl tnext
+
+ movb (%esi),%al // load first texel in segment
+ movl C(tadjust),%ebx
+ movb %al,(%edi) // store first pixel in segment
+ movl C(sadjust),%eax
+
+ addl snext,%eax
+ addl tnext,%ebx
+
+ movl C(bbextents),%ebp
+ movl C(bbextentt),%edx
+
+ cmpl $4096,%eax
+ jl LClampLow4
+ cmpl %ebp,%eax
+ ja LClampHigh4
+LClampReentry4:
+ movl %eax,snext
+
+ cmpl $4096,%ebx
+ jl LClampLow5
+ cmpl %edx,%ebx
+ ja LClampHigh5
+LClampReentry5:
+
+ cmpl $1,%ecx // don't bother
+ je LOnlyOneStep // if two pixels in segment, there's only one step,
+ // of the segment length
+ subl s,%eax
+ subl t,%ebx
+
+ addl %eax,%eax // convert to 15.17 format so multiply by 1.31
+ addl %ebx,%ebx // reciprocal yields 16.48
+
+ imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
+ // (spancount-1)
+ movl %edx,%ebp
+
+ movl %ebx,%eax
+ imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
+ // (spancount-1)
+LSetEntryvec:
+//
+// set up advancetable
+//
+ movl entryvec_table_16(,%ecx,4),%ebx
+ movl %edx,%eax
+ movl %ebx,jumptemp // entry point into code for RET later
+ movl %ebp,%ecx
+ sarl $16,%edx // tstep >>= 16;
+ movl C(cachewidth),%ebx
+ sarl $16,%ecx // sstep >>= 16;
+ imull %ebx,%edx
+
+ addl %ecx,%edx // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%ecx
+ movl %edx,advancetable+4 // advance base in t
+ addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $16,%ebp // left-justify sstep fractional part
+ movl sfracf,%ebx
+ shll $16,%eax // left-justify tstep fractional part
+ movl %edx,advancetable // advance extra in t
+
+ movl %eax,tstep
+ movl %ecx,%edx
+ addl %eax,%edx
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+
+ jmp *jumptemp // jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps:
+ movb (%esi),%al // load first texel in segment
+ subl $15,%edi // adjust for hardwired offset
+ jmp LEndSpan
+
+
+LOnlyOneStep:
+ subl s,%eax
+ subl t,%ebx
+ movl %eax,%ebp
+ movl %ebx,%edx
+ jmp LSetEntryvec
+
+//----------------------------------------
+
+.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
+.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
+.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
+.globl Entry14_16, Entry15_16, Entry16_16
+
+Entry2_16:
+ subl $14,%edi // adjust for hardwired offsets
+ movb (%esi),%al
+ jmp LEntry2_16
+
+//----------------------------------------
+
+Entry3_16:
+ subl $13,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ jmp LEntry3_16
+
+//----------------------------------------
+
+Entry4_16:
+ subl $12,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry4_16
+
+//----------------------------------------
+
+Entry5_16:
+ subl $11,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry5_16
+
+//----------------------------------------
+
+Entry6_16:
+ subl $10,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry6_16
+
+//----------------------------------------
+
+Entry7_16:
+ subl $9,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry7_16
+
+//----------------------------------------
+
+Entry8_16:
+ subl $8,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry8_16
+
+//----------------------------------------
+
+Entry9_16:
+ subl $7,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry9_16
+
+//----------------------------------------
+
+Entry10_16:
+ subl $6,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry10_16
+
+//----------------------------------------
+
+Entry11_16:
+ subl $5,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry11_16
+
+//----------------------------------------
+
+Entry12_16:
+ subl $4,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry12_16
+
+//----------------------------------------
+
+Entry13_16:
+ subl $3,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry13_16
+
+//----------------------------------------
+
+Entry14_16:
+ subl $2,%edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry14_16
+
+//----------------------------------------
+
+Entry15_16:
+ decl %edi // adjust for hardwired offsets
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+ jmp LEntry15_16
+
+//----------------------------------------
+
+Entry16_16:
+ addl %eax,%edx
+ movb (%esi),%al
+ sbbl %ecx,%ecx
+ addl %ebp,%ebx
+ adcl advancetable+4(,%ecx,4),%esi
+
+ addl tstep,%edx
+ sbbl %ecx,%ecx
+ movb %al,1(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry15_16:
+ sbbl %ecx,%ecx
+ movb %al,2(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry14_16:
+ sbbl %ecx,%ecx
+ movb %al,3(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry13_16:
+ sbbl %ecx,%ecx
+ movb %al,4(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry12_16:
+ sbbl %ecx,%ecx
+ movb %al,5(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry11_16:
+ sbbl %ecx,%ecx
+ movb %al,6(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry10_16:
+ sbbl %ecx,%ecx
+ movb %al,7(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry9_16:
+ sbbl %ecx,%ecx
+ movb %al,8(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry8_16:
+ sbbl %ecx,%ecx
+ movb %al,9(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry7_16:
+ sbbl %ecx,%ecx
+ movb %al,10(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry6_16:
+ sbbl %ecx,%ecx
+ movb %al,11(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry5_16:
+ sbbl %ecx,%ecx
+ movb %al,12(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+ addl tstep,%edx
+LEntry4_16:
+ sbbl %ecx,%ecx
+ movb %al,13(%edi)
+ addl %ebp,%ebx
+ movb (%esi),%al
+ adcl advancetable+4(,%ecx,4),%esi
+LEntry3_16:
+ movb %al,14(%edi)
+ movb (%esi),%al
+LEntry2_16:
+
+LEndSpan:
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0)
+
+ movl pspantemp,%ebx // restore spans pointer
+ movl espan_t_pnext(%ebx),%ebx // point to next span
+ testl %ebx,%ebx // any more spans?
+ movb %al,15(%edi)
+ jnz LSpanLoop // more spans
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+#endif // id386
--- /dev/null
+++ b/u/d_ifacea.h
@@ -1,0 +1,79 @@
+//
+// d_ifacea.h
+//
+// Include file for asm driver interface.
+//
+
+//
+// !!! note that this file must match the corresponding C structures in
+// d_iface.h at all times !!!
+//
+
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define ALIAS_ONSEAM 0x0020
+
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define TURB_TEX_SIZE 64 // base turbulent texture size
+
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define CYCLE 128 // asm_draw.h defines CYCLE as 128 too; keep both identical
+
+// !!! if this is changed, it must be changed in r_shared.h too !!!
+#define MAXHEIGHT 1024 // d_polysa.s derives DPS_MAXSPANS from this value
+
+// !!! if this is changed, it must be changed in quakedef.h too !!!
+#define CACHE_SIZE 32 // used to align key data structures
+
+// particle_t structure (byte offsets of each field, for use from assembly)
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+// driver-usable fields
+#define pt_org 0 // three floats; d_parta.s loads them with flds at +0, +4, +8
+#define pt_color 12 // read with flds in d_parta.s, i.e. stored as a float
+// drivers never touch the following fields
+#define pt_next 16 // 4 bytes
+#define pt_vel 20 // 12 bytes
+#define pt_ramp 32 // 4 bytes
+#define pt_die 36 // 4 bytes
+#define pt_type 40 // 4 bytes
+#define pt_size 44 // total structure size in bytes
+
+#define PARTICLE_Z_CLIP 8.0
+
+// finalvert_t structure (byte offsets of each field, for use from assembly)
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define fv_v 0 // !!! if this is moved, cases where the !!!
+ // !!! address of this field is pushed in !!!
+ // !!! d_polysa.s must be changed !!!
+#define fv_flags 24 // fv_v therefore occupies bytes 0..23
+#define fv_reserved 28
+#define fv_size 32 // total structure size in bytes
+#define fv_shift 5 // log2(fv_size): 1 << 5 == 32, keep in step with fv_size
+
+
+// stvert_t structure (byte offsets of each field, for use from assembly)
+// !!! if this is changed, it must be changed in modelgen.h too !!!
+#define stv_onseam 0 // 4 bytes
+#define stv_s 4 // 4 bytes
+#define stv_t 8 // 4 bytes
+#define stv_size 12 // total structure size in bytes
+
+
+// trivertx_t structure (byte offsets of each field, for use from assembly)
+// !!! if this is changed, it must be changed in modelgen.h too !!!
+#define tv_v 0 // 3 bytes
+#define tv_lightnormalindex 3 // 1 byte
+#define tv_size 4 // total structure size in bytes
+
+// affinetridesc_t structure (byte offsets; every field is 4 bytes wide)
+// !!! if this is changed, it must be changed in d_iface.h too !!!
+#define atd_pskin 0
+#define atd_pskindesc 4
+#define atd_skinwidth 8
+#define atd_skinheight 12
+#define atd_ptriangles 16
+#define atd_pfinalverts 20
+#define atd_numtriangles 24
+#define atd_drawtype 28
+#define atd_seamfixupX16 32
+#define atd_size 36 // total structure size in bytes
+
--- /dev/null
+++ b/u/d_parta.s
@@ -1,0 +1,458 @@
+//
+// d_parta.s
+// x86 assembly-language 8-bpp particle-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "d_ifacea.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// 8-bpp particle drawing code.
+//----------------------------------------------------------------------
+
+//FIXME: comments, full optimization
+
+//----------------------------------------------------------------------
+// 8-bpp particle queueing code.
+//----------------------------------------------------------------------
+
+ .text
+
+#define P 12+4
+
+ .align 4
+.globl C(D_DrawParticle)
+C(D_DrawParticle): // one stack argument: pointer to the particle (pt_* offsets)
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi // preserve register variables
+ pushl %ebx
+
+ movl P(%esp),%edi // edi = particle pointer; P skips the 3 pushes + return address
+
+// FIXME: better FP overlap in general here
+
+// transform point
+// VectorSubtract (p->org, r_origin, local);
+ flds C(r_origin) // r_origin[0]
+ fsubrs pt_org(%edi) // local[0] = org[0] - r_origin[0]
+ flds pt_org+4(%edi) // org[1] | local[0]
+ fsubs C(r_origin)+4 // local[1] | local[0]
+ flds pt_org+8(%edi) // org[2] | local[1] | local[0]
+ fsubs C(r_origin)+8 // local[2] | local[1] | local[0]
+ fxch %st(2) // local[0] | local[1] | local[2]
+
+// transformed[2] = DotProduct(local, r_ppn);
+ flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
+ fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
+ flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
+ fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
+ flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
+ // local[1] | local[2]
+ fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
+ fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
+ faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
+ // local[2]
+ faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
+ fld %st(0) // z | z | local[0] | local[1] |
+ // local[2]
+ fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
+ fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
+
+// if (transformed[2] < PARTICLE_Z_CLIP)
+// return;
+// (the compare is issued here; its result is only tested after two more FP ops)
+ fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
+ fxch %st(3) // local[2] | local[0] | local[1] | 1/z
+
+ flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
+ fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
+ flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
+ // local[1] | 1/z
+
+ fnstsw %ax // AX = FPU status word holding the fcomps result
+ testb $1,%ah // AH bit 0 = C0: set when z compared below the clip value (or unordered)
+ jnz LPop6AndDone // clipped: discard the six live FP stack entries and return
+
+// transformed[1] = DotProduct(local, r_pup);
+ fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
+ flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
+ // local[0] | local[1] | 1/z
+ fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
+ // local[1] | 1/z
+ fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
+ // local[1] | 1/z
+ faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
+ // local[1] | 1/z
+ faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
+ fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
+
+// transformed[0] = DotProduct(local, r_pright);
+ fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
+ fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
+ fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
+ fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
+ fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
+ fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
+ faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
+
+ faddp %st(0),%st(1) // x | y | 1/z
+ fxch %st(1) // y | x | 1/z
+
+// project the point
+ fmul %st(2),%st(0) // y/z | x | 1/z
+ fxch %st(1) // x | y/z | 1/z
+ fmul %st(2),%st(0) // x/z | y/z | 1/z
+ fxch %st(1) // y/z | x/z | 1/z
+ fsubrs C(ycenter) // v | x/z | 1/z
+ fxch %st(1) // x/z | v | 1/z
+ fadds C(xcenter) // u | v | 1/z
+// FIXME: preadjust xcenter and ycenter
+ fxch %st(1) // v | u | 1/z
+ fadds float_point5 // v (+ float_point5) | u | 1/z
+ fxch %st(1) // u | v | 1/z
+ fadds float_point5 // u (+ float_point5) | v | 1/z
+ fxch %st(2) // 1/z | v | u
+ fmuls DP_32768 // 1/z * 0x8000 | v | u
+ fxch %st(2) // u | v | 1/z * 0x8000
+
+// FIXME: use Terje's fp->int trick here?
+// FIXME: check we're getting proper rounding here
+ fistpl DP_u // v | 1/z * 0x8000
+ fistpl DP_v // 1/z * 0x8000
+
+ movl DP_u,%eax // eax = integer u
+ movl DP_v,%edx // edx = integer v
+
+// if ((v > d_vrectbottom_particle) ||
+// (u > d_vrectright_particle) ||
+// (v < d_vrecty) ||
+// (u < d_vrectx))
+// {
+// return; (after popping the one value still on the FP stack)
+// }
+
+ movl C(d_vrectbottom_particle),%ebx
+ movl C(d_vrectright_particle),%ecx
+ cmpl %ebx,%edx
+ jg LPop1AndDone
+ cmpl %ecx,%eax
+ jg LPop1AndDone
+ movl C(d_vrecty),%ebx
+ movl C(d_vrectx),%ecx
+ cmpl %ebx,%edx
+ jl LPop1AndDone
+
+ cmpl %ecx,%eax
+ jl LPop1AndDone
+
+ flds pt_color(%edi) // color | 1/z * 0x8000
+// FIXME: use Terje's fast fp->int trick?
+ fistpl DP_Color // 1/z * 0x8000
+
+ movl C(d_viewbuffer),%ebx
+
+ addl %eax,%ebx // ebx = d_viewbuffer + u
+ movl C(d_scantable)(,%edx,4),%edi // point to the pixel
+
+ imull C(d_zrowbytes),%edx // point to the z pixel
+
+ leal (%edx,%eax,2),%edx // edx = v*d_zrowbytes + u*2 (z entries are 2 bytes)
+ movl C(d_pzbuffer),%eax
+
+ fistpl izi // izi = (int)(1/z * 0x8000); FP stack is now empty
+
+ addl %ebx,%edi // edi = d_scantable[v] + d_viewbuffer + u
+ addl %eax,%edx // edx = d_pzbuffer + z offset
+
+// pix = izi >> d_pix_shift;
+
+ movl izi,%eax
+ movl C(d_pix_shift),%ecx
+ shrl %cl,%eax
+ movl izi,%ebp // only %bp (low 16 bits of izi) is compared/stored below
+
+// if (pix < d_pix_min)
+// pix = d_pix_min;
+// else if (pix > d_pix_max)
+// pix = d_pix_max;
+
+ movl C(d_pix_min),%ebx
+ movl C(d_pix_max),%ecx
+ cmpl %ebx,%eax
+ jnl LTestPixMax
+ movl %ebx,%eax
+ jmp LTestDone
+
+LTestPixMax:
+ cmpl %ecx,%eax
+ jng LTestDone
+ movl %ecx,%eax
+LTestDone: // eax = pix after clamping to [d_pix_min, d_pix_max]
+
+ movb DP_Color,%ch // ch = low byte of the integer color; stays live in all draw paths
+
+ movl C(d_y_aspect_shift),%ebx
+ testl %ebx,%ebx
+ jnz LDefault // non-zero aspect shift: only the generic loop handles it
+
+ cmpl $4,%eax
+ ja LDefault // pix > 4: no unrolled handler, use the generic loop
+
+ jmp *DP_EntryTable-4(,%eax,4) // indirect jump via table entry pix-1 (assumes pix >= 1 -- TODO confirm d_pix_min)
+
+// 1x1: edx = z-buffer entry, edi = screen pixel, bp = izi, ch = color
+.globl DP_1x1
+DP_1x1:
+ cmpw %bp,(%edx) // just one pixel to do
+ jg LDone // stored z greater (signed) than izi: leave the pixel alone
+ movw %bp,(%edx) // pz[0] = izi
+ movb %ch,(%edi) // pdest[0] = color
+ jmp LDone
+
+// 2x2: z-tested 2x2 block; edx = z entry, edi = pixel, bp = izi, ch = color
+.globl DP_2x2
+DP_2x2:
+ pushl %esi // esi is borrowed for the z row pitch; restored before LDone
+ movl C(screenwidth),%ebx // ebx = byte offset from one screen row to the next
+ movl C(d_zrowbytes),%esi // esi = byte offset from one z-buffer row to the next
+
+ cmpw %bp,(%edx) // row 0, column 0
+ jg L2x2_1
+ movw %bp,(%edx)
+ movb %ch,(%edi)
+L2x2_1:
+ cmpw %bp,2(%edx) // row 0, column 1
+ jg L2x2_2
+ movw %bp,2(%edx)
+ movb %ch,1(%edi)
+L2x2_2:
+ cmpw %bp,(%edx,%esi,1) // row 1, column 0
+ jg L2x2_3
+ movw %bp,(%edx,%esi,1)
+ movb %ch,(%edi,%ebx,1)
+L2x2_3:
+ cmpw %bp,2(%edx,%esi,1) // row 1, column 1
+ jg L2x2_4
+ movw %bp,2(%edx,%esi,1)
+ movb %ch,1(%edi,%ebx,1)
+L2x2_4:
+
+ popl %esi
+ jmp LDone
+
+// 3x3: z-tested 3x3 block; edx = z entry, edi = pixel, bp = izi, ch = color
+.globl DP_3x3
+DP_3x3:
+ pushl %esi // esi is borrowed for the z row pitch; restored before LDone
+ movl C(screenwidth),%ebx // ebx = byte offset from one screen row to the next
+ movl C(d_zrowbytes),%esi // esi = byte offset from one z-buffer row to the next
+
+ cmpw %bp,(%edx) // row 0, column 0
+ jg L3x3_1
+ movw %bp,(%edx)
+ movb %ch,(%edi)
+L3x3_1:
+ cmpw %bp,2(%edx) // row 0, column 1
+ jg L3x3_2
+ movw %bp,2(%edx)
+ movb %ch,1(%edi)
+L3x3_2:
+ cmpw %bp,4(%edx) // row 0, column 2
+ jg L3x3_3
+ movw %bp,4(%edx)
+ movb %ch,2(%edi)
+L3x3_3:
+
+ cmpw %bp,(%edx,%esi,1) // row 1, column 0
+ jg L3x3_4
+ movw %bp,(%edx,%esi,1)
+ movb %ch,(%edi,%ebx,1)
+L3x3_4:
+ cmpw %bp,2(%edx,%esi,1) // row 1, column 1
+ jg L3x3_5
+ movw %bp,2(%edx,%esi,1)
+ movb %ch,1(%edi,%ebx,1)
+L3x3_5:
+ cmpw %bp,4(%edx,%esi,1) // row 1, column 2
+ jg L3x3_6
+ movw %bp,4(%edx,%esi,1)
+ movb %ch,2(%edi,%ebx,1)
+L3x3_6:
+
+ cmpw %bp,(%edx,%esi,2) // row 2, column 0 (index scaled by 2 = two rows down)
+ jg L3x3_7
+ movw %bp,(%edx,%esi,2)
+ movb %ch,(%edi,%ebx,2)
+L3x3_7:
+ cmpw %bp,2(%edx,%esi,2) // row 2, column 1
+ jg L3x3_8
+ movw %bp,2(%edx,%esi,2)
+ movb %ch,1(%edi,%ebx,2)
+L3x3_8:
+ cmpw %bp,4(%edx,%esi,2) // row 2, column 2
+ jg L3x3_9
+ movw %bp,4(%edx,%esi,2)
+ movb %ch,2(%edi,%ebx,2)
+L3x3_9:
+
+ popl %esi
+ jmp LDone
+
+
+// 4x4: z-tested 4x4 block; edx = z entry, edi = pixel, bp = izi, ch = color
+.globl DP_4x4
+DP_4x4:
+ pushl %esi // esi is borrowed for the z row pitch; restored before LDone
+ movl C(screenwidth),%ebx // ebx = byte offset from one screen row to the next
+ movl C(d_zrowbytes),%esi // esi = byte offset from one z-buffer row to the next
+
+ cmpw %bp,(%edx) // row 0, column 0
+ jg L4x4_1
+ movw %bp,(%edx)
+ movb %ch,(%edi)
+L4x4_1:
+ cmpw %bp,2(%edx) // row 0, column 1
+ jg L4x4_2
+ movw %bp,2(%edx)
+ movb %ch,1(%edi)
+L4x4_2:
+ cmpw %bp,4(%edx) // row 0, column 2
+ jg L4x4_3
+ movw %bp,4(%edx)
+ movb %ch,2(%edi)
+L4x4_3:
+ cmpw %bp,6(%edx) // row 0, column 3
+ jg L4x4_4
+ movw %bp,6(%edx)
+ movb %ch,3(%edi)
+L4x4_4:
+
+ cmpw %bp,(%edx,%esi,1) // row 1, column 0
+ jg L4x4_5
+ movw %bp,(%edx,%esi,1)
+ movb %ch,(%edi,%ebx,1)
+L4x4_5:
+ cmpw %bp,2(%edx,%esi,1) // row 1, column 1
+ jg L4x4_6
+ movw %bp,2(%edx,%esi,1)
+ movb %ch,1(%edi,%ebx,1)
+L4x4_6:
+ cmpw %bp,4(%edx,%esi,1) // row 1, column 2
+ jg L4x4_7
+ movw %bp,4(%edx,%esi,1)
+ movb %ch,2(%edi,%ebx,1)
+L4x4_7:
+ cmpw %bp,6(%edx,%esi,1) // row 1, column 3
+ jg L4x4_8
+ movw %bp,6(%edx,%esi,1)
+ movb %ch,3(%edi,%ebx,1)
+L4x4_8:
+
+ leal (%edx,%esi,2),%edx // advance the z pointer two rows (now at row 2)
+ leal (%edi,%ebx,2),%edi // advance the screen pointer two rows (now at row 2)
+
+ cmpw %bp,(%edx) // row 2, column 0
+ jg L4x4_9
+ movw %bp,(%edx)
+ movb %ch,(%edi)
+L4x4_9:
+ cmpw %bp,2(%edx) // row 2, column 1
+ jg L4x4_10
+ movw %bp,2(%edx)
+ movb %ch,1(%edi)
+L4x4_10:
+ cmpw %bp,4(%edx) // row 2, column 2
+ jg L4x4_11
+ movw %bp,4(%edx)
+ movb %ch,2(%edi)
+L4x4_11:
+ cmpw %bp,6(%edx) // row 2, column 3
+ jg L4x4_12
+ movw %bp,6(%edx)
+ movb %ch,3(%edi)
+L4x4_12:
+
+ cmpw %bp,(%edx,%esi,1) // row 3, column 0
+ jg L4x4_13
+ movw %bp,(%edx,%esi,1)
+ movb %ch,(%edi,%ebx,1)
+L4x4_13:
+ cmpw %bp,2(%edx,%esi,1) // row 3, column 1
+ jg L4x4_14
+ movw %bp,2(%edx,%esi,1)
+ movb %ch,1(%edi,%ebx,1)
+L4x4_14:
+ cmpw %bp,4(%edx,%esi,1) // row 3, column 2
+ jg L4x4_15
+ movw %bp,4(%edx,%esi,1)
+ movb %ch,2(%edi,%ebx,1)
+L4x4_15:
+ cmpw %bp,6(%edx,%esi,1) // row 3, column 3
+ jg L4x4_16
+ movw %bp,6(%edx,%esi,1)
+ movb %ch,3(%edi,%ebx,1)
+L4x4_16:
+
+ popl %esi
+ jmp LDone
+
+// default case, handling any size particle
+LDefault:
+
+// count = pix << d_y_aspect_shift;
+
+ movl %eax,%ebx
+ movl %eax,DP_Pix // row width, reloaded at the top of every row
+ movb C(d_y_aspect_shift),%cl // only cl is written, so the color in ch survives
+ shll %cl,%ebx // ebx = count (number of rows)
+
+// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
+// {
+// for (i=0 ; i<pix ; i++)
+// {
+// if (pz[i] <= izi)
+// {
+// pz[i] = izi;
+// pdest[i] = color;
+// }
+// }
+// }
+// (the asm walks each row from i = pix-1 down to 0, and both loops are
+// do-while style: they assume pix and count are non-zero on entry)
+
+LGenRowLoop:
+ movl DP_Pix,%eax // eax = pix, counts down to 0 across the row
+
+LGenColLoop:
+ cmpw %bp,-2(%edx,%eax,2) // compare pz[eax-1] with izi
+ jg LGSkip
+ movw %bp,-2(%edx,%eax,2)
+ movb %ch,-1(%edi,%eax,1)
+LGSkip:
+ decl %eax // --pix
+ jnz LGenColLoop
+
+ addl C(d_zrowbytes),%edx // next z-buffer row (edx is a byte pointer)
+ addl C(screenwidth),%edi // next screen row
+
+ decl %ebx // --count
+ jnz LGenRowLoop
+
+LDone: // common exit: pops mirror the three pushes at entry, in reverse order
+ popl %ebx // restore register variables
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+LPop6AndDone: // z-clip reject: six values are live on the FP stack at the jump
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0) // five popped here; falls through for the sixth
+LPop1AndDone: // screen-rect reject: only 1/z * 0x8000 is left on the FP stack
+ fstp %st(0)
+ jmp LDone
+
+#endif // id386
--- /dev/null
+++ b/u/d_polysa.s
@@ -1,0 +1,1723 @@
+//
+// d_polysa.s
+// x86 assembly-language polygon model drawing code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+// !!! if this is changed, it must be changed in d_polyse.c too !!!
+#define DPS_MAXSPANS MAXHEIGHT+1
+ // 1 extra for spanpackage that marks end
+
+//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
+#define SPAN_SIZE (1024+1+1+1)*32
+// SPAN_SIZE is the formula above with literals filled in: 32 is spanpackage_t_size, and 1024 is
+// presumably MAXHEIGHT (TODO confirm). The body is unparenthesized, so use it as $(SPAN_SIZE).
+ .data
+
+ .align 4
+p10_minus_p20: .single 0 // r_p1[0] - r_p2[0], spilled by D_PolysetCalcGradients
+p01_minus_p21: .single 0 // r_p0[1] - r_p2[1], spilled by D_PolysetCalcGradients
+temp0: .single 0 // NOTE(review): no reference in the code reviewed here -- confirm before removing
+temp1: .single 0 // NOTE(review): same remark as temp0
+Ltemp: .single 0 // float scratch that D_PolysetDraw stores and then reloads as an integer
+
+aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5 // indexed by (-count) & 7 to pick the
+ .long LDraw4, LDraw3, LDraw2, LDraw1 // entry point into the 8-pixel unrolled loop
+
+lzistepx: .long 0 // r_zistepx rotated by 16 bits, written by D_PolysetDrawSpans8
+
+
+ .text
+
+ .extern C(D_PolysetSetEdgeTable)
+ .extern C(D_RasterizeAliasPolySmooth)
+
+//----------------------------------------------------------------------
+// affine triangle gradient calculation code
+//----------------------------------------------------------------------
+
+#define skinwidth 4+0
+// skinwidth names the one stack argument: 4(%esp) on entry, and nothing is pushed in this routine
+.globl C(D_PolysetCalcGradients)
+C(D_PolysetCalcGradients):
+// Derives the light, s, t and 1/z steps in x and y from r_p0, r_p1 and r_p2; uses %eax, %ecx, %edx.
+// p00_minus_p20 = r_p0[0] - r_p2[0];
+// p01_minus_p21 = r_p0[1] - r_p2[1];
+// p10_minus_p20 = r_p1[0] - r_p2[0];
+// p11_minus_p21 = r_p1[1] - r_p2[1];
+//
+// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
+// p00_minus_p20 * p11_minus_p21);
+//
+// ystepdenominv = -xstepdenominv;
+
+ fildl C(r_p0)+0 // r_p0[0]
+ fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
+ fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
+ fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
+ fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
+ fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
+ // r_p2[0] | r_p0[0]
+ fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
+ // r_p2[0] | r_p0[0]
+ fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
+ // r_p2[0] | r_p0[0]
+ fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
+ // r_p2[0] | r_p0[0]
+ fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
+ // r_p1[1] | r_p2[0] | r_p0[0]
+ fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
+ // r_p1[1] | r_p2[0] | p10_minus_p20
+ fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
+ // p00_minus_p20 | p10_minus_p20
+ fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
+ // p00_minus_p20 | p10_minus_p20
+ fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
+ // p00_minus_p20 | p10_minus_p20
+ fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
+ // p00_minus_p20 | p10_minus_p20
+ flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
+ // p00_minus_p20 | p10_minus_p20
+ fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
+ // p00_minus_p20 | d_xdenom
+ fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
+ // p00_minus_p20 | d_xdenom
+ fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
+ fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
+// (the d_xdenom value loaded above is what the stack comments call xstepdenominv from here on)
+//// ceil () for light so positive steps are exaggerated, negative steps
+//// diminished, pushing us away from underflow toward overflow. Underflow is
+//// very visible, overflow is very unlikely, because of ambient lighting
+// t0 = r_p0[4] - r_p2[4];
+// t1 = r_p1[4] - r_p2[4];
+
+ fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+
+// r_lstepx = (int)
+// ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
+// r_lstepy = (int)
+// ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
+
+ fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
+ // t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
+ // t1*p00_minus_p20 | t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(2) // xstepdenominv |
+ // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmuls float_minus_1 // ystepdenominv |
+ // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv |
+ // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv |
+ // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fldcw ceil_cw // ceil_cw is defined elsewhere; presumably selects round-up for the ceil() above
+ fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fldcw single_cw // back to single_cw for every remaining integer store
+
+// t0 = r_p0[2] - r_p2[2];
+// t1 = r_p1[2] - r_p2[2];
+
+ fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+
+// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+// xstepdenominv);
+// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+// ystepdenominv);
+
+ fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
+ // t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
+ // t1*p00_minus_p20 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv |
+ // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv |
+ // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+
+// t0 = r_p0[3] - r_p2[3];
+// t1 = r_p1[3] - r_p2[3];
+
+ fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+
+// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+// xstepdenominv);
+// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+// ystepdenominv);
+
+ fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+ // t0*p11_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
+ // t1*p01_minus_p21 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
+ // t1*p00_minus_p20 | t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv |
+ // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
+ // ystepdenominv |
+ // (t1*p01_minus_p21 - t0*p11_minus_p21)*
+ // xstepdenominv | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+
+// t0 = r_p0[5] - r_p2[5];
+// t1 = r_p1[5] - r_p2[5];
+
+ fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // p11_minus_p21
+ fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 | p11_minus_p21
+ fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+
+// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
+// xstepdenominv);
+// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
+// ystepdenominv);
+
+ fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | p11_minus_p21
+ fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | t0*p11_minus_p21
+ fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | t0*p11_minus_p21
+ fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
+ // p00_minus_p20 | t0*p11_minus_p21
+ fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 |
+ // t0*p11_minus_p21
+ fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
+ // xstepdenominv | p00_minus_p20 |
+ // t0*p11_minus_p21
+ fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // t0*p11_minus_p21
+ fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
+ // ystepdenominv | xstepdenominv | p00_minus_p20 |
+ // t0*p11_minus_p21
+ fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
+ // ystepdenominv | xstepdenominv |
+ // t1*p00_minus_p20 | t0*p11_minus_p21
+ fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
+ // ystepdenominv | xstepdenominv |
+ // t1*p00_minus_p20 | t0*p10_minus_p20
+ fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // ystepdenominv | xstepdenominv |
+ // t1*p00_minus_p20 | t0*p10_minus_p20
+ fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
+ // xstepdenominv |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // t0*p10_minus_p20
+ fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // t1*p00_minus_p20 - t0*p10_minus_p20
+ fxch %st(1) // xstepdenominv | ystepdenominv |
+ // t1*p01_minus_p21 - t0*p11_minus_p21 |
+ // t1*p00_minus_p20 - t0*p10_minus_p20
+ fmulp %st(0),%st(2) // ystepdenominv |
+ // (t1*p01_minus_p21 - t0*p11_minus_p21) *
+ // xstepdenominv |
+ // t1*p00_minus_p20 - t0*p10_minus_p20
+ fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
+ // xstepdenominv |
+ // (t1*p00_minus_p20 - t0*p10_minus_p20) *
+ // ystepdenominv
+ fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
+ // ystepdenominv
+ fistpl C(r_zistepy)
+
+// a_sstepxfrac = r_sstepx << 16;
+// a_tstepxfrac = r_tstepx << 16;
+//
+// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
+// (r_sstepx >> 16);
+
+ movl C(r_sstepx),%eax
+ movl C(r_tstepx),%edx
+ shll $16,%eax
+ shll $16,%edx
+ movl %eax,C(a_sstepxfrac)
+ movl %edx,C(a_tstepxfrac)
+
+ movl C(r_sstepx),%ecx
+ movl C(r_tstepx),%eax
+ sarl $16,%ecx
+ sarl $16,%eax
+ imull skinwidth(%esp) // %edx:%eax = (r_tstepx >> 16) * skinwidth argument; only %eax is kept
+ addl %ecx,%eax
+ movl %eax,C(a_ststepxwhole)
+
+ ret
+
+
+//----------------------------------------------------------------------
+// recursive subdivision affine triangle drawing code
+//
+// not C-callable because of stdcall return
+//----------------------------------------------------------------------
+
+#define lp1 4+16
+#define lp2 8+16
+#define lp3 12+16
+
+.globl C(D_PolysetRecursiveTriangle)
+C(D_PolysetRecursiveTriangle):
+ pushl %ebp // preserve caller stack frame pointer
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+// the lp1/lp2/lp3 offsets allow for the 16 bytes pushed above; each argument points at an int array
+// int *temp;
+// int d;
+// int new[6];
+// int i;
+// int z;
+// short *zbuf;
+ movl lp2(%esp),%esi
+ movl lp1(%esp),%ebx
+ movl lp3(%esp),%edi
+
+// d = lp2[0] - lp1[0];
+// if (d < -1 || d > 1)
+// goto split;
+ movl 0(%esi),%eax
+
+ movl 0(%ebx),%edx
+ movl 4(%esi),%ebp
+
+ subl %edx,%eax
+ movl 4(%ebx),%ecx
+
+ subl %ecx,%ebp
+ incl %eax // d + 1: unsigned (d + 1) > 2 is the same test as d < -1 || d > 1
+
+ cmpl $2,%eax
+ ja LSplit
+
+// d = lp2[1] - lp1[1];
+// if (d < -1 || d > 1)
+// goto split;
+ movl 0(%edi),%eax
+ incl %ebp
+
+ cmpl $2,%ebp
+ ja LSplit
+
+// d = lp3[0] - lp2[0];
+// if (d < -1 || d > 1)
+// goto split2;
+ movl 0(%esi),%edx
+ movl 4(%edi),%ebp
+
+ subl %edx,%eax
+ movl 4(%esi),%ecx
+
+ subl %ecx,%ebp
+ incl %eax
+
+ cmpl $2,%eax
+ ja LSplit2
+
+// d = lp3[1] - lp2[1];
+// if (d < -1 || d > 1)
+// goto split2;
+ movl 0(%ebx),%eax
+ incl %ebp
+
+ cmpl $2,%ebp
+ ja LSplit2
+
+// d = lp1[0] - lp3[0];
+// if (d < -1 || d > 1)
+// goto split3;
+ movl 0(%edi),%edx
+ movl 4(%ebx),%ebp
+
+ subl %edx,%eax
+ movl 4(%edi),%ecx
+
+ subl %ecx,%ebp
+ incl %eax
+
+ incl %ebp
+ movl %ebx,%edx // temp = lp1, consumed at LSplit3
+
+ cmpl $2,%eax
+ ja LSplit3
+
+// d = lp1[1] - lp3[1];
+// if (d < -1 || d > 1)
+// {
+//split3:
+// temp = lp1;
+// lp1 = lp3;
+// lp3 = lp2;
+// lp2 = temp;
+// goto split;
+// }
+//
+// return; // entire tri is filled
+//
+ cmpl $2,%ebp
+ jna LDone
+
+LSplit3:
+ movl %edi,%ebx // lp1 = lp3
+ movl %esi,%edi // lp3 = lp2
+ movl %edx,%esi // lp2 = temp
+ jmp LSplit
+
+//split2:
+LSplit2:
+
+// temp = lp1;
+// lp1 = lp2;
+// lp2 = lp3;
+// lp3 = temp;
+ movl %ebx,%eax
+ movl %esi,%ebx
+ movl %edi,%esi
+ movl %eax,%edi
+
+//split:
+LSplit:
+
+ subl $24,%esp // allocate space for a new vertex (6 ints; new[4] is never written)
+
+//// split this edge
+// new[0] = (lp1[0] + lp2[0]) >> 1;
+// new[1] = (lp1[1] + lp2[1]) >> 1;
+// new[2] = (lp1[2] + lp2[2]) >> 1;
+// new[3] = (lp1[3] + lp2[3]) >> 1;
+// new[5] = (lp1[5] + lp2[5]) >> 1;
+ movl 8(%ebx),%eax
+
+ movl 8(%esi),%edx
+ movl 12(%ebx),%ecx
+
+ addl %edx,%eax
+ movl 12(%esi),%edx
+
+ sarl $1,%eax
+ addl %edx,%ecx
+
+ movl %eax,8(%esp)
+ movl 20(%ebx),%eax
+
+ sarl $1,%ecx
+ movl 20(%esi),%edx
+
+ movl %ecx,12(%esp)
+ addl %edx,%eax
+
+ movl 0(%ebx),%ecx
+ movl 0(%esi),%edx
+
+ sarl $1,%eax
+ addl %ecx,%edx
+
+ movl %eax,20(%esp)
+ movl 4(%ebx),%eax
+
+ sarl $1,%edx
+ movl 4(%esi),%ebp
+
+ movl %edx,0(%esp)
+ addl %eax,%ebp
+
+ sarl $1,%ebp
+ movl %ebp,4(%esp)
+
+//// draw the point if splitting a leading edge
+// if (lp2[1] > lp1[1])
+// goto nodraw;
+ cmpl %eax,4(%esi)
+ jg LNoDraw
+
+// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
+// goto nodraw;
+ movl 0(%esi),%edx
+ jnz LDraw // flags are still those of the cmpl above (movl leaves them alone): lp2[1] != lp1[1]
+
+ cmpl %ecx,%edx
+ jl LNoDraw
+
+LDraw:
+
+// z = new[5] >> 16;
+ movl 20(%esp),%edx
+ movl 4(%esp),%ecx
+
+ sarl $16,%edx
+ movl 0(%esp),%ebp
+
+// zbuf = zspantable[new[1]] + new[0];
+ movl C(zspantable)(,%ecx,4),%eax
+
+// if (z >= *zbuf)
+// {
+ cmpw (%eax,%ebp,2),%dx
+ jnge LNoDraw
+
+// int pix;
+//
+// *zbuf = z;
+ movw %dx,(%eax,%ebp,2)
+
+// pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
+ movl 12(%esp),%eax
+
+ sarl $16,%eax
+ movl 8(%esp),%edx
+
+ sarl $16,%edx
+ subl %ecx,%ecx
+
+ movl C(skintable)(,%eax,4),%eax
+ movl 4(%esp),%ebp
+
+ movb (%eax,%edx,),%cl
+ movl C(d_pcolormap),%edx
+
+ movb (%edx,%ecx,),%dl
+ movl 0(%esp),%ecx
+
+// d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
+ movl C(d_scantable)(,%ebp,4),%eax
+ addl %eax,%ecx
+ movl C(d_viewbuffer),%eax
+ movb %dl,(%eax,%ecx,1)
+
+// }
+//
+//nodraw:
+LNoDraw:
+
+//// recursively continue
+// D_PolysetRecursiveTriangle (lp3, lp1, new);
+ pushl %esp // new: the value pushed is %esp before the push, i.e. &new[0]
+ pushl %ebx // lp1
+ pushl %edi // lp3
+ call C(D_PolysetRecursiveTriangle)
+
+// D_PolysetRecursiveTriangle (lp3, new, lp2);
+ movl %esp,%ebx // the callee popped its arguments, so %esp is &new[0] again
+ pushl %esi
+ pushl %ebx
+ pushl %edi
+ call C(D_PolysetRecursiveTriangle)
+ addl $24,%esp // release new[]
+
+LDone:
+ popl %ebx // restore register variables
+ popl %edi
+ popl %esi
+ popl %ebp // restore caller stack frame pointer
+ ret $12 // also pops the three pointer arguments, which is why this is not C-callable
+
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for affine polygons, with smooth
+// shading and no transparency
+//----------------------------------------------------------------------
+
+#define pspans 4+8
+// pspans is the one stack argument; its offset allows for the two pushes made before it is read
+.globl C(D_PolysetAff8Start)
+C(D_PolysetAff8Start):
+// D_PolysetAff8Start/End mark the extent of this routine, whose LPatchN sites D_Aff8Patch rewrites
+.globl C(D_PolysetDrawSpans8)
+C(D_PolysetDrawSpans8):
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+ movl pspans(%esp),%esi // point to the first span descriptor
+ movl C(r_zistepx),%ecx
+
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+
+ rorl $16,%ecx // put high 16 bits of 1/z step in low word
+ movl spanpackage_t_count(%esi),%edx
+
+ movl %ecx,lzistepx // rotated copy, added to %ebp once per pixel below
+
+LSpanLoop:
+
+// lcount = d_aspancount - pspanpackage->count;
+//
+// errorterm += erroradjustup;
+// if (errorterm >= 0)
+// {
+// d_aspancount += d_countextrastep;
+// errorterm -= erroradjustdown;
+// }
+// else
+// {
+// d_aspancount += ubasestep;
+// }
+ movl C(d_aspancount),%eax
+ subl %edx,%eax
+
+ movl C(erroradjustup),%edx
+ movl C(errorterm),%ebx
+ addl %edx,%ebx
+ js LNoTurnover
+
+ movl C(erroradjustdown),%edx
+ movl C(d_countextrastep),%edi
+ subl %edx,%ebx
+ movl C(d_aspancount),%ebp
+ movl %ebx,C(errorterm)
+ addl %edi,%ebp
+ movl %ebp,C(d_aspancount)
+ jmp LRightEdgeStepped
+
+LNoTurnover:
+ movl C(d_aspancount),%edi
+ movl C(ubasestep),%edx
+ movl %ebx,C(errorterm)
+ addl %edx,%edi
+ movl %edi,C(d_aspancount)
+
+LRightEdgeStepped:
+ cmpl $1,%eax // lcount: below 1 draws nothing, exactly 1 takes the short path
+
+ jl LNextSpan
+ jz LExactlyOneLong
+
+//
+// set up advancetable
+//
+ movl C(a_ststepxwhole),%ecx
+ movl C(r_affinetridesc)+atd_skinwidth,%edx
+
+ movl %ecx,advancetable+4 // advance base in t
+ addl %edx,%ecx
+
+ movl %ecx,advancetable // advance extra in t
+ movl C(a_tstepxfrac),%ecx
+
+ movw C(r_lstepx),%cx // low word of tstep = light step, high word = tfrac step
+ movl %eax,%edx // count
+
+ movl %ecx,tstep
+ addl $7,%edx
+
+ shrl $3,%edx // count of full and partial loops
+ movl spanpackage_t_sfrac(%esi),%ebx
+
+ movw %dx,%bx // low word of %ebx = loop count, high word keeps sfrac
+ movl spanpackage_t_pz(%esi),%ecx
+
+ negl %eax
+
+ movl spanpackage_t_pdest(%esi),%edi
+ andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
+
+ subl %eax,%edi // compensate for hardwired offsets
+ subl %eax,%ecx
+
+ subl %eax,%ecx // second subtraction: z-buffer entries are 2 bytes each
+ movl spanpackage_t_tfrac(%esi),%edx
+
+ movw spanpackage_t_light(%esi),%dx
+ movl spanpackage_t_zi(%esi),%ebp
+
+ rorl $16,%ebp // put high 16 bits of 1/z in low word
+ pushl %esi
+
+ movl spanpackage_t_ptex(%esi),%esi
+ jmp aff8entryvec_table(,%eax,4) // %eax = (-count) & 7 selects where to enter the unrolled loop
+
+// %bx = count of full and partial loops
+// %ebx high word = sfrac
+// %ecx = pz
+// %dx = light
+// %edx high word = tfrac
+// %esi = ptex
+// %edi = pdest
+// %ebp = 1/z
+// tstep low word = C(r_lstepx)
+// tstep high word = C(a_tstepxfrac)
+// C(a_sstepxfrac) low word = 0
+// C(a_sstepxfrac) high word = C(a_sstepxfrac)
+
+LDrawLoop:
+
+// FIXME: do we need to clamp light? We may need at least a buffer bit to
+// keep it from poking into tfrac and causing problems
+
+LDraw8:
+ cmpw (%ecx),%bp // z-test: the pixel is skipped when 1/z is below the stored value
+ jl Lp1
+ xorl %eax,%eax
+ movb %dh,%ah // %eax = (high byte of light << 8) | texel
+ movb (%esi),%al
+ movw %bp,(%ecx)
+ movb 0x12345678(%eax),%al // placeholder displacement, overwritten by D_Aff8Patch
+LPatch8:
+ movb %al,(%edi)
+Lp1:
+ addl tstep,%edx // steps tfrac and light together; carry = tfrac overflow
+ sbbl %eax,%eax // %eax = -1 on that carry, else 0
+ addl lzistepx,%ebp
+ adcl $0,%ebp // wrap the carry from the high word into the low word
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi // base or extra t advance, plus the sfrac carry
+
+LDraw7:
+ cmpw 2(%ecx),%bp // same pattern as LDraw8 from here down; only the offsets change
+ jl Lp2
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,2(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch7:
+ movb %al,1(%edi)
+Lp2:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw6:
+ cmpw 4(%ecx),%bp
+ jl Lp3
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,4(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch6:
+ movb %al,2(%edi)
+Lp3:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw5:
+ cmpw 6(%ecx),%bp
+ jl Lp4
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,6(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch5:
+ movb %al,3(%edi)
+Lp4:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw4:
+ cmpw 8(%ecx),%bp
+ jl Lp5
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,8(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch4:
+ movb %al,4(%edi)
+Lp5:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw3:
+ cmpw 10(%ecx),%bp
+ jl Lp6
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,10(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch3:
+ movb %al,5(%edi)
+Lp6:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw2:
+ cmpw 12(%ecx),%bp
+ jl Lp7
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,12(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch2:
+ movb %al,6(%edi)
+Lp7:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LDraw1:
+ cmpw 14(%ecx),%bp
+ jl Lp8
+ xorl %eax,%eax
+ movb %dh,%ah
+ movb (%esi),%al
+ movw %bp,14(%ecx)
+ movb 0x12345678(%eax),%al
+LPatch1:
+ movb %al,7(%edi)
+Lp8:
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl lzistepx,%ebp
+ adcl $0,%ebp
+ addl C(a_sstepxfrac),%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ addl $8,%edi
+ addl $16,%ecx
+
+ decw %bx // only the low word counts loops; the high word is sfrac
+ jnz LDrawLoop
+
+ popl %esi // restore spans pointer
+LNextSpan:
+ addl $(spanpackage_t_size),%esi // point to next span
+LNextSpanESISet:
+ movl spanpackage_t_count(%esi),%edx
+ cmpl $-999999,%edx // any more spans?
+ jnz LSpanLoop // yes
+
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ popl %ebx // restore register variables
+ popl %esi
+ ret
+
+
+// draw a one-long span
+
+LExactlyOneLong:
+
+ movl spanpackage_t_pz(%esi),%ecx
+ movl spanpackage_t_zi(%esi),%ebp
+
+ rorl $16,%ebp // put high 16 bits of 1/z in low word
+ movl spanpackage_t_ptex(%esi),%ebx
+
+ cmpw (%ecx),%bp
+ jl LNextSpan
+ xorl %eax,%eax
+ movl spanpackage_t_pdest(%esi),%edi
+ movb spanpackage_t_light+1(%esi),%ah // high byte of light, as %dh supplies in the loop
+ addl $(spanpackage_t_size),%esi // point to next span
+ movb (%ebx),%al
+ movw %bp,(%ecx)
+ movb 0x12345678(%eax),%al // placeholder displacement, overwritten by D_Aff8Patch
+LPatch9:
+ movb %al,(%edi)
+
+ jmp LNextSpanESISet
+
+.globl C(D_PolysetAff8End)
+C(D_PolysetAff8End):
+
+
+#define pcolormap 4
+// D_Aff8Patch: copy the pcolormap stack argument over the 0x12345678 placeholder that ends at each LPatchN
+.globl C(D_Aff8Patch)
+C(D_Aff8Patch):
+ movl pcolormap(%esp),%eax // the only argument; nothing is pushed, so it sits at 4(%esp)
+ movl %eax,LPatch8-4 // unrolled span loop, listed in the order the sites appear in the code
+ movl %eax,LPatch7-4
+ movl %eax,LPatch6-4
+ movl %eax,LPatch5-4
+ movl %eax,LPatch4-4
+ movl %eax,LPatch3-4
+ movl %eax,LPatch2-4
+ movl %eax,LPatch1-4
+ movl %eax,LPatch9-4 // the one-pixel span path
+
+ ret
+
+
+//----------------------------------------------------------------------
+// Alias model polygon dispatching code, combined with subdivided affine
+// triangle drawing code
+//----------------------------------------------------------------------
+
+.globl C(D_PolysetDraw)
+C(D_PolysetDraw):
+
+// spanpackage_t spans[DPS_MAXSPANS + 1 +
+// ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
+// // one extra because of cache line pretouching
+//
+// a_spans = (spanpackage_t *)
+// (((intptr)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
+ subl $(SPAN_SIZE),%esp
+ movl %esp,%eax
+ addl $(CACHE_SIZE - 1),%eax
+ andl $(~(CACHE_SIZE - 1)),%eax
+ movl %eax,C(a_spans)
+
+// if (r_affinetridesc.drawtype)
+// D_DrawSubdiv ();
+// else
+// D_DrawNonSubdiv ();
+ movl C(r_affinetridesc)+atd_drawtype,%eax
+ testl %eax,%eax
+ jz C(D_DrawNonSubdiv)
+
+ pushl %ebp // preserve caller stack frame pointer
+
+// lnumtriangles = r_affinetridesc.numtriangles;
+ movl C(r_affinetridesc)+atd_numtriangles,%ebp
+
+ pushl %esi // preserve register variables
+ shll $4,%ebp
+
+ pushl %ebx
+// ptri = r_affinetridesc.ptriangles;
+ movl C(r_affinetridesc)+atd_ptriangles,%ebx
+
+ pushl %edi
+
+// mtriangle_t *ptri;
+// finalvert_t *pfv, *index0, *index1, *index2;
+// int i;
+// int lnumtriangles;
+// int s0, s1, s2;
+
+// pfv = r_affinetridesc.pfinalverts;
+ movl C(r_affinetridesc)+atd_pfinalverts,%edi
+
+// for (i=0 ; i<lnumtriangles ; i++)
+// {
+
+Llooptop:
+
+// index0 = pfv + ptri[i].vertindex[0];
+// index1 = pfv + ptri[i].vertindex[1];
+// index2 = pfv + ptri[i].vertindex[2];
+ movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx
+ movl mtri_vertindex-16+4(%ebx,%ebp,),%esi
+
+ shll $(fv_shift),%ecx
+ movl mtri_vertindex-16+8(%ebx,%ebp,),%edx
+
+ shll $(fv_shift),%esi
+ addl %edi,%ecx
+
+ shll $(fv_shift),%edx
+ addl %edi,%esi
+
+ addl %edi,%edx
+
+// if (((index0->v[1]-index1->v[1]) *
+// (index0->v[0]-index2->v[0]) -
+// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
+// {
+// continue;
+// }
+//
+// d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
+ fildl fv_v+4(%ecx) // i0v1
+ fildl fv_v+4(%esi) // i1v1 | i0v1
+ fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1
+ fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1
+ fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1
+ fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1
+ fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
+ fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
+ fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
+ fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
+ fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
+ fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
+ fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
+ fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
+ fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
+ movl fv_v+16(%ecx),%eax
+ andl $0xFF00,%eax
+ fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
+ addl C(acolormap),%eax
+ fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
+ movl %eax,C(d_pcolormap)
+ fstps Ltemp
+ movl Ltemp,%eax
+ subl $0x80000001,%eax
+ jc Lskip
+
+// if (ptri[i].facesfront)
+// {
+// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
+ movl mtri_facesfront-16(%ebx,%ebp,),%eax
+ testl %eax,%eax
+ jz Lfacesback
+
+ pushl %edx
+ pushl %esi
+ pushl %ecx
+ call C(D_PolysetRecursiveTriangle)
+
+ subl $16,%ebp
+ jnz Llooptop
+ jmp Ldone2
+
+// }
+// else
+// {
+Lfacesback:
+
+// s0 = index0->v[2];
+// s1 = index1->v[2];
+// s2 = index2->v[2];
+ movl fv_v+8(%ecx),%eax
+ pushl %eax
+ movl fv_v+8(%esi),%eax
+ pushl %eax
+ movl fv_v+8(%edx),%eax
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+// if (index0->flags & ALIAS_ONSEAM)
+// index0->v[2] += r_affinetridesc.seamfixupX16;
+ movl C(r_affinetridesc)+atd_seamfixupX16,%eax
+ testl $(ALIAS_ONSEAM),fv_flags(%ecx)
+ jz Lp11
+ addl %eax,fv_v+8(%ecx)
+Lp11:
+
+// if (index1->flags & ALIAS_ONSEAM)
+// index1->v[2] += r_affinetridesc.seamfixupX16;
+ testl $(ALIAS_ONSEAM),fv_flags(%esi)
+ jz Lp12
+ addl %eax,fv_v+8(%esi)
+Lp12:
+
+// if (index2->flags & ALIAS_ONSEAM)
+// index2->v[2] += r_affinetridesc.seamfixupX16;
+ testl $(ALIAS_ONSEAM),fv_flags(%edx)
+ jz Lp13
+ addl %eax,fv_v+8(%edx)
+Lp13:
+
+// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
+ pushl %edx
+ pushl %esi
+ pushl %ecx
+ call C(D_PolysetRecursiveTriangle)
+
+// index0->v[2] = s0;
+// index1->v[2] = s1;
+// index2->v[2] = s2;
+ popl %edx
+ popl %ecx
+ popl %eax
+ movl %eax,fv_v+8(%edx)
+ popl %eax
+ movl %eax,fv_v+8(%esi)
+ popl %eax
+ movl %eax,fv_v+8(%ecx)
+
+// }
+// }
+Lskip:
+ subl $16,%ebp
+ jnz Llooptop
+
+Ldone2:
+ popl %edi // restore the caller's stack frame
+ popl %ebx
+ popl %esi // restore register variables
+ popl %ebp
+
+ addl $(SPAN_SIZE),%esp
+
+ ret
+
+
+//----------------------------------------------------------------------
+// Alias model triangle left-edge scanning code
+//----------------------------------------------------------------------
+
+#define height 4+16
+// height(%esp) skips the return address plus the 4 registers pushed below; do/while loop, so the body runs once before the count is tested
+.globl C(D_PolysetScanLeftEdge)
+C(D_PolysetScanLeftEdge):
+ pushl %ebp // preserve caller stack frame pointer
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+
+ movl height(%esp),%eax
+ movl C(d_sfrac),%ecx
+ andl $0xFFFF,%eax // keep only the low 16 bits of height
+ movl C(d_ptex),%ebx
+ orl %eax,%ecx // merge the count into the low word (assumes d_sfrac's low 16 bits are clear -- TODO confirm)
+ movl C(d_pedgespanpackage),%esi
+ movl C(d_tfrac),%edx
+ movl C(d_light),%edi
+ movl C(d_zi),%ebp
+
+// %eax: scratch
+// %ebx: d_ptex
+// %ecx: d_sfrac in high word, count in low word
+// %edx: d_tfrac
+// %esi: d_pedgespanpackage, errorterm, scratch alternately
+// %edi: d_light
+// %ebp: d_zi
+
+// do
+// {
+
+LScanLoop:
+
+// d_pedgespanpackage->ptex = ptex;
+// d_pedgespanpackage->pdest = d_pdest;
+// d_pedgespanpackage->pz = d_pz;
+// d_pedgespanpackage->count = d_aspancount;
+// d_pedgespanpackage->light = d_light;
+// d_pedgespanpackage->zi = d_zi;
+// d_pedgespanpackage->sfrac = d_sfrac << 16;
+// d_pedgespanpackage->tfrac = d_tfrac << 16;
+ movl %ebx,spanpackage_t_ptex(%esi)
+ movl C(d_pdest),%eax
+ movl %eax,spanpackage_t_pdest(%esi)
+ movl C(d_pz),%eax
+ movl %eax,spanpackage_t_pz(%esi)
+ movl C(d_aspancount),%eax
+ movl %eax,spanpackage_t_count(%esi)
+ movl %edi,spanpackage_t_light(%esi)
+ movl %ebp,spanpackage_t_zi(%esi)
+ movl %ecx,spanpackage_t_sfrac(%esi) // stored as-is: the low word still carries the loop count
+ movl %edx,spanpackage_t_tfrac(%esi)
+
+// pretouch the next cache line
+ movb spanpackage_t_size(%esi),%al // value is discarded; only the read matters
+
+// d_pedgespanpackage++;
+ addl $(spanpackage_t_size),%esi
+ movl C(erroradjustup),%eax
+ movl %esi,C(d_pedgespanpackage) // %esi is free for errorterm from here on
+
+// errorterm += erroradjustup;
+ movl C(errorterm),%esi
+ addl %eax,%esi // sets SF for the js below
+ movl C(d_pdest),%eax // movl leaves the flags untouched
+
+// if (errorterm >= 0)
+// {
+ js LNoLeftEdgeTurnover // negative errorterm: take the base-step path
+
+// errorterm -= erroradjustdown;
+// d_pdest += d_pdestextrastep;
+ subl C(erroradjustdown),%esi
+ addl C(d_pdestextrastep),%eax
+ movl %esi,C(errorterm)
+ movl %eax,C(d_pdest)
+
+// d_pz += d_pzextrastep;
+// d_aspancount += d_countextrastep;
+// d_ptex += d_ptexextrastep;
+// d_sfrac += d_sfracextrastep;
+// d_ptex += d_sfrac >> 16;
+// d_sfrac &= 0xFFFF;
+// d_tfrac += d_tfracextrastep;
+ movl C(d_pz),%eax
+ movl C(d_aspancount),%esi
+ addl C(d_pzextrastep),%eax
+ addl C(d_sfracextrastep),%ecx // carry out of the sfrac add...
+ adcl C(d_ptexextrastep),%ebx // ...is folded into d_ptex by the adcl
+ addl C(d_countextrastep),%esi
+ movl %eax,C(d_pz)
+ movl C(d_tfracextrastep),%eax
+ movl %esi,C(d_aspancount)
+ addl %eax,%edx // d_tfrac += step; CF is the overflow tested below
+
+// if (d_tfrac & 0x10000)
+// {
+ jnc LSkip1 // the asm tests the carry out of the add rather than bit 16
+
+// d_ptex += r_affinetridesc.skinwidth;
+// d_tfrac &= 0xFFFF;
+ addl C(r_affinetridesc)+atd_skinwidth,%ebx
+
+// }
+
+LSkip1:
+
+// d_light += d_lightextrastep;
+// d_zi += d_ziextrastep;
+ addl C(d_lightextrastep),%edi
+ addl C(d_ziextrastep),%ebp
+
+// }
+ movl C(d_pedgespanpackage),%esi // reload the package pointer for the next iteration
+ decl %ecx // --height: the count lives in the low word of %ecx
+ testl $0xFFFF,%ecx
+ jnz LScanLoop
+
+ popl %ebx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+// else
+// {
+
+LNoLeftEdgeTurnover:
+ movl %esi,C(errorterm)
+
+// d_pdest += d_pdestbasestep;
+ addl C(d_pdestbasestep),%eax // %eax was loaded with d_pdest before the branch
+ movl %eax,C(d_pdest)
+
+// d_pz += d_pzbasestep;
+// d_aspancount += ubasestep;
+// d_ptex += d_ptexbasestep;
+// d_sfrac += d_sfracbasestep;
+// d_ptex += d_sfrac >> 16;
+// d_sfrac &= 0xFFFF;
+ movl C(d_pz),%eax
+ movl C(d_aspancount),%esi
+ addl C(d_pzbasestep),%eax
+ addl C(d_sfracbasestep),%ecx // carry out of the sfrac add...
+ adcl C(d_ptexbasestep),%ebx // ...is folded into d_ptex by the adcl
+ addl C(ubasestep),%esi
+ movl %eax,C(d_pz)
+ movl %esi,C(d_aspancount)
+
+// d_tfrac += d_tfracbasestep;
+ movl C(d_tfracbasestep),%esi
+ addl %esi,%edx // CF is the overflow tested below
+
+// if (d_tfrac & 0x10000)
+// {
+ jnc LSkip2 // the asm tests the carry out of the add rather than bit 16
+
+// d_ptex += r_affinetridesc.skinwidth;
+// d_tfrac &= 0xFFFF;
+ addl C(r_affinetridesc)+atd_skinwidth,%ebx
+
+// }
+
+LSkip2:
+
+// d_light += d_lightbasestep;
+// d_zi += d_zibasestep;
+ addl C(d_lightbasestep),%edi
+ addl C(d_zibasestep),%ebp
+
+// }
+// } while (--height);
+ movl C(d_pedgespanpackage),%esi // reload the package pointer for the next iteration
+ decl %ecx // --height: the count lives in the low word of %ecx
+ testl $0xFFFF,%ecx
+ jnz LScanLoop
+
+ popl %ebx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+
+//----------------------------------------------------------------------
+// Alias model vertex drawing code
+//----------------------------------------------------------------------
+
+#define fv 4+8
+#define numverts 8+8
+// D_PolysetDrawFinalVerts(fv, numverts): plots each vertex as one z-buffered, colormapped pixel; both arguments are read after 2 pushes, hence the +8
+.globl C(D_PolysetDrawFinalVerts)
+C(D_PolysetDrawFinalVerts):
+ pushl %ebp // preserve caller stack frame pointer
+ pushl %ebx
+
+// int i, z;
+// short *zbuf;
+
+ movl numverts(%esp),%ecx // %ecx = vertices remaining
+ movl fv(%esp),%ebx // %ebx = current vertex
+
+ pushl %esi // preserve register variables
+ pushl %edi
+ testl %ecx,%ecx // the loop tests its count at the bottom, so an empty or
+ jle LFVDone // negative count is rejected here, as the C for-loop would
+LFVLoop:
+// for (i=0 ; i<numverts ; i++, fv++)
+// {
+// // valid triangle coordinates for filling can include the bottom and
+// // right clip edges, due to the fill rule; these shouldn't be drawn
+// if ((fv->v[0] < r_refdef.vrectright) &&
+// (fv->v[1] < r_refdef.vrectbottom))
+// {
+ movl fv_v+0(%ebx),%eax // %eax = x, kept for the z-buffer and frame-buffer indexing below
+ movl C(r_refdef)+rd_vrectright,%edx
+ cmpl %edx,%eax
+ jge LNextVert
+ movl fv_v+4(%ebx),%esi // %esi = y
+ movl C(r_refdef)+rd_vrectbottom,%edx
+ cmpl %edx,%esi
+ jge LNextVert
+
+// zbuf = zspantable[fv->v[1]] + fv->v[0];
+ movl C(zspantable)(,%esi,4),%edi
+
+// z = fv->v[5]>>16;
+ movl fv_v+20(%ebx),%edx
+ shrl $16,%edx // only the low 16 bits (%dx) are compared and stored
+
+// if (z >= *zbuf)
+// {
+// int pix;
+ cmpw (%edi,%eax,2),%dx // signed 16-bit compare against the z-buffer entry
+ jl LNextVert
+
+// *zbuf = z;
+ movw %dx,(%edi,%eax,2)
+
+// pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
+ movl fv_v+12(%ebx),%edi
+ shrl $16,%edi
+ movl C(skintable)(,%edi,4),%edi // %edi = skin row pointer
+ movl fv_v+8(%ebx),%edx
+ shrl $16,%edx
+ movb (%edi,%edx),%dl
+
+// pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
+ movl fv_v+16(%ebx),%edi
+ andl $0xFF00,%edi
+ andl $0x00FF,%edx // drop the stale upper bits left over from the column index
+ addl %edx,%edi
+ movl C(acolormap),%edx
+ movb (%edx,%edi,1),%dl
+
+// d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
+ movl C(d_scantable)(,%esi,4),%edi
+ movl C(d_viewbuffer),%esi
+ addl %eax,%edi
+ movb %dl,(%esi,%edi)
+
+// }
+// }
+// }
+LNextVert:
+ addl $(fv_size),%ebx // fv++
+ decl %ecx
+ jnz LFVLoop
+LFVDone:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+
+//----------------------------------------------------------------------
+// Alias model non-subdivided polygon dispatching code
+//
+// not C-callable because of stack buffer cleanup
+//----------------------------------------------------------------------
+
+.globl C(D_DrawNonSubdiv)
+C(D_DrawNonSubdiv):
+ pushl %ebp // preserve caller stack frame pointer
+ movl C(r_affinetridesc)+atd_numtriangles,%ebp
+ pushl %ebx
+ shll $(mtri_shift),%ebp // %ebp = byte offset just past the last triangle
+ pushl %esi // preserve register variables
+ movl C(r_affinetridesc)+atd_ptriangles,%esi
+ pushl %edi
+// Triangles are walked last to first: %ebp steps down by mtri_size until 0 (tested at the bottom, so the count is assumed nonzero -- TODO confirm)
+// mtriangle_t *ptri;
+// finalvert_t *pfv, *index0, *index1, *index2;
+// int i;
+// int lnumtriangles;
+
+// pfv = r_affinetridesc.pfinalverts;
+// ptri = r_affinetridesc.ptriangles;
+// lnumtriangles = r_affinetridesc.numtriangles;
+
+LNDLoop:
+
+// for (i=0 ; i<lnumtriangles ; i++, ptri++)
+// {
+// index0 = pfv + ptri->vertindex[0];
+// index1 = pfv + ptri->vertindex[1];
+// index2 = pfv + ptri->vertindex[2];
+ movl C(r_affinetridesc)+atd_pfinalverts,%edi // reloaded each pass: %edi is scratch below
+ movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
+ shll $(fv_shift),%ecx
+ movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
+ shll $(fv_shift),%edx
+ movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
+ shll $(fv_shift),%ebx
+ addl %edi,%ecx // %ecx = index0
+ addl %edi,%edx // %edx = index1
+ addl %edi,%ebx // %ebx = index2
+
+// d_xdenom = (index0->v[1]-index1->v[1]) *
+// (index0->v[0]-index2->v[0]) -
+// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
+ movl fv_v+4(%ecx),%eax
+ movl fv_v+0(%ecx),%esi
+ subl fv_v+4(%edx),%eax
+ subl fv_v+0(%ebx),%esi
+ imull %esi,%eax
+ movl fv_v+0(%ecx),%esi
+ movl fv_v+4(%ecx),%edi
+ subl fv_v+0(%edx),%esi
+ subl fv_v+4(%ebx),%edi
+ imull %esi,%edi
+ subl %edi,%eax // SF from this subtraction drives the jns below
+
+// if (d_xdenom >= 0)
+// {
+// continue;
+ jns LNextTri
+
+// }
+
+ movl %eax,C(d_xdenom)
+ fildl C(d_xdenom) // integer d_xdenom onto the FPU stack
+
+// r_p0[0] = index0->v[0]; // u
+// r_p0[1] = index0->v[1]; // v
+// r_p0[2] = index0->v[2]; // s
+// r_p0[3] = index0->v[3]; // t
+// r_p0[4] = index0->v[4]; // light
+// r_p0[5] = index0->v[5]; // iz
+ movl fv_v+0(%ecx),%eax
+ movl fv_v+4(%ecx),%esi
+ movl %eax,C(r_p0)+0
+ movl %esi,C(r_p0)+4
+ movl fv_v+8(%ecx),%eax
+ movl fv_v+12(%ecx),%esi
+ movl %eax,C(r_p0)+8
+ movl %esi,C(r_p0)+12
+ movl fv_v+16(%ecx),%eax
+ movl fv_v+20(%ecx),%esi
+ movl %eax,C(r_p0)+16
+ movl %esi,C(r_p0)+20
+
+ fdivrs float_1 // st(0) = float_1 / d_xdenom (float_1 presumably holds 1.0); result is stored at LFacesFront
+
+// r_p1[0] = index1->v[0];
+// r_p1[1] = index1->v[1];
+// r_p1[2] = index1->v[2];
+// r_p1[3] = index1->v[3];
+// r_p1[4] = index1->v[4];
+// r_p1[5] = index1->v[5];
+ movl fv_v+0(%edx),%eax
+ movl fv_v+4(%edx),%esi
+ movl %eax,C(r_p1)+0
+ movl %esi,C(r_p1)+4
+ movl fv_v+8(%edx),%eax
+ movl fv_v+12(%edx),%esi
+ movl %eax,C(r_p1)+8
+ movl %esi,C(r_p1)+12
+ movl fv_v+16(%edx),%eax
+ movl fv_v+20(%edx),%esi
+ movl %eax,C(r_p1)+16
+ movl %esi,C(r_p1)+20
+
+// r_p2[0] = index2->v[0];
+// r_p2[1] = index2->v[1];
+// r_p2[2] = index2->v[2];
+// r_p2[3] = index2->v[3];
+// r_p2[4] = index2->v[4];
+// r_p2[5] = index2->v[5];
+ movl fv_v+0(%ebx),%eax
+ movl fv_v+4(%ebx),%esi
+ movl %eax,C(r_p2)+0
+ movl %esi,C(r_p2)+4
+ movl fv_v+8(%ebx),%eax
+ movl fv_v+12(%ebx),%esi
+ movl %eax,C(r_p2)+8
+ movl %esi,C(r_p2)+12
+ movl fv_v+16(%ebx),%eax
+ movl fv_v+20(%ebx),%esi
+ movl %eax,C(r_p2)+16
+ movl C(r_affinetridesc)+atd_ptriangles,%edi // %esi no longer holds ptriangles, so fetch it again
+ movl %esi,C(r_p2)+20
+ movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
+
+// if (!ptri->facesfront)
+// {
+ testl %eax,%eax
+ jnz LFacesFront
+
+// if (index0->flags & ALIAS_ONSEAM)
+// r_p0[2] += r_affinetridesc.seamfixupX16;
+ movl fv_flags(%ecx),%eax
+ movl fv_flags(%edx),%esi
+ movl fv_flags(%ebx),%edi
+ testl $(ALIAS_ONSEAM),%eax
+ movl C(r_affinetridesc)+atd_seamfixupX16,%eax // movl keeps the flags from the testl for the jz
+ jz LOnseamDone0
+ addl %eax,C(r_p0)+8
+LOnseamDone0:
+
+// if (index1->flags & ALIAS_ONSEAM)
+// r_p1[2] += r_affinetridesc.seamfixupX16;
+ testl $(ALIAS_ONSEAM),%esi
+ jz LOnseamDone1
+ addl %eax,C(r_p1)+8
+LOnseamDone1:
+
+// if (index2->flags & ALIAS_ONSEAM)
+// r_p2[2] += r_affinetridesc.seamfixupX16;
+ testl $(ALIAS_ONSEAM),%edi
+ jz LOnseamDone2
+ addl %eax,C(r_p2)+8
+LOnseamDone2:
+
+// }
+
+LFacesFront:
+
+ fstps C(d_xdenom) // d_xdenom now holds the float reciprocal computed by fdivrs
+
+// D_PolysetSetEdgeTable ();
+// D_RasterizeAliasPolySmooth ();
+ call C(D_PolysetSetEdgeTable)
+ call C(D_RasterizeAliasPolySmooth)
+
+LNextTri:
+ movl C(r_affinetridesc)+atd_ptriangles,%esi // restore the triangle base: %esi was scratch above
+ subl $(mtri_size),%ebp // step back one triangle; same constant the addressing above uses
+ jnz LNDLoop
+// }
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+
+ addl $(SPAN_SIZE),%esp // release SPAN_SIZE bytes of stack that this routine did not allocate itself
+
+ ret
+
+
+#endif // id386
+
--- /dev/null
+++ b/u/d_scana.s
@@ -1,0 +1,70 @@
+//
+// d_scana.s
+// x86 assembly-language turbulent texture mapping code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+
+ .text
+
+//----------------------------------------------------------------------
+// turbulent texture mapping code
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(D_DrawTurbulent8Span)
+C(D_DrawTurbulent8Span):
+ pushl %ebp // save every callee-saved register the loop uses
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+// register roles: %esi = s, %ecx = t, %edi = destination pixel, %ebx = pixels left
+ movl C(r_turb_pdest),%edi
+ movl C(r_turb_spancount),%ebx
+ movl C(r_turb_s),%esi
+ movl C(r_turb_t),%ecx
+
+Llp: // do/while: the count is only tested at the bottom
+ movl C(r_turb_turb),%ebp // %ebp = turbulence table
+ movl %ecx,%eax
+ sarl $16,%eax
+ andl $(CYCLE-1),%eax
+ movl (%ebp,%eax,4),%eax // table entry selected by t
+ addl %esi,%eax
+ sarl $16,%eax
+ andl $(TURB_TEX_SIZE-1),%eax // %eax = wrapped column, (s + turb[t]) >> 16
+ movl %esi,%edx
+ sarl $16,%edx
+ andl $(CYCLE-1),%edx
+ movl (%ebp,%edx,4),%edx // table entry selected by s
+ addl %ecx,%edx
+ sarl $16,%edx
+ andl $(TURB_TEX_SIZE-1),%edx // %edx = wrapped row, (t + turb[s]) >> 16
+ shll $6,%edx // row * 64 (hard-coded row pitch)
+ addl %eax,%edx // texel offset = row * 64 + column
+ addl C(r_turb_sstep),%esi // step s and t for the next pixel
+ addl C(r_turb_tstep),%ecx
+ movl C(r_turb_pbase),%ebp
+ movb (%ebp,%edx,1),%dl // fetch the texel
+ movb %dl,(%edi) // write it and advance the destination
+ incl %edi
+ decl %ebx
+ jnz Llp
+
+ movl %edi,C(r_turb_pdest) // publish the advanced destination pointer
+
+ popl %ebx // unwind in reverse push order
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+#endif // id386
+
--- /dev/null
+++ b/u/d_spr8.s
@@ -1,0 +1,881 @@
+//
+// d_spr8.s
+// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// 8-bpp horizontal span drawing code for polygons, with transparency.
+//----------------------------------------------------------------------
+
+ .text
+
+// out-of-line, rarely-needed clamping code; each stub loads the clamped value and jumps back to its LClampReentryN label
+
+LClampHigh0:
+ movl C(bbextents),%esi
+ jmp LClampReentry0
+LClampHighOrLow0:
+ jg LClampHigh0 // flags are still those of the cmpl before the ja that got here: signed-greater means too high
+ xorl %esi,%esi // otherwise s was negative: clamp to 0
+ jmp LClampReentry0
+
+LClampHigh1:
+ movl C(bbextentt),%edx
+ jmp LClampReentry1
+LClampHighOrLow1:
+ jg LClampHigh1 // same scheme as LClampHighOrLow0, for t
+ xorl %edx,%edx
+ jmp LClampReentry1
+
+LClampLow2:
+ movl $2048,%ebp // the end-of-segment values are clamped to 2048 at the low end, not 0
+ jmp LClampReentry2
+LClampHigh2:
+ movl C(bbextents),%ebp
+ jmp LClampReentry2
+
+LClampLow3:
+ movl $2048,%ecx
+ jmp LClampReentry3
+LClampHigh3:
+ movl C(bbextentt),%ecx
+ jmp LClampReentry3
+
+LClampLow4:
+ movl $2048,%eax
+ jmp LClampReentry4
+LClampHigh4:
+ movl C(bbextents),%eax
+ jmp LClampReentry4
+
+LClampLow5:
+ movl $2048,%ebx
+ jmp LClampReentry5
+LClampHigh5:
+ movl C(bbextentt),%ebx
+ jmp LClampReentry5
+
+
+#define pspans 4+16
+// pspans(%esp): the single argument, found past the return address and the 4 registers pushed below
+ .align 4
+.globl C(D_SpriteDrawSpans)
+C(D_SpriteDrawSpans):
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+//
+// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
+// and span list pointers, and 1/z step in 0.32 fixed-point
+//
+// FIXME: any overlap from rearranging?
+ flds C(d_sdivzstepu)
+ fmuls fp_8 // s8
+ movl C(cacheblock),%edx
+ flds C(d_tdivzstepu)
+ fmuls fp_8 // t8 | s8
+ movl pspans(%esp),%ebx // point to the first span descriptor
+ flds C(d_zistepu)
+ fmuls fp_8 // z8 | t8 | s8
+ movl %edx,pbase // pbase = cacheblock
+ flds C(d_zistepu)
+ fmuls fp_64kx64k // izs | z8 | t8 | s8
+ fxch %st(3) // s8 | z8 | t8 | izs
+ fstps sdivz8stepu
+ fstps zi8stepu
+ fstps tdivz8stepu
+ fistpl izistep // FPU stack is empty again
+ movl izistep,%eax
+ rorl $16,%eax // put upper 16 bits in low word
+ movl sspan_t_count(%ebx),%ecx
+ movl %eax,izistep
+
+ cmpl $0,%ecx // NOTE(review): a first span with count <= 0 is skipped, even if it is negative
+ jle LNextSpan
+
+LSpanLoop:
+
+//
+// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+// initial s and t values
+//
+// FIXME: pipeline FILD?
+ fildl sspan_t_v(%ebx)
+ fildl sspan_t_u(%ebx)
+
+ fld %st(1) // dv | du | dv
+ fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
+ fld %st(1) // du | dv*d_sdivzstepv | du | dv
+ fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
+ // dv*d_sdivzstepv | du | dv
+ faddp %st(0),%st(2) // du*d_tdivzstepu |
+ // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
+ // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // du*d_tdivzstepu | du | dv
+ fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
+ // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+ fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+ // du*d_sdivzstepu; stays in %st(2) at end
+ fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+ // s/z
+ fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
+ // du*d_tdivzstepu | du | s/z
+ fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
+ // du*d_tdivzstepu | du | s/z
+ faddp %st(0),%st(2) // dv*d_zistepv |
+ // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+ fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fmuls C(d_zistepu) // du*d_zistepu |
+ // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // dv*d_zistepv | s/z
+ fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
+ // du*d_zistepu | dv*d_zistepv | s/z
+ fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+ // du*d_tdivzstepu; stays in %st(1) at end
+ fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
+ faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+ flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+ fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+ fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
+ // du*d_zistepu; stays in %st(0) at end
+ // 1/z | fp_64k | t/z | s/z
+
+ fld %st(0) // FIXME: get rid of stall on FMUL?
+ fmuls fp_64kx64k // 1/z scaled for the fixed-point izi | 1/z | fp_64k | t/z | s/z
+ fxch %st(1) // 1/z | scaled 1/z | fp_64k | t/z | s/z
+
+//
+// calculate and clamp s & t
+//
+ fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
+ fxch %st(1) // (the scaled 1/z is also on the stack until the fistpl below)
+
+ fistpl izi // 0.32 fixed-point 1/z
+ movl izi,%ebp
+
+//
+// set pz to point to the first z-buffer pixel in the span
+//
+ rorl $16,%ebp // put upper 16 bits in low word
+ movl sspan_t_v(%ebx),%eax
+ movl %ebp,izi
+ movl sspan_t_u(%ebx),%ebp
+ imull C(d_zrowbytes) // one-operand form: %edx:%eax = v * d_zrowbytes, so %edx is clobbered
+ shll $1,%ebp // a word per pixel
+ addl C(d_pzbuffer),%eax
+ addl %ebp,%eax
+ movl %eax,pz
+
+//
+// point %edi to the first pixel in the span
+//
+ movl C(d_viewbuffer),%ebp
+ movl sspan_t_v(%ebx),%eax
+ pushl %ebx // preserve spans pointer
+ movl C(tadjust),%edx
+ movl C(sadjust),%esi
+ movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
+ addl %ebp,%edi
+ movl sspan_t_u(%ebx),%ebp
+ addl %ebp,%edi // pdest = &pdestspan[scans->u];
+
+//
+// now start the FDIV for the end of the span
+//
+ cmpl $8,%ecx
+ ja LSetupNotLast1 // more than 8 pixels: a full 8-pixel segment comes first
+
+ decl %ecx // short span: %ecx holds count - 1 from here on
+ jz LCleanup1 // if only one pixel, no need to start an FDIV
+ movl %ecx,spancountminus1
+
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fildl spancountminus1
+
+ flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
+ flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
+ fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
+ fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
+ fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+ fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
+ fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
+ // _d_tdivzstepu*scm1
+ fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
+ // _d_tdivzstepu*scm1
+ faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+ fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+ faddp %st(0),%st(3) // _d_sdivzstepu*scm1
+ faddp %st(0),%st(3) // 1/z | t/z | s/z, each advanced by scm1 steps
+
+ flds fp_64k
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight1
+
+LCleanup1:
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+ jmp LFDIVInFlight1
+
+ .align 4
+LSetupNotLast1:
+// finish up the s and t calcs
+ fxch %st(1) // z*64k | 1/z | t/z | s/z
+
+ fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
+ fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
+ fxch %st(1) // z*64k | s | 1/z | t/z | s/z
+ fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
+ fxch %st(1) // s | t | 1/z | t/z | s/z
+ fistpl s // 1/z | t | t/z | s/z
+ fistpl t // 1/z | t/z | s/z
+
+ fadds zi8stepu // advance 1/z by the 8-pixel step
+ fxch %st(2)
+ fadds sdivz8stepu // advance s/z by the 8-pixel step
+ fxch %st(2)
+ flds tdivz8stepu
+ faddp %st(0),%st(2) // advance t/z by the 8-pixel step
+ flds fp_64k
+ fdiv %st(1),%st(0) // z = 1/1/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight1:
+
+ addl s,%esi // %esi = sadjust + s
+ addl t,%edx // %edx = tadjust + t
+ movl C(bbextents),%ebx
+ movl C(bbextentt),%ebp
+ cmpl %ebx,%esi
+ ja LClampHighOrLow0 // unsigned compare catches both s > bbextents and negative s
+LClampReentry0:
+ movl %esi,s
+ movl pbase,%ebx
+ shll $16,%esi
+ cmpl %ebp,%edx
+ movl %esi,sfracf // movl does not disturb the flags of the cmpl above
+ ja LClampHighOrLow1
+LClampReentry1:
+ movl %edx,t
+ movl s,%esi // sfrac = scans->sfrac;
+ shll $16,%edx
+ movl t,%eax // tfrac = scans->tfrac;
+ sarl $16,%esi
+ movl %edx,tfracf
+
+//
+// calculate the texture starting address
+//
+ sarl $16,%eax
+ addl %ebx,%esi
+ imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
+ addl %eax,%esi // psource = pbase + (sfrac >> 16) +
+ // ((tfrac >> 16) * cachewidth);
+
+//
+// determine whether last span or not
+//
+ cmpl $8,%ecx // %ecx is the full count only when > 8; shorter spans already hold count - 1
+ jna LLastSegment
+
+//
+// not the last segment; do full 8-wide segment
+//
+LNotLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there
+//
+
+// pick up after the FDIV that was left in flight previously
+
+ fld %st(0) // duplicate it
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1)
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1)
+ fistpl snext
+ fistpl tnext
+ movl snext,%eax
+ movl tnext,%edx
+
+ subl $8,%ecx // count off this segment's pixels
+ movl C(sadjust),%ebp
+ pushl %ecx // remember count of remaining pixels
+ movl C(tadjust),%ecx
+
+ addl %eax,%ebp
+ addl %edx,%ecx
+
+ movl C(bbextents),%eax
+ movl C(bbextentt),%edx
+
+ cmpl $2048,%ebp
+ jl LClampLow2
+ cmpl %eax,%ebp
+ ja LClampHigh2
+LClampReentry2:
+
+ cmpl $2048,%ecx
+ jl LClampLow3
+ cmpl %edx,%ecx
+ ja LClampHigh3
+LClampReentry3:
+
+ movl %ebp,snext
+ movl %ecx,tnext
+
+ subl s,%ebp // %ebp = s delta across the 8-pixel segment
+ subl t,%ecx // %ecx = t delta across the 8-pixel segment
+
+//
+// set up advancetable
+//
+ movl %ecx,%eax
+ movl %ebp,%edx
+ sarl $19,%edx // sstep >>= 16, plus >>= 3 to divide the segment delta by 8
+ movl C(cachewidth),%ebx
+ sarl $19,%eax // tstep >>= 16, plus >>= 3 to divide the segment delta by 8
+ jz LIsZero
+ imull %ebx,%eax // (tstep >> 16) * cachewidth;
+LIsZero:
+ addl %edx,%eax // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%edx
+ movl %eax,advancetable+4 // advance base in t
+ addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $13,%ebp // left-justify sstep fractional part
+ movl %ebp,sstep
+ movl sfracf,%ebx
+ shll $13,%ecx // left-justify tstep fractional part
+ movl %eax,advancetable // advance extra in t
+ movl %ecx,tstep
+
+ movl pz,%ecx
+ movl izi,%ebp
+
+ cmpw (%ecx),%bp // z test: skip the pixel if izi is behind the z-buffer value
+ jl Lp1
+ movb (%esi),%al // get first source texel
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp1
+ movw %bp,(%ecx)
+ movb %al,(%edi) // store first dest pixel
+Lp1:
+ addl izistep,%ebp // izi and izistep are both kept rotated by 16...
+ adcl $0,%ebp // ...so the carry is wrapped back into bit 0
+ addl tstep,%edx // advance tfrac fractional part by tstep frac
+
+ sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
+ addl sstep,%ebx // advance sfrac fractional part by sstep frac
+ adcl advancetable+4(,%eax,4),%esi // point to next source texel
+
+ cmpw 2(%ecx),%bp
+ jl Lp2
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp2
+ movw %bp,2(%ecx)
+ movb %al,1(%edi)
+Lp2:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ cmpw 4(%ecx),%bp
+ jl Lp3
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp3
+ movw %bp,4(%ecx)
+ movb %al,2(%edi)
+Lp3:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ cmpw 6(%ecx),%bp
+ jl Lp4
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp4
+ movw %bp,6(%ecx)
+ movb %al,3(%edi)
+Lp4:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ cmpw 8(%ecx),%bp
+ jl Lp5
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp5
+ movw %bp,8(%ecx)
+ movb %al,4(%edi)
+Lp5:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+//
+// start FDIV for end of next segment in flight, so it can overlap
+//
+ popl %eax // %eax = pixels remaining after this segment (pushed above)
+ cmpl $8,%eax // more than one segment after this?
+ ja LSetupNotLast2 // yes
+
+ decl %eax
+ jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
+ movl %eax,spancountminus1
+ fildl spancountminus1
+
+ flds C(d_zistepu) // _d_zistepu | spancountminus1
+ fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
+ flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
+ fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+ fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
+ faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
+ fxch %st(1) // scm1 | _d_tdivzstepu*scm1
+ fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+ fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+ faddp %st(0),%st(3) // _d_sdivzstepu*scm1
+ flds fp_64k // 64k | _d_sdivzstepu*scm1
+ fxch %st(1) // _d_sdivzstepu*scm1 | 64k
+ faddp %st(0),%st(4) // 64k
+
+ fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
+ // overlap
+ jmp LFDIVInFlight2
+
+ .align 4
+LSetupNotLast2:
+ fadds zi8stepu
+ fxch %st(2)
+ fadds sdivz8stepu
+ fxch %st(2)
+ flds tdivz8stepu
+ faddp %st(0),%st(2)
+ flds fp_64k
+ fdiv %st(1),%st(0) // z = 1/1/z
+ // this is what we've gone to all this trouble to
+ // overlap
+LFDIVInFlight2:
+ pushl %eax // save the remaining count again (already decremented by one if it was <= 8)
+
+ cmpw 10(%ecx),%bp
+ jl Lp6
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp6
+ movw %bp,10(%ecx)
+ movb %al,5(%edi)
+Lp6:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ cmpw 12(%ecx),%bp
+ jl Lp7
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp7
+ movw %bp,12(%ecx)
+ movb %al,6(%edi)
+Lp7:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ cmpw 14(%ecx),%bp
+ jl Lp8
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp8
+ movw %bp,14(%ecx)
+ movb %al,7(%edi)
+Lp8:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+ addl $8,%edi // 8 destination pixels done
+ addl $16,%ecx // 8 z-buffer words done
+ movl %edx,tfracf
+ movl snext,%edx
+ movl %ebx,sfracf
+ movl tnext,%ebx
+ movl %edx,s // the segment end becomes the next segment's start
+ movl %ebx,t
+
+ movl %ecx,pz
+ movl %ebp,izi
+
+ popl %ecx // retrieve count
+
+//
+// determine whether last span or not
+//
+ cmpl $8,%ecx // are there multiple segments remaining?
+ ja LNotLastSegment // yes
+
+//
+// last segment of scan
+//
+LLastSegment:
+
+//
+// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+// get there. The number of pixels left is variable, and we want to land on the
+// last pixel, not step one past it, so we can't run into arithmetic problems
+//
+ testl %ecx,%ecx // %ecx = pixels in this segment minus one, i.e. the number of steps
+ jz LNoSteps // just draw the last pixel and we're done
+
+// pick up after the FDIV that was left in flight previously
+
+
+ fld %st(0) // duplicate it
+ fmul %st(4),%st(0) // s = s/z * z
+ fxch %st(1)
+ fmul %st(3),%st(0) // t = t/z * z
+ fxch %st(1)
+ fistpl snext
+ fistpl tnext
+
+ movl C(tadjust),%ebx
+ movl C(sadjust),%eax
+
+ addl snext,%eax
+ addl tnext,%ebx
+
+ movl C(bbextents),%ebp
+ movl C(bbextentt),%edx
+
+ cmpl $2048,%eax
+ jl LClampLow4
+ cmpl %ebp,%eax
+ ja LClampHigh4
+LClampReentry4:
+ movl %eax,snext
+
+ cmpl $2048,%ebx
+ jl LClampLow5
+ cmpl %edx,%ebx
+ ja LClampHigh5
+LClampReentry5:
+
+ cmpl $1,%ecx // don't bother
+ je LOnlyOneStep // if two pixels in segment, there's only one step,
+ // of the segment length
+ subl s,%eax
+ subl t,%ebx
+
+ addl %eax,%eax // convert to 15.17 format so multiply by 1.31
+ addl %ebx,%ebx // reciprocal yields 16.48
+ imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
+ movl %edx,%ebp // one-operand imull leaves the high half in %edx; keep it as sstep
+
+ movl %ebx,%eax
+ imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
+
+LSetEntryvec:
+//
+// set up advancetable
+//
+ movl spr8entryvec_table(,%ecx,4),%ebx
+ movl %edx,%eax
+ pushl %ebx // entry point into code for RET later
+ movl %ebp,%ecx
+ sarl $16,%ecx // sstep >>= 16;
+ movl C(cachewidth),%ebx
+ sarl $16,%edx // tstep >>= 16;
+ jz LIsZeroLast
+ imull %ebx,%edx // (tstep >> 16) * cachewidth;
+LIsZeroLast:
+ addl %ecx,%edx // add in sstep
+ // (tstep >> 16) * cachewidth + (sstep >> 16);
+ movl tfracf,%ecx
+ movl %edx,advancetable+4 // advance base in t
+ addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
+ // (sstep >> 16);
+ shll $16,%ebp // left-justify sstep fractional part
+ movl sfracf,%ebx
+ shll $16,%eax // left-justify tstep fractional part
+ movl %edx,advancetable // advance extra in t
+
+ movl %eax,tstep
+ movl %ebp,sstep
+ movl %ecx,%edx // %edx = tfracf, as the pixel code expects
+
+ movl pz,%ecx
+ movl izi,%ebp
+
+ ret // jump to the number-of-pixels handler
+
+//----------------------------------------
+
+LNoSteps:
+ movl pz,%ecx
+ subl $7,%edi // adjust for hardwired offset
+ subl $14,%ecx // LEndSpan addresses the pixel at 7(%edi) and 14(%ecx)
+ jmp LEndSpan
+
+
+LOnlyOneStep:
+ subl s,%eax // a single step: the step is the whole delta, no reciprocal multiply needed
+ subl t,%ebx
+ movl %eax,%ebp
+ movl %ebx,%edx
+ jmp LSetEntryvec
+
+//----------------------------------------
+// entry stubs reached through the address pushed at LSetEntryvec: each biases %edi/%ecx so the hardwired offsets below end on 7(%edi) / 14(%ecx)
+.globl Spr8Entry2_8
+Spr8Entry2_8:
+ subl $6,%edi // adjust for hardwired offsets
+ subl $12,%ecx
+ movb (%esi),%al // LLEntry2_8 loads this texel again after its z test
+ jmp LLEntry2_8
+
+//----------------------------------------
+
+.globl Spr8Entry3_8
+Spr8Entry3_8:
+ subl $5,%edi // adjust for hardwired offsets
+ subl $10,%ecx
+ jmp LLEntry3_8
+
+//----------------------------------------
+
+.globl Spr8Entry4_8
+Spr8Entry4_8:
+ subl $4,%edi // adjust for hardwired offsets
+ subl $8,%ecx
+ jmp LLEntry4_8
+
+//----------------------------------------
+
+.globl Spr8Entry5_8
+Spr8Entry5_8:
+ subl $3,%edi // adjust for hardwired offsets
+ subl $6,%ecx
+ jmp LLEntry5_8
+
+//----------------------------------------
+
+.globl Spr8Entry6_8
+Spr8Entry6_8:
+ subl $2,%edi // adjust for hardwired offsets
+ subl $4,%ecx
+ jmp LLEntry6_8
+
+//----------------------------------------
+
+.globl Spr8Entry7_8
+Spr8Entry7_8:
+ decl %edi // adjust for hardwired offsets
+ subl $2,%ecx
+ jmp LLEntry7_8
+
+//----------------------------------------
+
+.globl Spr8Entry8_8
+Spr8Entry8_8:
+ cmpw (%ecx),%bp // z test: skip the pixel if izi is behind the z-buffer value
+ jl Lp9
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp9
+ movw %bp,(%ecx)
+ movb %al,(%edi)
+Lp9:
+ addl izistep,%ebp // izi and izistep are both kept rotated by 16...
+ adcl $0,%ebp // ...so the carry is wrapped back into bit 0
+ addl tstep,%edx
+ sbbl %eax,%eax // -1 if tfrac carried, else 0: selects the advancetable entry
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi // also adds the sfrac carry
+LLEntry7_8:
+ cmpw 2(%ecx),%bp
+ jl Lp10
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp10
+ movw %bp,2(%ecx)
+ movb %al,1(%edi)
+Lp10:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+LLEntry6_8:
+ cmpw 4(%ecx),%bp
+ jl Lp11
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp11
+ movw %bp,4(%ecx)
+ movb %al,2(%edi)
+Lp11:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+LLEntry5_8:
+ cmpw 6(%ecx),%bp
+ jl Lp12
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp12
+ movw %bp,6(%ecx)
+ movb %al,3(%edi)
+Lp12:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+LLEntry4_8:
+ cmpw 8(%ecx),%bp
+ jl Lp13
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp13
+ movw %bp,8(%ecx)
+ movb %al,4(%edi)
+Lp13:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+LLEntry3_8:
+ cmpw 10(%ecx),%bp
+ jl Lp14
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp14
+ movw %bp,10(%ecx)
+ movb %al,5(%edi)
+Lp14:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+LLEntry2_8:
+ cmpw 12(%ecx),%bp
+ jl Lp15
+ movb (%esi),%al
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp15
+ movw %bp,12(%ecx)
+ movb %al,6(%edi)
+Lp15:
+ addl izistep,%ebp
+ adcl $0,%ebp
+ addl tstep,%edx
+ sbbl %eax,%eax
+ addl sstep,%ebx
+ adcl advancetable+4(,%eax,4),%esi
+
+LEndSpan:
+ cmpw 14(%ecx),%bp
+ jl Lp16
+ movb (%esi),%al // load the texel for the last pixel of the span
+ cmpb $(TRANSPARENT_COLOR),%al
+ jz Lp16
+ movw %bp,14(%ecx)
+ movb %al,7(%edi)
+Lp16:
+
+//
+// clear s/z, t/z, 1/z from FP stack
+//
+ fstp %st(0)
+ fstp %st(0)
+ fstp %st(0)
+
+ popl %ebx // restore spans pointer
+LNextSpan:
+ addl $(sspan_t_size),%ebx // point to next span
+ movl sspan_t_count(%ebx),%ecx
+ cmpl $0,%ecx // any more spans?
+ jg LSpanLoop // yes
+ jz LNextSpan // yes, but this one's empty
+
+ popl %ebx // a negative count ends the list; restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+#endif // id386
--- /dev/null
+++ b/u/d_varsa.s
@@ -1,0 +1,186 @@
+//
+// d_varsa.s
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+
+//-------------------------------------------------------
+// global refresh variables
+//-------------------------------------------------------
+
+// FIXME: put all refresh variables into one contiguous block. Make into one
+// big structure, like cl or sv?
+
+ .align 4
+.globl C(d_sdivzstepu)
+.globl C(d_tdivzstepu)
+.globl C(d_zistepu)
+.globl C(d_sdivzstepv)
+.globl C(d_tdivzstepv)
+.globl C(d_zistepv)
+.globl C(d_sdivzorigin)
+.globl C(d_tdivzorigin)
+.globl C(d_ziorigin)
+C(d_sdivzstepu): .single 0 // nine single-precision floats, all initially 0
+C(d_tdivzstepu): .single 0 // presumably s/z, t/z and 1/z steps along u ...
+C(d_zistepu): .single 0
+C(d_sdivzstepv): .single 0 // ... the same steps along v ...
+C(d_tdivzstepv): .single 0
+C(d_zistepv): .single 0
+C(d_sdivzorigin): .single 0 // ... and their origins (inferred from the names;
+C(d_tdivzorigin): .single 0 // TODO confirm against the C renderer)
+C(d_ziorigin): .single 0
+// 32-bit integer globals, all initially 0, exported through C() like the floats
+.globl C(sadjust)
+.globl C(tadjust)
+.globl C(bbextents)
+.globl C(bbextentt)
+C(sadjust): .long 0
+C(tadjust): .long 0
+C(bbextents): .long 0
+C(bbextentt): .long 0
+// more 32-bit globals; note the storage order below differs from the .globl order
+.globl C(cacheblock)
+.globl C(d_viewbuffer)
+.globl C(cachewidth)
+.globl C(d_pzbuffer)
+.globl C(d_zrowbytes)
+.globl C(d_zwidth)
+C(cacheblock): .long 0
+C(cachewidth): .long 0
+C(d_viewbuffer): .long 0
+C(d_pzbuffer): .long 0
+C(d_zrowbytes): .long 0
+C(d_zwidth): .long 0
+
+
+//-------------------------------------------------------
+// ASM-only variables
+//-------------------------------------------------------
+.globl izi // exported under its plain name (no C() wrapper)
+izi: .long 0
+// scratch dwords shared between the asm drawing files; all start at 0
+.globl pbase, s, t, sfracf, tfracf, snext, tnext
+.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
+.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
+s: .long 0
+t: .long 0
+snext: .long 0
+tnext: .long 0
+sfracf: .long 0
+tfracf: .long 0
+pbase: .long 0
+zi8stepu: .long 0
+sdivz8stepu: .long 0
+tdivz8stepu: .long 0
+zi16stepu: .long 0
+sdivz16stepu: .long 0
+tdivz16stepu: .long 0
+spancountminus1: .long 0
+pz: .long 0
+
+.globl izistep
+izistep: .long 0 // span code adds this to %ebp per pixel (addl izistep,%ebp)
+
+//-------------------------------------------------------
+// local variables for d_draw16.s
+//-------------------------------------------------------
+
+.globl reciprocal_table_16, entryvec_table_16
+// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13, 1/14 and
+// 1/15; each entry is floor(2^31 / n) (originally labelled "0.32 form")
+reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
+ .long 0x19999999, 0x15555555, 0x12492492
+ .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
+ .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
+// the entry points below are defined in another file (presumably d_draw16.s)
+ .extern Entry2_16
+ .extern Entry3_16
+ .extern Entry4_16
+ .extern Entry5_16
+ .extern Entry6_16
+ .extern Entry7_16
+ .extern Entry8_16
+ .extern Entry9_16
+ .extern Entry10_16
+ .extern Entry11_16
+ .extern Entry12_16
+ .extern Entry13_16
+ .extern Entry14_16
+ .extern Entry15_16
+ .extern Entry16_16
+// 16-slot table of code addresses; slot 0 is 0, slot k-1 holds Entryk_16
+entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16
+ .long Entry5_16, Entry6_16, Entry7_16, Entry8_16
+ .long Entry9_16, Entry10_16, Entry11_16, Entry12_16
+ .long Entry13_16, Entry14_16, Entry15_16, Entry16_16
+
+//-------------------------------------------------------
+// local variables for d_parta.s
+//-------------------------------------------------------
+.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
+DP_Count: .long 0
+DP_u: .long 0
+DP_v: .long 0
+DP_32768: .single 32768.0 // float constant 2^15
+DP_Color: .long 0
+DP_Pix: .long 0
+
+// DP_NxN are code labels defined elsewhere (presumably in d_parta.s)
+ .extern DP_1x1
+ .extern DP_2x2
+ .extern DP_3x3
+ .extern DP_4x4
+// four code addresses, in order 1x1 .. 4x4
+DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4
+
+//
+// advancetable is 8 bytes (two dwords). The label marks the first dword; span
+// code indexes it as advancetable+4(,%eax,4) with eax = 0 or -1 (from sbbl)
+//
+.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
+advancetable: .long 0, 0
+sstep: .long 0 // added to %ebx per pixel by the span code
+tstep: .long 0 // added to %edx per pixel; its carry selects the advancetable slot
+
+pspantemp: .long 0
+counttemp: .long 0
+jumptemp: .long 0
+
+// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7; each entry is floor(2^31 / n)
+.globl reciprocal_table, entryvec_table
+reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
+ .long 0x19999999, 0x15555555, 0x12492492
+// the entry points below are code labels defined in another file
+ .extern Entry2_8
+ .extern Entry3_8
+ .extern Entry4_8
+ .extern Entry5_8
+ .extern Entry6_8
+ .extern Entry7_8
+ .extern Entry8_8
+// 8-slot table of code addresses; slot 0 is 0, slot k-1 holds Entryk_8
+entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8
+ .long Entry5_8, Entry6_8, Entry7_8, Entry8_8
+
+ .extern Spr8Entry2_8
+ .extern Spr8Entry3_8
+ .extern Spr8Entry4_8
+ .extern Spr8Entry5_8
+ .extern Spr8Entry6_8
+ .extern Spr8Entry7_8
+ .extern Spr8Entry8_8
+// same layout as entryvec_table, but holding the Spr8Entryk_8 labels
+.globl spr8entryvec_table
+spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
+ .long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
+
+#endif // id386
+
--- /dev/null
+++ b/u/math.s
@@ -1,0 +1,399 @@
+//
+// math.s
+// x86 assembly-language math routines.
+
+#define GLQUAKE 1 // don't include unneeded defs
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+
+#ifdef id386
+
+ .data
+
+ .align 4
+Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3 // BoxOnPlaneSide dispatch, indexed by pl_signbits
+ .long Lcase4, Lcase5, Lcase6, Lcase7 // eight entries: valid indices are 0-7
+
+ .text
+
+// Invert24To16 (val): returns 2^40 / val, or 0xFFFFFFFF when val <= 0x100
+// (signed compare). TODO carried over from the original: rounding needed?
+#define val 4 // the only argument, just above the return address
+
+.globl C(Invert24To16)
+C(Invert24To16):
+
+ movl $0x100,%edx // high dword of the dividend: edx:eax = 2^40
+ movl val(%esp),%ecx // ecx = divisor
+ cmpl %edx,%ecx
+ jle LOutOfRange // val <= 0x100: hand back the saturated value
+
+ xorl %eax,%eax // low dword of the dividend
+ divl %ecx // eax = 2^40 / val (unsigned divide)
+
+ ret
+
+LOutOfRange:
+ movl $0xFFFFFFFF,%eax
+ ret
+
+#define in 4
+#define out 8
+
+// TransformVector (in, out): out[0], out[1], out[2] receive the dot products
+// of in with vright, vup and vpn. No frame is set up; args are esp-relative.
+ .align 2
+.globl C(TransformVector)
+C(TransformVector):
+ movl out(%esp),%edx // edx = out
+ movl in(%esp),%eax // eax = in
+
+// Each dot product is accumulated as (in[0]*m[0] + in[1]*m[1]) + in[2]*m[2].
+// Nothing is stored until all three sums are on the FP stack.
+ flds (%eax)
+ fmuls C(vright) // in[0]*vright[0]
+ flds 4(%eax)
+ fmuls C(vright)+4 // in[1]*vright[1] | in[0]*vright[0]
+ faddp %st(0),%st(1)
+ flds 8(%eax)
+ fmuls C(vright)+8 // in[2]*vright[2] | partial sum
+ faddp %st(0),%st(1) // right
+
+ flds (%eax)
+ fmuls C(vup)
+ flds 4(%eax)
+ fmuls C(vup)+4
+ faddp %st(0),%st(1)
+ flds 8(%eax)
+ fmuls C(vup)+8
+ faddp %st(0),%st(1) // up | right
+
+ flds (%eax)
+ fmuls C(vpn)
+ flds 4(%eax)
+ fmuls C(vpn)+4
+ faddp %st(0),%st(1)
+ flds 8(%eax)
+ fmuls C(vpn)+8
+ faddp %st(0),%st(1) // pn | up | right
+
+ fstps 8(%edx) // out[2] = DotProduct (in, vpn)
+ fstps 4(%edx) // out[1] = DotProduct (in, vup)
+ fstps (%edx) // out[0] = DotProduct (in, vright)
+
+ ret
+
+
+#define EMINS 4+4
+#define EMAXS 4+8
+#define P 4+12
+// BoxOnPlaneSide (emins, emaxs, p): returns the side bits built at LSetSides
+ .align 2
+.globl C(BoxOnPlaneSide)
+C(BoxOnPlaneSide):
+ pushl %ebx // callee-saved; popped again at LSetSides
+// register roles for every Lcase: ecx = emins, ebx = emaxs, edx = p
+ movl P(%esp),%edx
+ movl EMINS(%esp),%ecx
+ xorl %eax,%eax // clear eax so %al alone forms the table index
+ movl EMAXS(%esp),%ebx
+ movb pl_signbits(%edx),%al
+ cmpb $8,%al
+ jae Lerror // unsigned test: 0x80-0xFF must not index past Ljmptab
+ flds pl_normal(%edx) // p->normal[0]
+ fld %st(0) // p->normal[0] | p->normal[0]
+ jmp *Ljmptab(,%eax,4) // indirect jump through the 8-entry case table
+
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase0:
+ fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0]
+ flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]
+ fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] |
+ // p->normal[1]
+ fmuls (%ecx) // p->normal[0]*emins[0] |
+ // p->normal[0]*emaxs[0] | p->normal[1]
+ fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fld %st(0) // p->normal[1] | p->normal[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] |
+ // p->normal[1] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] |
+ // p->normal[2] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 4(%ecx) // p->normal[1]*emins[1] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[2] | p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fld %st(0) // p->normal[2] | p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fmuls 8(%ebx) // p->normal[2]*emaxs[2] |
+ // p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[0]*emins[0]
+ fxch %st(5) // p->normal[0]*emins[0] |
+ // p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1] |
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ faddp %st(0),%st(3) //p->normal[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ fmuls 8(%ecx) //p->normal[2]*emins[2] |
+ // p->normal[1]*emaxs[1] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ fxch %st(1) //p->normal[1]*emaxs[1] |
+ // p->normal[2]*emins[2] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0] |
+ // p->normal[2]*emaxs[2]
+ faddp %st(0),%st(3) //p->normal[2]*emins[2] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+ // p->normal[2]*emaxs[2]
+ fxch %st(3) //p->normal[2]*emaxs[2] |
+ // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
+ // p->normal[2]*emins[2]
+ faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]|
+ // dist1 | p->normal[2]*emins[2]
+
+ jmp LSetSides // LSetSides adds the two remaining entries to form dist2
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+Lcase1: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase2: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+Lcase3: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase4: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
+Lcase5: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ebx) // emaxs[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ecx) // emins[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase6: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ebx) // emaxs[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ecx) // emins[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete
+
+ jmp LSetSides // dist2 is completed there
+
+//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
+//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
+Lcase7: // entered with st(0) = st(1) = p->normal[0]
+ fmuls (%ecx) // emins[0]
+ flds pl_normal+4(%edx) // p->normal[1]
+ fxch %st(2)
+ fmuls (%ebx) // emaxs[0]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[1]
+ fmuls 4(%ecx) // emins[1]
+ flds pl_normal+8(%edx) // p->normal[2]
+ fxch %st(2)
+ fmuls 4(%ebx) // emaxs[1]
+ fxch %st(2)
+ fld %st(0) // second copy of p->normal[2]
+ fmuls 8(%ecx) // emins[2]
+ fxch %st(5)
+ faddp %st(0),%st(3) // dist2: terms 0 and 1 summed
+ fmuls 8(%ebx) // emaxs[2]
+ fxch %st(1)
+ faddp %st(0),%st(3) // dist1: terms 0 and 1 summed
+ fxch %st(3)
+ faddp %st(0),%st(2) // dist1 complete; falls through into LSetSides
+
+LSetSides:
+// common tail of all eight cases; edx still points at the plane
+// sides = 0;
+// if (dist1 >= p->dist)
+// sides = 1;
+// if (dist2 < p->dist)
+// sides |= 2;
+
+ faddp %st(0),%st(2) // dist1 | dist2
+ fcomps pl_dist(%edx) // compare dist1 with p->dist and pop it
+ xorl %ecx,%ecx // sides = 0
+ fnstsw %ax // status of the dist1 compare
+ fcomps pl_dist(%edx) // compare dist2 with p->dist and pop it
+ andb $1,%ah // keep C0 (status bit 8): set when dist1 < p->dist
+ xorb $1,%ah // invert: 1 when dist1 >= p->dist
+ addb %ah,%cl
+
+ fnstsw %ax // status of the dist2 compare
+ andb $1,%ah // C0: set when dist2 < p->dist
+ addb %ah,%ah // shift into bit 1
+ addb %ah,%cl
+
+// return sides;
+
+ popl %ebx // matches the pushl at function entry
+ movl %ecx,%eax // return status
+
+ ret
+
+
+Lerror: // reached from the cmpb $8 range check on pl_signbits
+ call C(BOPS_Error) // NOTE(review): no ret follows and %ebx is still pushed; presumably BOPS_Error never returns - confirm
+
+#endif // id386
--- /dev/null
+++ b/u/quakeasm.h
@@ -1,0 +1,248 @@
+//
+// quakeasm.h: general asm header file
+//
+
+//#define GLQUAKE 1
+// GLQUAKE, when defined, skips the large .extern list guarded by #ifndef GLQUAKE
+#ifdef __i386__
+#define id386
+#endif
+// the .s files wrap their whole bodies in #ifdef id386
+// !!! must be kept the same as in d_iface.h !!!
+#define TRANSPARENT_COLOR 255
+
+#ifndef GLQUAKE
+ .extern C(d_zistepu)
+ .extern C(d_pzbuffer)
+ .extern C(d_zistepv)
+ .extern C(d_zrowbytes)
+ .extern C(d_ziorigin)
+ .extern C(r_turb_s)
+ .extern C(r_turb_t)
+ .extern C(r_turb_pdest)
+ .extern C(r_turb_spancount)
+ .extern C(r_turb_turb)
+ .extern C(r_turb_pbase)
+ .extern C(r_turb_sstep)
+ .extern C(r_turb_tstep)
+ .extern C(r_bmodelactive)
+ .extern C(d_sdivzstepu)
+ .extern C(d_tdivzstepu)
+ .extern C(d_sdivzstepv)
+ .extern C(d_tdivzstepv)
+ .extern C(d_sdivzorigin)
+ .extern C(d_tdivzorigin)
+ .extern C(sadjust)
+ .extern C(tadjust)
+ .extern C(bbextents)
+ .extern C(bbextentt)
+ .extern C(cacheblock)
+ .extern C(d_viewbuffer)
+ .extern C(cachewidth)
+ .extern C(d_pzbuffer)
+ .extern C(d_zrowbytes)
+ .extern C(d_zwidth)
+ .extern C(d_scantable)
+ .extern C(r_lightptr)
+ .extern C(r_numvblocks)
+ .extern C(prowdestbase)
+ .extern C(pbasesource)
+ .extern C(r_lightwidth)
+ .extern C(lightright)
+ .extern C(lightrightstep)
+ .extern C(lightdeltastep)
+ .extern C(lightdelta)
+ .extern C(lightright)
+ .extern C(lightdelta)
+ .extern C(sourcetstep)
+ .extern C(surfrowbytes)
+ .extern C(lightrightstep)
+ .extern C(lightdeltastep)
+ .extern C(r_sourcemax)
+ .extern C(r_stepback)
+ .extern C(colormap)
+ .extern C(blocksize)
+ .extern C(sourcesstep)
+ .extern C(lightleft)
+ .extern C(blockdivshift)
+ .extern C(blockdivmask)
+ .extern C(lightleftstep)
+ .extern C(r_origin)
+ .extern C(r_ppn)
+ .extern C(r_pup)
+ .extern C(r_pright)
+ .extern C(ycenter)
+ .extern C(xcenter)
+ .extern C(d_vrectbottom_particle)
+ .extern C(d_vrectright_particle)
+ .extern C(d_vrecty)
+ .extern C(d_vrectx)
+ .extern C(d_pix_shift)
+ .extern C(d_pix_min)
+ .extern C(d_pix_max)
+ .extern C(d_y_aspect_shift)
+ .extern C(screenwidth)
+ .extern C(r_leftclipped)
+ .extern C(r_leftenter)
+ .extern C(r_rightclipped)
+ .extern C(r_rightenter)
+ .extern C(modelorg)
+ .extern C(xscale)
+ .extern C(r_refdef)
+ .extern C(yscale)
+ .extern C(r_leftexit)
+ .extern C(r_rightexit)
+ .extern C(r_lastvertvalid)
+ .extern C(cacheoffset)
+ .extern C(newedges)
+ .extern C(removeedges)
+ .extern C(r_pedge)
+ .extern C(r_framecount)
+ .extern C(r_u1)
+ .extern C(r_emitted)
+ .extern C(edge_p)
+ .extern C(surface_p)
+ .extern C(surfaces)
+ .extern C(r_lzi1)
+ .extern C(r_v1)
+ .extern C(r_ceilv1)
+ .extern C(r_nearzi)
+ .extern C(r_nearzionly)
+ .extern C(edge_aftertail)
+ .extern C(edge_tail)
+ .extern C(current_iv)
+ .extern C(edge_head_u_shift20)
+ .extern C(span_p)
+ .extern C(edge_head)
+ .extern C(fv)
+ .extern C(edge_tail_u_shift20)
+ .extern C(r_apverts)
+ .extern C(r_anumverts)
+ .extern C(aliastransform)
+ .extern C(r_avertexnormals)
+ .extern C(r_plightvec)
+ .extern C(r_ambientlight)
+ .extern C(r_shadelight)
+ .extern C(aliasxcenter)
+ .extern C(aliasycenter)
+ .extern C(a_sstepxfrac)
+ .extern C(r_affinetridesc)
+ .extern C(acolormap)
+ .extern C(d_pcolormap)
+ .extern C(r_affinetridesc)
+ .extern C(d_sfrac)
+ .extern C(d_ptex)
+ .extern C(d_pedgespanpackage)
+ .extern C(d_tfrac)
+ .extern C(d_light)
+ .extern C(d_zi)
+ .extern C(d_pdest)
+ .extern C(d_pz)
+ .extern C(d_aspancount)
+ .extern C(erroradjustup)
+ .extern C(errorterm)
+ .extern C(d_xdenom)
+ .extern C(r_p0)
+ .extern C(r_p1)
+ .extern C(r_p2)
+ .extern C(a_tstepxfrac)
+ .extern C(r_sstepx)
+ .extern C(r_tstepx)
+ .extern C(a_ststepxwhole)
+ .extern C(zspantable)
+ .extern C(skintable)
+ .extern C(r_zistepx)
+ .extern C(erroradjustdown)
+ .extern C(d_countextrastep)
+ .extern C(ubasestep)
+ .extern C(a_ststepxwhole)
+ .extern C(a_tstepxfrac)
+ .extern C(r_lstepx)
+ .extern C(a_spans)
+ .extern C(erroradjustdown)
+ .extern C(d_pdestextrastep)
+ .extern C(d_pzextrastep)
+ .extern C(d_sfracextrastep)
+ .extern C(d_ptexextrastep)
+ .extern C(d_countextrastep)
+ .extern C(d_tfracextrastep)
+ .extern C(d_lightextrastep)
+ .extern C(d_ziextrastep)
+ .extern C(d_pdestbasestep)
+ .extern C(d_pzbasestep)
+ .extern C(d_sfracbasestep)
+ .extern C(d_ptexbasestep)
+ .extern C(ubasestep)
+ .extern C(d_tfracbasestep)
+ .extern C(d_lightbasestep)
+ .extern C(d_zibasestep)
+ .extern C(zspantable)
+ .extern C(r_lstepy)
+ .extern C(r_sstepy)
+ .extern C(r_tstepy)
+ .extern C(r_zistepy)
+ .extern C(D_PolysetSetEdgeTable)
+ .extern C(D_RasterizeAliasPolySmooth)
+
+ .extern float_point5
+ .extern Float2ToThe31nd
+ .extern izistep
+ .extern izi
+ .extern FloatMinus2ToThe31nd
+ .extern float_1
+ .extern float_particle_z_clip
+ .extern float_minus_1
+ .extern float_0
+ .extern fp_16
+ .extern fp_64k
+ .extern fp_1m
+ .extern fp_1m_minus_1
+ .extern fp_8
+ .extern entryvec_table
+ .extern advancetable
+ .extern sstep
+ .extern tstep
+ .extern pspantemp
+ .extern counttemp
+ .extern jumptemp
+ .extern reciprocal_table
+ .extern DP_Count
+ .extern DP_u
+ .extern DP_v
+ .extern DP_32768
+ .extern DP_Color
+ .extern DP_Pix
+ .extern DP_EntryTable
+ .extern pbase
+ .extern s
+ .extern t
+ .extern sfracf
+ .extern tfracf
+ .extern snext
+ .extern tnext
+ .extern spancountminus1
+ .extern zi16stepu
+ .extern sdivz16stepu
+ .extern tdivz16stepu
+ .extern zi8stepu
+ .extern sdivz8stepu
+ .extern tdivz8stepu
+ .extern reciprocal_table_16
+ .extern entryvec_table_16
+ .extern ceil_cw
+ .extern single_cw
+ .extern fp_64kx64k
+ .extern pz
+ .extern spr8entryvec_table
+#endif
+
+ .extern C(snd_scaletable)
+ .extern C(paintbuffer)
+ .extern C(snd_linear_count)
+ .extern C(snd_p)
+ .extern C(snd_vol)
+ .extern C(snd_out)
+ .extern C(vright)
+ .extern C(vup)
+ .extern C(vpn)
+ .extern C(BOPS_Error)
--- /dev/null
+++ b/u/r_aclipa.s
@@ -1,0 +1,197 @@
+//
+// r_aclipa.s
+// x86 assembly-language Alias model clipping code (R_Alias_clip_*).
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+Ltemp0: .long 0
+Ltemp1: .long 0
+
+ .text
+
+#define pfv0 8+4
+#define pfv1 8+8
+#define out 8+12
+
+.globl C(R_Alias_clip_bottom)
+C(R_Alias_clip_bottom):
+ pushl %esi
+ pushl %edi
+// eax = coordinate of the clip edge, esi = pfv0, edi = pfv1
+ movl C(r_refdef)+rd_aliasvrectbottom,%eax
+ movl pfv1(%esp),%edi
+ movl pfv0(%esp),%esi
+
+// Shared with R_Alias_clip_top, which arrives with the same register setup.
+// Always interpolate starting from the vertex with the smaller v[1].
+LDoForwardOrBackward:
+
+ movl fv_v+4(%edi),%ecx
+ movl fv_v+4(%esi),%edx
+
+ cmpl %ecx,%edx
+ jl LDoForward
+
+// pfv0->v[1] >= pfv1->v[1]: trade the two coordinates and the two pointers
+ xchgl %edx,%ecx
+ xchgl %esi,%edi
+
+// edx = start coordinate, ecx = end coordinate, eax = clip edge
+LDoForward:
+// scale = (edge - start) / (end - start), routed through Ltemp0 and Ltemp1
+ subl %edx,%ecx
+ subl %edx,%eax
+ movl %eax,Ltemp0
+ movl %ecx,Ltemp1
+ fildl Ltemp1 // end - start
+ fildl Ltemp0 // edge - start | end - start
+ movl $2,%eax // LDo3Forward makes two passes
+ movl out(%esp),%edx // edx = out
+ fdivp %st(0),%st(1) // scale
+
+LDo3Forward:
+ fildl fv_v+0(%esi) // fv0v0 | scale
+ fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale
+ fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale
+ fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
+ fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
+ fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
+ // scale
+ fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
+ // scale
+ fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
+ // fv1v2 | scale
+ fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
+ // fv1v2 | scale
+ fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
+ // fv1v0-fv0v0 | fv1v2 | scale
+ fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+ // fv1v0-fv0v0 | fv1v2 | scale
+ fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+ // fv1v0-fv0v0 | fv1v2-fv0v2 | scale
+ fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+ // fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
+ fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
+ // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
+ addl $12,%edi // step both inputs and the output by three dwords
+ fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
+ // (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
+ addl $12,%esi
+ addl $12,%edx // the stores below therefore use -12 offsets
+ fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
+ // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
+ // fv0v2
+ fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
+ // (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
+ // fv0v2
+ faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale |
+ // fv0v0+(fv1v0-fv0v0)*scale |
+ // (fv1v2-fv0v2)*scale | fv0v2
+ faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
+ // fv0v0+(fv1v0-fv0v0)*scale |
+ // (fv1v2-fv0v2)*scale | fv0v2
+ fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
+ // (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
+ faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale |
+ // fv0v2+(fv1v2-fv0v2)*scale |
+ // fv0v1+(fv1v1-fv0v1)*scale
+ fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale |
+ // fv0v2+(fv1v2-fv0v2)*scale |
+ // fv0v1+(fv1v1-fv0v1)*scale
+ fadds float_point5 // +0.5 before the integer store (applied to all three)
+ fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale |
+ // fv0v2+(fv1v2-fv0v2)*scale |
+ // fv0v0+(fv1v0-fv0v0)*scale
+ fadds float_point5
+ fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale |
+ // fv0v1+(fv1v1-fv0v1)*scale |
+ // fv0v0+(fv1v0-fv0v0)*scale
+ fadds float_point5
+ fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale |
+ // fv0v1+(fv1v1-fv0v1)*scale |
+ // fv0v2+(fv1v2-fv0v2)*scale
+ fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale |
+ // fv0v2+(fv1v2-fv0v2)*scale
+ fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
+ // fv0v2+(fv1v2-fv0v2)*scale
+ fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale
+ fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale
+ fistpl fv_v+8-12(%edx) // scale
+
+ decl %eax // pass counter, set to 2 at LDoForward
+ jnz LDo3Forward // second pass handles the next three dwords
+
+ fstp %st(0) // drop scale: the FP stack is empty again
+
+ popl %edi // shared epilogue for all four R_Alias_clip_* entry points
+ popl %esi
+
+ ret
+
+
+.globl C(R_Alias_clip_top)
+C(R_Alias_clip_top):
+ movl C(r_refdef)+rd_aliasvrect+4,%eax // eax = coordinate of the clip edge
+ pushl %esi
+ pushl %edi
+// both pushes are popped by the shared epilogue that follows LDo3Forward
+ movl pfv1(%esp),%edi
+ movl pfv0(%esp),%esi
+// esi = pfv0, edi = pfv1: the setup LDoForwardOrBackward expects
+ jmp LDoForwardOrBackward
+
+
+
+.globl C(R_Alias_clip_right)
+C(R_Alias_clip_right):
+ pushl %esi
+ pushl %edi
+
+ movl C(r_refdef)+rd_aliasvrectright,%eax
+ movl pfv0(%esp),%esi
+ movl pfv1(%esp),%edi
+
+// Shared by the right and left clippers. On entry: esi = pfv0, edi = pfv1,
+// eax = coordinate of the clip edge.
+LRightLeftEntry:
+
+// The direction is picked by comparing v[1], but the interpolation that
+// follows runs on v[0]; movl leaves the flags from cmpl intact for jl.
+ movl fv_v+4(%esi),%edx
+ cmpl fv_v+4(%edi),%edx
+ movl fv_v+0(%esi),%edx
+ movl fv_v+0(%edi),%ecx
+ jl LDoForward
+
+// pfv0->v[1] >= pfv1->v[1]: swap the endpoints and their v[0] values
+ xchgl %edx,%ecx
+ xchgl %esi,%edi
+
+// edx = start v[0], ecx = end v[0]; LDoForward computes the scale and
+// interpolates from esi towards edi
+LDoForward2:
+
+ jmp LDoForward
+
+
+.globl C(R_Alias_clip_left)
+C(R_Alias_clip_left):
+ movl C(r_refdef)+rd_aliasvrect+0,%eax // eax = coordinate of the clip edge
+ pushl %esi
+ pushl %edi
+// both pushes are popped by the shared epilogue that follows LDo3Forward
+ movl pfv1(%esp),%edi
+ movl pfv0(%esp),%esi
+// esi = pfv0, edi = pfv1: the setup LRightLeftEntry expects
+ jmp LRightLeftEntry
+
+
+#endif // id386
+
--- /dev/null
+++ b/u/r_aliasa.s
@@ -1,0 +1,218 @@
+//
+// r_aliasa.s
+// x86 assembly-language Alias model transform and project code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+
+Lfloat_1: .single 1.0
+Ltemp: .long 0
+Lcoords: .long 0, 0, 0
+
+ .text
+
+#define fv 12+4
+#define pstverts 12+8
+
+.globl C(R_AliasTransformAndProjectFinalVerts)
+C(R_AliasTransformAndProjectFinalVerts):
+ pushl %ebp // three saves, so the arguments start at 12+4(%esp)
+ pushl %edi
+ pushl %esi // popped in reverse order after the loop
+
+// C locals mirrored by this routine:
+// int i, temp;
+// float lightcos, *plightnormal, zi;
+// trivertx_t *pverts;
+
+// for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
+// {
+ xorl %edx,%edx // only %dl is written later, so edx stays 0..255
+ movl C(r_anumverts),%ecx // ecx = remaining vertex count
+ movl fv(%esp),%edi // edi = fv
+ movl pstverts(%esp),%ebp // ebp = pstverts
+// pverts = r_apverts;
+ movl C(r_apverts),%esi
+
+Lloop:
+// per-vertex body: esi = pverts, edi = fv, ebp = pstverts, ecx = count left
+// // transform and project
+// zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
+// aliastransform[2][3]);
+ movb (%esi),%dl // byte coordinate, widened through the Lcoords dwords
+ movb %dl,Lcoords // only the low byte is ever written; the rest stays 0
+ fildl Lcoords // v[0]
+ movb 1(%esi),%dl
+ movb %dl,Lcoords+4
+ fildl Lcoords+4 // v[1] | v[0]
+ movb 2(%esi),%dl
+ movb %dl,Lcoords+8
+ fildl Lcoords+8 // v[2] | v[1] | v[0]
+
+ fld %st(2) // v[0] | v[2] | v[1] | v[0]
+ fmuls C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
+ fld %st(2) // v[1] | accum | v[2] | v[1] | v[0]
+ fmuls C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
+ fxch %st(1) // accum | accum2 | v[2] | v[1] | v[0]
+ fadds C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
+ fld %st(2) // v[2] | accum | accum2 | v[2] | v[1] | v[0]
+ fmuls C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
+ // v[0]
+ fxch %st(1) // accum | accum3 | accum2 | v[2] | v[1] | v[0]
+ faddp %st(0),%st(2) // accum3 | accum | v[2] | v[1] | v[0]
+ movb tv_lightnormalindex(%esi),%dl // edx = normal index (upper bits are 0)
+ movl stv_s(%ebp),%eax
+ movl %eax,fv_v+8(%edi) // fv->v[2] = pstverts->s
+ faddp %st(0),%st(1) // z | v[2] | v[1] | v[0]
+
+ movl stv_t(%ebp),%eax
+ movl %eax,fv_v+12(%edi) // fv->v[3] = pstverts->t
+
+// // lighting
+// plightnormal = r_avertexnormals[pverts->lightnormalindex];
+
+ fdivrs Lfloat_1 // zi | v[2] | v[1] | v[0]
+
+// fv->v[2] = pstverts->s;
+// fv->v[3] = pstverts->t;
+// fv->flags = pstverts->onseam;
+ movl stv_onseam(%ebp),%eax
+ movl %eax,fv_flags(%edi)
+// the next three loads are overwritten at once; presumably cache pre-touches
+ movl fv_size(%edi),%eax
+ movl stv_size(%ebp),%eax
+ movl 4(%esi),%eax
+
+ leal (%edx,%edx,2),%eax // index*3
+
+ fxch %st(3) // v[0] | v[2] | v[1] | zi
+
+// lightcos = DotProduct (plightnormal, r_plightvec);
+ flds C(r_avertexnormals)(,%eax,4) // normal entries are 3 floats, hence index*3*4
+ fmuls C(r_plightvec)
+ flds C(r_avertexnormals)+4(,%eax,4)
+ fmuls C(r_plightvec)+4
+ flds C(r_avertexnormals)+8(,%eax,4)
+ fmuls C(r_plightvec)+8
+ fxch %st(1)
+ faddp %st(0),%st(2)
+ fld %st(2) // v[0] | laccum | laccum2 | v[0] | v[2] |
+ // v[1] | zi
+ fmuls C(aliastransform)+0 // xaccum | laccum | laccum2 | v[0] | v[2] |
+ // v[1] | zi
+ fxch %st(2) // laccum2 | laccum | xaccum | v[0] | v[2] |
+ // v[1] | zi
+ faddp %st(0),%st(1) // laccum | xaccum | v[0] | v[2] | v[1] | zi
+
+// temp = r_ambientlight;
+// if (lightcos < 0)
+// {
+ fsts Ltemp // store lightcos as a float without popping it
+ movl C(r_ambientlight),%eax
+ movb Ltemp+3,%dl // top byte of the float holds the sign bit
+ testb $0x80,%dl
+ jz Lsavelight // no need to clamp if only ambient lit, because
+ // r_ambientlight is preclamped
+
+// temp += (int)(r_shadelight * lightcos);
+ fmuls C(r_shadelight)
+// FIXME: fast float->int conversion?
+ fistpl Ltemp // pops lightcos; Ltemp now holds the integer product
+ addl Ltemp,%eax
+
+// // clamp; because we limited the minimum ambient and shading light, we
+// // don't have to clamp low light, just bright
+// if (temp < 0)
+// temp = 0;
+ jns Lp1 // sign flag comes from the addl above
+ subl %eax,%eax
+
+// }
+
+Lp1:
+// here: eax = temp (light value); FP stack = xaccum | v[0] | v[2] | v[1] | zi
+// fv->v[4] = temp;
+//
+// // x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
+// // scaled up by 1/2**31, and the scaling cancels out for x and y in the
+// // projection
+// fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
+// aliastransform[0][3]) * zi) + aliasxcenter;
+// fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
+// aliastransform[1][3]) * zi) + aliasycenter;
+// fv->v[5] = zi;
+ fxch %st(1) // v[0] | xaccum | v[2] | v[1] | zi
+ fmuls C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
+ fxch %st(3) // v[1] | xaccum | v[2] | yaccum | zi
+ fld %st(0) // v[1] | v[1] | xaccum | v[2] | yaccum | zi
+ fmuls C(aliastransform)+4 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
+ fxch %st(1) // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
+ movl %eax,fv_v+16(%edi) // fv->v[4] = temp
+ fmuls C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
+ // zi
+ fxch %st(2) // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
+ // zi
+ fadds C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
+ // zi
+ fxch %st(4) // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
+ // zi
+ fadds C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
+ // zi
+ fxch %st(3) // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
+ // zi
+ fld %st(0) // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
+ // xaccum | zi
+ fmuls C(aliastransform)+8 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
+ // xaccum | zi
+ fxch %st(1) // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
+ // xaccum | zi
+ fmuls C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
+ // yaccum | xaccum | zi
+ fxch %st(5) // xaccum | xaccum3 | xaccum2 | yaccum2 |
+ // yaccum | yaccum3 | zi
+ faddp %st(0),%st(2) // xaccum3 | xaccum | yaccum2 | yaccum |
+ // yaccum3 | zi
+ fxch %st(3) // yaccum | xaccum | yaccum2 | xaccum3 |
+ // yaccum3 | zi
+ faddp %st(0),%st(2) // xaccum | yaccum | xaccum3 | yaccum3 | zi
+ addl $(tv_size),%esi // pverts++
+ faddp %st(0),%st(2) // yaccum | x | yaccum3 | zi
+ faddp %st(0),%st(2) // x | y | zi
+ addl $(stv_size),%ebp // pstverts++
+ fmul %st(2),%st(0) // x/z | y | zi
+ fxch %st(1) // y | x/z | zi
+ fmul %st(2),%st(0) // y/z | x/z | zi
+ fxch %st(1) // x/z | y/z | zi
+ fadds C(aliasxcenter) // u | y/z | zi
+ fxch %st(1) // y/z | u | zi
+ fadds C(aliasycenter) // v | u | zi
+ fxch %st(2) // zi | u | v
+// FIXME: fast float->int conversion?
+ fistpl fv_v+20(%edi) // u | v
+ fistpl fv_v+0(%edi) // v
+ fistpl fv_v+4(%edi) // FP stack is empty again
+
+// }
+
+ addl $(fv_size),%edi // fv++
+ decl %ecx // NOTE(review): bottom-tested, so the body runs once even if
+ jnz Lloop // r_anumverts is 0, unlike the C for-loop quoted above
+
+ popl %esi // restore register variables
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+Lsavelight: // lightcos sign bit clear: keep temp = r_ambientlight as is
+ fstp %st(0) // discard lightcos so the FP stack matches the shaded path
+ jmp Lp1
+
+#endif // id386
+
--- /dev/null
+++ b/u/r_drawa.s
@@ -1,0 +1,819 @@
+//
+// r_drawa.s
+// x86 assembly-language edge clipping and emission code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+// !!! if these are changed, they must be changed in r_draw.c too !!!
+#define FULLY_CLIPPED_CACHED 0x80000000
+#define FRAMECOUNT_MASK 0x7FFFFFFF
+
+ .data
+// scratch variables private to this file (written and read by R_ClipEdge)
+Ld0: .single 0.0 // d0 for pv0, stored so its sign can be tested with integer ops
+Ld1: .single 0.0 // d1 for pv1, same purpose
+Lstack: .long 0 // %esp saved at R_ClipEdge entry, restored at Ldone
+Lfp_near_clip: .single NEAR_CLIP // lower bound applied to transformed z in LTransformAndProject
+Lceilv0: .long 0 // ceilv0 of the edge's first vertex
+Lv: .long 0 // integer v staged in memory so fildl can load it
+Lu0: .long 0 // u0 of the first vertex (float bits)
+Lv0: .long 0 // v0 of the first vertex (float bits)
+Lzi0: .long 0 // lzi0 of the first vertex (float bits)
+
+ .text
+
+//----------------------------------------------------------------------
+// edge clipping code
+//----------------------------------------------------------------------
+
+#define pv0 4+12
+#define pv1 8+12
+#define clip 12+12
+// argument offsets from %esp: +12 skips the three registers pushed on entry
+ .align 4
+.globl C(R_ClipEdge)
+C(R_ClipEdge):
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+ movl %esp,Lstack // for clearing the stack later
+
+// float d0, d1, f;
+// mvertex_t clipvert;
+
+ movl clip(%esp),%ebx // %ebx = clip
+ movl pv0(%esp),%esi // %esi = pv0
+ movl pv1(%esp),%edx // %edx = pv1
+
+// if (clip)
+// {
+ testl %ebx,%ebx
+ jz Lemit // no clip planes, go straight to emitting
+
+// do
+// {
+
+Lcliploop:
+
+// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
+// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
+ flds mv_position+0(%esi)
+ fmuls cp_normal+0(%ebx) // d0mul0
+ flds mv_position+4(%esi)
+ fmuls cp_normal+4(%ebx) // d0mul1 | d0mul0
+ flds mv_position+8(%esi)
+ fmuls cp_normal+8(%ebx) // d0mul2 | d0mul1 | d0mul0
+ fxch %st(1) // d0mul1 | d0mul2 | d0mul0
+ faddp %st(0),%st(2) // d0mul2 | d0add0
+
+ flds mv_position+0(%edx)
+ fmuls cp_normal+0(%ebx) // d1mul0 | d0mul2 | d0add0
+ flds mv_position+4(%edx)
+ fmuls cp_normal+4(%ebx) // d1mul1 | d1mul0 | d0mul2 | d0add0
+ flds mv_position+8(%edx)
+ fmuls cp_normal+8(%ebx) // d1mul2 | d1mul1 | d1mul0 | d0mul2 | d0add0
+ fxch %st(1) // d1mul1 | d1mul2 | d1mul0 | d0mul2 | d0add0
+ faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
+ fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
+
+ faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
+ faddp %st(0),%st(2) // dot0 | dot1
+
+ fsubs cp_dist(%ebx) // d0 | dot1
+ fxch %st(1) // dot1 | d0
+ fsubs cp_dist(%ebx) // d1 | d0
+ fxch %st(1) // d0 | d1
+ fstps Ld0 // d1
+ fstps Ld1 // (FP stack empty)
+
+// if (d0 >= 0)
+// {
+ movl Ld0,%eax // raw float bits of d0
+ movl Ld1,%ecx // raw float bits of d1
+ orl %eax,%ecx // sign bit set if either d0 or d1 has its sign bit set
+ js Lp2 // at least one endpoint is on the negative side
+
+// both points are unclipped
+
+Lcontinue:
+
+//
+// R_ClipEdge (&clipvert, pv1, clip->next);
+// return;
+// }
+// } while ((clip = clip->next) != NULL);
+ movl cp_next(%ebx),%ebx // the recursion in the C version is done by looping here
+ testl %ebx,%ebx
+ jnz Lcliploop
+
+// }
+
+//// add the edge
+// R_EmitEdge (pv0, pv1);
+Lemit:
+
+//
+// set integer rounding to ceil mode, set to single precision
+//
+// FIXME: do away with by manually extracting integers from floats?
+// FIXME: set less often
+ fldcw ceil_cw
+
+// edge_t *edge, *pcheck;
+// int u_check;
+// float u, u_step;
+// vec3_t local, transformed;
+// float *world;
+// int v, v2, ceilv0;
+// float scale, lzi0, u0, v0;
+// int side;
+
+// if (r_lastvertvalid)
+// {
+ cmpl $0,C(r_lastvertvalid)
+ jz LCalcFirst
+
+// u0 = r_u1;
+// v0 = r_v1;
+// lzi0 = r_lzi1;
+// ceilv0 = r_ceilv1;
+ movl C(r_lzi1),%eax // values are copied as raw 32-bit words, no FP needed
+ movl C(r_u1),%ecx
+ movl %eax,Lzi0
+ movl %ecx,Lu0
+ movl C(r_v1),%ecx
+ movl C(r_ceilv1),%eax
+ movl %ecx,Lv0
+ movl %eax,Lceilv0
+ jmp LCalcSecond
+
+// }
+
+LCalcFirst:
+
+// else
+// {
+// world = &pv0->position[0];
+
+ call LTransformAndProject // v0 | lzi0 | u0
+
+ fsts Lv0 // store without popping; v0 is still needed for ceilv0
+ fxch %st(2) // u0 | lzi0 | v0
+ fstps Lu0 // lzi0 | v0
+ fstps Lzi0 // v0
+
+// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
+ fistpl Lceilv0 // rounds with the ceil_cw control word loaded at Lemit
+
+// }
+
+LCalcSecond:
+
+// world = &pv1->position[0];
+ movl %edx,%esi
+
+ call LTransformAndProject // v1 | lzi1 | u1
+
+ flds Lu0 // u0 | v1 | lzi1 | u1
+ fxch %st(3) // u1 | v1 | lzi1 | u0
+ flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
+ fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
+ flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
+ fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
+
+// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
+ fistl C(r_ceilv1) // must happen before single_cw is restored below
+
+ fldcw single_cw // put back normal floating-point state
+
+ fsts C(r_v1)
+ fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
+
+// if (r_lzi1 > lzi0)
+// lzi0 = r_lzi1;
+ fcom %st(1) // compare lzi0 with lzi1
+ fnstsw %ax
+ testb $1,%ah // C0 (bit 0 of %ah) is set when lzi0 < lzi1
+ jz LP0
+ fstp %st(0) // lzi1 | u1 | v0 | v1 | u0
+ fld %st(0) // lzi1 | lzi1 | u1 | v0 | v1 | u0
+LP0:
+
+ fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
+ fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
+ fxch %st(1) // u1 | lzi0 | v0 | v1 | u0
+ fsts C(r_u1)
+ fxch %st(1) // lzi0 | u1 | v0 | v1 | u0
+
+// if (lzi0 > r_nearzi) // for mipmap finding
+// r_nearzi = lzi0;
+ fcoms C(r_nearzi)
+ fnstsw %ax
+ testb $0x45,%ah // C3|C2|C0 are all clear only when lzi0 > r_nearzi
+ jnz LP1
+ fsts C(r_nearzi)
+LP1:
+
+// // for right edges, all we want is the effect on 1/z
+// if (r_nearzionly)
+// return;
+ movl C(r_nearzionly),%eax
+ testl %eax,%eax
+ jz LP2
+LPop5AndDone:
+ movl C(cacheoffset),%eax
+ movl C(r_framecount),%edx
+ cmpl $0x7FFFFFFF,%eax // 0x7FFFFFFF is what the partial-clip paths store (edge not cacheable)
+ jz LDoPop
+ andl $(FRAMECOUNT_MASK),%edx
+ orl $(FULLY_CLIPPED_CACHED),%edx
+ movl %edx,C(cacheoffset) // cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK)
+
+LDoPop:
+ fstp %st(0) // u1 | v0 | v1 | u0
+ fstp %st(0) // v0 | v1 | u0
+ fstp %st(0) // v1 | u0
+ fstp %st(0) // u0
+ fstp %st(0) // (FP stack empty)
+ jmp Ldone
+
+LP2:
+
+// // create the edge
+// if (ceilv0 == r_ceilv1)
+// return; // horizontal edge
+ movl Lceilv0,%ebx // %ebx = ceilv0
+ movl C(edge_p),%edi // %edi = edge being filled in
+ movl C(r_ceilv1),%ecx // %ecx = r_ceilv1
+ movl %edi,%edx
+ movl C(r_pedge),%esi // NOTE(review): %esi is overwritten before it is read on every path from here
+ addl $(et_size),%edx // %edx = edge_p + 1
+ cmpl %ecx,%ebx // flags are used by the jz here and by the jc below
+ jz LPop5AndDone
+
+ movl C(r_pedge),%eax
+ movl %eax,et_owner(%edi) // edge->owner = r_pedge
+
+// side = ceilv0 > r_ceilv1;
+//
+// edge->nearzi = lzi0;
+ fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
+
+// if (side == 1)
+// {
+ jc LSide0 // carry from the cmpl above: ceilv0 < r_ceilv1 (unsigned)
+
+LSide1:
+
+// // leading edge (go from p2 to p1)
+
+// u_step = ((u0 - r_u1) / (v0 - r_v1));
+ fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
+ fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
+ fdivrp %st(0),%st(2) // v1 | ustep
+
+// r_emitted = 1;
+ movl $1,C(r_emitted)
+
+// edge = edge_p++;
+ movl %edx,C(edge_p)
+
+// pretouch next edge
+ movl (%edx),%eax // value is discarded; only the memory access matters
+
+// v2 = ceilv0 - 1;
+// v = r_ceilv1;
+ movl %ecx,%eax
+ leal -1(%ebx),%ecx // %ecx = v2
+ movl %eax,%ebx // %ebx = v
+
+// edge->surfs[0] = 0;
+// edge->surfs[1] = surface_p - surfaces;
+ movl C(surface_p),%eax
+ movl C(surfaces),%esi
+ subl %edx,%edx // %edx = 0
+ subl %esi,%eax
+ shrl $(SURF_T_SHIFT),%eax // byte offset -> surface index
+ movl %edx,et_surfs(%edi) // clears both 16-bit surfs entries
+ movl %eax,et_surfs+2(%edi) // 32-bit store at +2: the upper half lands in the following field, rewritten at LSetRemove
+
+ subl %esi,%esi // %esi = 0: nothing is added to u_check for this side
+
+// u = r_u1 + ((float)v - r_v1) * u_step;
+ movl %ebx,Lv
+ fildl Lv // v | v1 | ustep
+ fsubp %st(0),%st(1) // v-v1 | ustep
+ fmul %st(1),%st(0) // (v-v1)*ustep | ustep
+ fadds C(r_u1) // u | ustep
+
+ jmp LSideDone
+
+// }
+
+LSide0:
+
+// else
+// {
+// // trailing edge (go from p1 to p2)
+
+// u_step = ((r_u1 - u0) / (r_v1 - v0));
+ fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
+ fxch %st(2) // v1 | v0 | u1-u0 | u0
+ fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
+ fdivrp %st(0),%st(2) // v0 | ustep | u0
+
+// r_emitted = 1;
+ movl $1,C(r_emitted)
+
+// edge = edge_p++;
+ movl %edx,C(edge_p)
+
+// pretouch next edge
+ movl (%edx),%eax // value is discarded; only the memory access matters
+
+// v = ceilv0;
+// v2 = r_ceilv1 - 1;
+ decl %ecx // %ecx = v2; %ebx already holds v = ceilv0
+
+// edge->surfs[0] = surface_p - surfaces;
+// edge->surfs[1] = 0;
+ movl C(surface_p),%eax
+ movl C(surfaces),%esi
+ subl %edx,%edx // %edx = 0
+ subl %esi,%eax
+ shrl $(SURF_T_SHIFT),%eax // byte offset -> surface index
+ movl %edx,et_surfs+2(%edi) // 32-bit store at +2: the upper half lands in the following field, rewritten at LSetRemove
+ movl %eax,et_surfs(%edi) // 32-bit store of the index covers both surfs entries
+
+ movl $1,%esi // %esi = 1: u_check is incremented for this side
+
+// u = u0 + ((float)v - v0) * u_step;
+ movl %ebx,Lv
+ fildl Lv // v | v0 | ustep | u0
+ fsubp %st(0),%st(1) // v-v0 | ustep | u0
+ fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
+ faddp %st(0),%st(2) // ustep | u
+ fxch %st(1) // u | ustep
+
+// }
+
+LSideDone:
+
+// edge->u_step = u_step*0x100000;
+// edge->u = u*0x100000 + 0xFFFFF;
+
+ fmuls fp_1m // u*0x100000 | ustep
+ fxch %st(1) // ustep | u*0x100000
+ fmuls fp_1m // ustep*0x100000 | u*0x100000
+ fxch %st(1) // u*0x100000 | ustep*0x100000
+ fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
+ fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
+ fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
+ fistpl et_u(%edi) // (FP stack empty)
+
+// // we need to do this to avoid stepping off the edges if a very nearly
+// // horizontal edge is less than epsilon above a scan, and numeric error
+// // causes it to incorrectly extend to the scan, and the extension of the
+// // line goes off the edge of the screen
+// // FIXME: is this actually needed?
+// if (edge->u < r_refdef.vrect_x_adj_shift20)
+// edge->u = r_refdef.vrect_x_adj_shift20;
+// if (edge->u > r_refdef.vrectright_adj_shift20)
+// edge->u = r_refdef.vrectright_adj_shift20;
+ movl et_u(%edi),%eax
+ movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
+ cmpl %edx,%eax
+ jl LP4 // below the left limit: clamp to it
+ movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
+ cmpl %edx,%eax
+ jng LP5 // within range: keep edge->u
+LP4:
+ movl %edx,et_u(%edi) // %edx holds whichever limit was violated
+ movl %edx,%eax
+LP5:
+
+// // sort the edge in normally
+// u_check = edge->u;
+//
+// if (edge->surfs[0])
+// u_check++; // sort trailers after leaders
+ addl %esi,%eax // %esi is 0 (LSide1) or 1 (LSide0)
+
+// if (!newedges[v] || newedges[v]->u >= u_check)
+// {
+ movl C(newedges)(,%ebx,4),%esi // %esi = newedges[v]
+ testl %esi,%esi
+ jz LDoFirst
+ cmpl %eax,et_u(%esi)
+ jl LNotFirst
+LDoFirst:
+
+// edge->next = newedges[v];
+// newedges[v] = edge;
+ movl %esi,et_next(%edi)
+ movl %edi,C(newedges)(,%ebx,4)
+
+ jmp LSetRemove
+
+// }
+
+LNotFirst:
+
+// else
+// {
+// pcheck = newedges[v];
+//
+// while (pcheck->next && pcheck->next->u < u_check)
+// pcheck = pcheck->next;
+LFindInsertLoop:
+ movl %esi,%edx // %edx = pcheck
+ movl et_next(%esi),%esi // %esi = pcheck->next
+ testl %esi,%esi
+ jz LInsertFound
+ cmpl %eax,et_u(%esi)
+ jl LFindInsertLoop
+
+LInsertFound:
+
+// edge->next = pcheck->next;
+// pcheck->next = edge;
+ movl %esi,et_next(%edi)
+ movl %edi,et_next(%edx)
+
+// }
+
+LSetRemove:
+
+// edge->nextremove = removeedges[v2];
+// removeedges[v2] = edge;
+ movl C(removeedges)(,%ecx,4),%eax
+ movl %edi,C(removeedges)(,%ecx,4)
+ movl %eax,et_nextremove(%edi)
+
+Ldone:
+ movl Lstack,%esp // clear temporary variables from stack
+
+ popl %ebx // restore register variables
+ popl %edi
+ popl %esi
+ ret
+
+// at least one point is clipped
+
+Lp2:
+ testl %eax,%eax // %eax still holds the raw bits of d0
+ jns Lp1 // d0 is not negative, so only point 1 is clipped
+
+// else
+// {
+// // point 0 is clipped
+
+// if (d1 < 0)
+// {
+ movl Ld1,%eax
+ testl %eax,%eax
+ jns Lp3
+
+// // both points are clipped
+// // we do cache fully clipped edges
+// if (!leftclipped)
+ movl C(r_leftclipped),%eax
+ movl C(r_pedge),%ecx // NOTE(review): %ecx is not read again on this path
+ testl %eax,%eax
+ jnz Ldone
+
+// cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK);
+ movl C(r_framecount),%eax
+ andl $(FRAMECOUNT_MASK),%eax
+ orl $(FULLY_CLIPPED_CACHED),%eax
+ movl %eax,C(cacheoffset)
+
+// return;
+ jmp Ldone
+
+// }
+
+Lp1:
+
+// // point 0 is unclipped
+// if (d1 >= 0)
+// {
+// // both points are unclipped
+// continue;
+
+// // only point 1 is clipped
+
+// f = d0 / (d0 - d1);
+ flds Ld0 // d0
+ flds Ld1 // d1 | d0
+ fsubr %st(1),%st(0) // d0-d1 | d0
+
+// // we don't cache partially clipped edges
+ movl $0x7FFFFFFF,C(cacheoffset)
+
+ fdivrp %st(0),%st(1) // f
+
+ subl $(mv_size),%esp // allocate space for clipvert
+
+// clipvert.position[0] = pv0->position[0] +
+// f * (pv1->position[0] - pv0->position[0]);
+// clipvert.position[1] = pv0->position[1] +
+// f * (pv1->position[1] - pv0->position[1]);
+// clipvert.position[2] = pv0->position[2] +
+// f * (pv1->position[2] - pv0->position[2]);
+ flds mv_position+8(%edx)
+ fsubs mv_position+8(%esi)
+ flds mv_position+4(%edx)
+ fsubs mv_position+4(%esi)
+ flds mv_position+0(%edx)
+ fsubs mv_position+0(%esi) // 0 | 1 | 2
+
+// replace pv1 with the clip point
+ movl %esp,%edx
+ movl cp_leftedge(%ebx),%eax // %al is tested as leftedge here, %ah as rightedge at Ltestright
+ testb %al,%al // flags are consumed by the jz Ltestright after the FP block
+
+ fmul %st(3),%st(0) // %st(3) is f
+ fxch %st(1) // 1 | 0 | 2
+ fmul %st(3),%st(0)
+ fxch %st(2) // 2 | 0 | 1
+ fmulp %st(0),%st(3) // 0 | 1 | 2
+ fadds mv_position+0(%esi)
+ fxch %st(1) // 1 | 0 | 2
+ fadds mv_position+4(%esi)
+ fxch %st(2) // 2 | 0 | 1
+ fadds mv_position+8(%esi)
+ fxch %st(1) // 0 | 2 | 1
+ fstps mv_position+0(%esp) // 2 | 1
+ fstps mv_position+8(%esp) // 1
+ fstps mv_position+4(%esp) // (FP stack empty)
+
+// if (clip->leftedge)
+// {
+ jz Ltestright
+
+// r_leftclipped = true;
+// r_leftexit = clipvert;
+ movl $1,C(r_leftclipped)
+ movl mv_position+0(%esp),%eax
+ movl %eax,C(r_leftexit)+mv_position+0
+ movl mv_position+4(%esp),%eax
+ movl %eax,C(r_leftexit)+mv_position+4
+ movl mv_position+8(%esp),%eax
+ movl %eax,C(r_leftexit)+mv_position+8
+
+ jmp Lcontinue
+
+// }
+
+Ltestright:
+// else if (clip->rightedge)
+// {
+ testb %ah,%ah // %ah = byte following leftedge, loaded by the movl cp_leftedge above
+ jz Lcontinue
+
+// r_rightclipped = true;
+// r_rightexit = clipvert;
+ movl $1,C(r_rightclipped)
+ movl mv_position+0(%esp),%eax
+ movl %eax,C(r_rightexit)+mv_position+0
+ movl mv_position+4(%esp),%eax
+ movl %eax,C(r_rightexit)+mv_position+4
+ movl mv_position+8(%esp),%eax
+ movl %eax,C(r_rightexit)+mv_position+8
+
+// }
+//
+// R_ClipEdge (pv0, &clipvert, clip->next);
+// return;
+// }
+ jmp Lcontinue
+
+// }
+
+Lp3:
+
+// // only point 0 is clipped
+// r_lastvertvalid = false;
+
+ movl $0,C(r_lastvertvalid)
+
+// f = d0 / (d0 - d1);
+ flds Ld0 // d0
+ flds Ld1 // d1 | d0
+ fsubr %st(1),%st(0) // d0-d1 | d0
+
+// // we don't cache partially clipped edges
+ movl $0x7FFFFFFF,C(cacheoffset)
+
+ fdivrp %st(0),%st(1) // f
+
+ subl $(mv_size),%esp // allocate space for clipvert
+
+// clipvert.position[0] = pv0->position[0] +
+// f * (pv1->position[0] - pv0->position[0]);
+// clipvert.position[1] = pv0->position[1] +
+// f * (pv1->position[1] - pv0->position[1]);
+// clipvert.position[2] = pv0->position[2] +
+// f * (pv1->position[2] - pv0->position[2]);
+ flds mv_position+8(%edx)
+ fsubs mv_position+8(%esi)
+ flds mv_position+4(%edx)
+ fsubs mv_position+4(%esi)
+ flds mv_position+0(%edx)
+ fsubs mv_position+0(%esi) // 0 | 1 | 2
+
+ movl cp_leftedge(%ebx),%eax // %al is tested as leftedge here, %ah as rightedge at Ltestright2
+ testb %al,%al // flags are consumed by the jz Ltestright2 after the FP block
+
+ fmul %st(3),%st(0) // %st(3) is f
+ fxch %st(1) // 1 | 0 | 2
+ fmul %st(3),%st(0)
+ fxch %st(2) // 2 | 0 | 1
+ fmulp %st(0),%st(3) // 0 | 1 | 2
+ fadds mv_position+0(%esi)
+ fxch %st(1) // 1 | 0 | 2
+ fadds mv_position+4(%esi)
+ fxch %st(2) // 2 | 0 | 1
+ fadds mv_position+8(%esi)
+ fxch %st(1) // 0 | 2 | 1
+ fstps mv_position+0(%esp) // 2 | 1
+ fstps mv_position+8(%esp) // 1
+ fstps mv_position+4(%esp) // (FP stack empty)
+
+// replace pv0 with the clip point
+ movl %esp,%esi // done only after the last read of the old pv0 through %esi
+
+// if (clip->leftedge)
+// {
+ jz Ltestright2
+
+// r_leftclipped = true;
+// r_leftenter = clipvert;
+ movl $1,C(r_leftclipped)
+ movl mv_position+0(%esp),%eax
+ movl %eax,C(r_leftenter)+mv_position+0
+ movl mv_position+4(%esp),%eax
+ movl %eax,C(r_leftenter)+mv_position+4
+ movl mv_position+8(%esp),%eax
+ movl %eax,C(r_leftenter)+mv_position+8
+
+ jmp Lcontinue
+
+// }
+
+Ltestright2:
+// else if (clip->rightedge)
+// {
+ testb %ah,%ah // %ah = byte following leftedge, loaded by the movl cp_leftedge above
+ jz Lcontinue
+
+// r_rightclipped = true;
+// r_rightenter = clipvert;
+ movl $1,C(r_rightclipped)
+ movl mv_position+0(%esp),%eax
+ movl %eax,C(r_rightenter)+mv_position+0
+ movl mv_position+4(%esp),%eax
+ movl %eax,C(r_rightenter)+mv_position+4
+ movl mv_position+8(%esp),%eax
+ movl %eax,C(r_rightenter)+mv_position+8
+
+// }
+ jmp Lcontinue
+
+// %esi = vec3_t point to transform and project
+// %edx preserved; %eax clobbered by fnstsw; returns v | lzi | u on the FP stack
+LTransformAndProject:
+
+// // transform and project
+// VectorSubtract (world, modelorg, local);
+ flds mv_position+0(%esi)
+ fsubs C(modelorg)+0 // local[0]
+ flds mv_position+4(%esi)
+ fsubs C(modelorg)+4 // local[1] | local[0]
+ flds mv_position+8(%esi)
+ fsubs C(modelorg)+8 // local[2] | local[1] | local[0]
+ fxch %st(2) // local[0] | local[1] | local[2]
+
+// TransformVector (local, transformed);
+//
+// if (transformed[2] < NEAR_CLIP)
+// transformed[2] = NEAR_CLIP;
+//
+// lzi0 = 1.0 / transformed[2];
+ fld %st(0) // local[0] | local[0] | local[1] | local[2]
+ fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2]
+ fld %st(1) // local[0] | zm0 | local[0] | local[1] |
+ // local[2]
+ fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
+ fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
+ fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
+ fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
+ // local[2]
+ fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
+ // local[2]
+ fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
+ // local[1] | local[2]
+ fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
+ // local[1] | local[2]
+ fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
+ // xm1 | local[2]
+ fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
+ // xm1 | local[2]
+ fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
+ // xm1 | local[2]
+ faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
+ fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
+ faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
+ faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
+ fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
+ fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
+ fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
+ fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
+ fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
+ fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
+ fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
+ faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
+ fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
+ faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
+ fxch %st(1) // ym2 | zm4 | xm4 | ym3
+ faddp %st(0),%st(3) // zm4 | xm4 | ym4
+
+ fcoms Lfp_near_clip // compare transformed[2] with NEAR_CLIP
+ fnstsw %ax
+ testb $1,%ah // C0 set when transformed[2] < NEAR_CLIP
+ jz LNoClip
+ fstp %st(0) // drop the too-small z
+ flds Lfp_near_clip // NEAR_CLIP | x | y
+
+LNoClip:
+
+ fdivrs float_1 // lzi0 | x | y
+ fxch %st(1) // x | lzi0 | y
+
+// // FIXME: build x/yscale into transform?
+// scale = xscale * lzi0;
+// u0 = (xcenter + scale*transformed[0]);
+ flds C(xscale) // xscale | x | lzi0 | y
+ fmul %st(2),%st(0) // scale | x | lzi0 | y
+ fmulp %st(0),%st(1) // scale*x | lzi0 | y
+ fadds C(xcenter) // u0 | lzi0 | y
+
+// if (u0 < r_refdef.fvrectx_adj)
+// u0 = r_refdef.fvrectx_adj;
+// if (u0 > r_refdef.fvrectright_adj)
+// u0 = r_refdef.fvrectright_adj;
+// FIXME: use integer compares of floats?
+ fcoms C(r_refdef)+rd_fvrectx_adj
+ fnstsw %ax
+ testb $1,%ah // C0 set when u0 is below the left limit
+ jz LClampP0
+ fstp %st(0)
+ flds C(r_refdef)+rd_fvrectx_adj
+LClampP0:
+ fcoms C(r_refdef)+rd_fvrectright_adj
+ fnstsw %ax
+ testb $0x45,%ah // C3|C2|C0 all clear only when u0 is above the right limit
+ jnz LClampP1
+ fstp %st(0)
+ flds C(r_refdef)+rd_fvrectright_adj
+LClampP1:
+
+ fld %st(1) // lzi0 | u0 | lzi0 | y
+
+// scale = yscale * lzi0;
+// v0 = (ycenter - scale*transformed[1]);
+ fmuls C(yscale) // scale | u0 | lzi0 | y
+ fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
+ fxch %st(2) // scale*y | lzi0 | u0
+ fsubrs C(ycenter) // v0 | lzi0 | u0
+
+// if (v0 < r_refdef.fvrecty_adj)
+// v0 = r_refdef.fvrecty_adj;
+// if (v0 > r_refdef.fvrectbottom_adj)
+// v0 = r_refdef.fvrectbottom_adj;
+// FIXME: use integer compares of floats?
+ fcoms C(r_refdef)+rd_fvrecty_adj
+ fnstsw %ax
+ testb $1,%ah // C0 set when v0 is below the top limit
+ jz LClampP2
+ fstp %st(0)
+ flds C(r_refdef)+rd_fvrecty_adj
+LClampP2:
+ fcoms C(r_refdef)+rd_fvrectbottom_adj
+ fnstsw %ax
+ testb $0x45,%ah // C3|C2|C0 all clear only when v0 is above the bottom limit
+ jnz LClampP3
+ fstp %st(0)
+ flds C(r_refdef)+rd_fvrectbottom_adj
+LClampP3:
+ ret
+
+#endif // id386
+
--- /dev/null
+++ b/u/r_edgea.s
@@ -1,0 +1,731 @@
+//
+// r_edgea.s
+// x86 assembly-language edge-processing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+ .data
+Ltemp: .long 0 // integer staging slot so fildl can load (edge->u - 0xFFFFF)
+float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000
+float_point_999: .single 0.999 // multiplies newzi to give newzibottom
+float_1_point_001: .single 1.001 // multiplies newzi to give newzitop
+
+ .text
+
+//--------------------------------------------------------------------
+
+#define edgestoadd 4+8 // note odd stack offsets because of interleaving
+#define edgelist 8+12 // with pushes
+// R_InsertNewEdges (edgestoadd, edgelist): link each edge of the edgestoadd chain into the list at edgelist, ordered by u
+.globl C(R_EdgeCodeStart)
+C(R_EdgeCodeStart):
+
+.globl C(R_InsertNewEdges)
+C(R_InsertNewEdges):
+ pushl %edi
+ pushl %esi // preserve register variables
+ movl edgestoadd(%esp),%edx // two pushes so far, hence 4+8
+ pushl %ebx
+ movl edgelist(%esp),%ecx // three pushes so far, hence 8+12
+
+LDoNextEdge:
+ movl et_u(%edx),%eax // %eax = u of the edge being inserted
+ movl %edx,%edi // %edi = edge being inserted
+
+LContinueSearch:
+ movl et_u(%ecx),%ebx // search is unrolled 4x, alternating %ecx and %esi as the cursor
+ movl et_next(%ecx),%esi
+ cmpl %ebx,%eax
+ jle LAddedge // insert before %ecx
+ movl et_u(%esi),%ebx
+ movl et_next(%esi),%ecx
+ cmpl %ebx,%eax
+ jle LAddedge2 // insert before %esi
+ movl et_u(%ecx),%ebx
+ movl et_next(%ecx),%esi
+ cmpl %ebx,%eax
+ jle LAddedge // insert before %ecx
+ movl et_u(%esi),%ebx
+ movl et_next(%esi),%ecx
+ cmpl %ebx,%eax
+ jg LContinueSearch
+
+LAddedge2:
+ movl et_next(%edx),%edx // advance to the next edge to add before relinking
+ movl et_prev(%esi),%ebx
+ movl %esi,et_next(%edi)
+ movl %ebx,et_prev(%edi)
+ movl %edi,et_next(%ebx)
+ movl %edi,et_prev(%esi)
+ movl %esi,%ecx // the next search resumes from the edge just inserted before
+
+ cmpl $0,%edx
+ jnz LDoNextEdge
+ jmp LDone
+
+ .align 4
+LAddedge:
+ movl et_next(%edx),%edx // advance to the next edge to add before relinking
+ movl et_prev(%ecx),%ebx
+ movl %ecx,et_next(%edi)
+ movl %ebx,et_prev(%edi)
+ movl %edi,et_next(%ebx)
+ movl %edi,et_prev(%ecx)
+
+ cmpl $0,%edx
+ jnz LDoNextEdge
+
+LDone:
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+
+ ret
+
+//--------------------------------------------------------------------
+
+#define predge 4+4
+// R_RemoveEdges (pedge): unlink pedge and every edge chained through nextremove from the prev/next list
+.globl C(R_RemoveEdges)
+C(R_RemoveEdges):
+ pushl %ebx
+ movl predge(%esp),%eax // one push so far, hence 4+4
+
+Lre_loop:
+ movl et_next(%eax),%ecx
+ movl et_nextremove(%eax),%ebx
+ movl et_prev(%eax),%edx
+ testl %ebx,%ebx // flags survive the movl below and feed the jz
+ movl %edx,et_prev(%ecx) // pedge->next->prev = pedge->prev
+ jz Lre_done
+ movl %ecx,et_next(%edx) // pedge->prev->next = pedge->next
+
+ movl et_next(%ebx),%ecx // second unrolled copy, operating on pedge->nextremove
+ movl et_prev(%ebx),%edx
+ movl et_nextremove(%ebx),%eax
+ movl %edx,et_prev(%ecx)
+ testl %eax,%eax // flags survive the movl below and feed the jnz
+ movl %ecx,et_next(%edx)
+ jnz Lre_loop
+
+ popl %ebx
+ ret
+
+Lre_done:
+ movl %ecx,et_next(%edx) // finish the unlink that the jz skipped
+ popl %ebx
+
+ ret
+
+//--------------------------------------------------------------------
+
+#define pedgelist 4+4 // note odd stack offset because of interleaving
+ // with pushes
+// R_StepActiveU (pedge): add u_step to u for each edge from pedge on, moving back any edge whose new u is below its predecessor's
+.globl C(R_StepActiveU)
+C(R_StepActiveU):
+ pushl %edi
+ movl pedgelist(%esp),%edx // one push so far, hence 4+4
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+ movl et_prev(%edx),%esi
+
+LNewEdge:
+ movl et_u(%esi),%edi // %edi = u of the edge preceding %edx
+
+LNextEdge:
+ movl et_u(%edx),%eax // stepping is unrolled 4x, alternating %edx/%eax and %esi/%edi
+ movl et_u_step(%edx),%ebx
+ addl %ebx,%eax
+ movl et_next(%edx),%esi
+ movl %eax,et_u(%edx)
+ cmpl %edi,%eax
+ jl LPushBack // new u is below the previous edge's u
+
+ movl et_u(%esi),%edi
+ movl et_u_step(%esi),%ebx
+ addl %ebx,%edi
+ movl et_next(%esi),%edx
+ movl %edi,et_u(%esi)
+ cmpl %eax,%edi
+ jl LPushBack2
+
+ movl et_u(%edx),%eax
+ movl et_u_step(%edx),%ebx
+ addl %ebx,%eax
+ movl et_next(%edx),%esi
+ movl %eax,et_u(%edx)
+ cmpl %edi,%eax
+ jl LPushBack
+
+ movl et_u(%esi),%edi
+ movl et_u_step(%esi),%ebx
+ addl %ebx,%edi
+ movl et_next(%esi),%edx
+ movl %edi,et_u(%esi)
+ cmpl %eax,%edi
+ jnl LNextEdge
+
+LPushBack2:
+ movl %edx,%ebx // swap roles so LPushBack sees %edx = edge, %eax = its u, %esi = next edge
+ movl %edi,%eax
+ movl %esi,%edx
+ movl %ebx,%esi
+
+LPushBack:
+// push it back to keep it sorted
+ movl et_prev(%edx),%ecx
+ movl et_next(%edx),%ebx
+
+// done if the -1 in edge_aftertail triggered this
+ cmpl $(C(edge_aftertail)),%edx
+ jz LUDone
+
+// pull the edge out of the edge list
+ movl et_prev(%ecx),%edi // %edi = first candidate for the backward search
+ movl %ecx,et_prev(%esi)
+ movl %ebx,et_next(%ecx)
+
+// find out where the edge goes in the edge list
+LPushBackLoop:
+ movl et_prev(%edi),%ecx // backward search is unrolled 2x, alternating %edi and %ecx
+ movl et_u(%edi),%ebx
+ cmpl %ebx,%eax
+ jnl LPushBackFound
+
+ movl et_prev(%ecx),%edi
+ movl et_u(%ecx),%ebx
+ cmpl %ebx,%eax
+ jl LPushBackLoop
+
+ movl %ecx,%edi // LPushBackFound expects the insertion point in %edi
+
+// put the edge back into the edge list
+LPushBackFound:
+ movl et_next(%edi),%ebx // insert %edx between %edi and %edi->next
+ movl %edi,et_prev(%edx)
+ movl %ebx,et_next(%edx)
+ movl %edx,et_next(%edi)
+ movl %edx,et_prev(%ebx)
+
+ movl %esi,%edx // resume stepping at the edge after the one moved
+ movl et_prev(%esi),%esi
+
+ cmpl $(C(edge_tail)),%edx
+ jnz LNewEdge
+
+LUDone:
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+
+ ret
+
+//--------------------------------------------------------------------
+
+#define surf 4 // note this is loaded before any pushes
+// TrailingEdge: in %esi = surf, %ebx = edge, %ebp = span_p; ends the surface's span and unlinks it from the surface stack
+ .align 4
+TrailingEdge:
+ movl st_spanstate(%esi),%eax // check for edge inversion
+ decl %eax
+ jnz LInverted // spanstate-1 != 0: just store the decremented count
+
+ movl %eax,st_spanstate(%esi) // spanstate = 0
+ movl st_insubmodel(%esi),%ecx
+ movl 0x12345678,%edx // surfaces[1].st_next
+LPatch0:
+ movl C(r_bmodelactive),%eax
+ subl %ecx,%eax // r_bmodelactive -= surf->insubmodel
+ cmpl %esi,%edx // flags survive the movl below and feed the jnz
+ movl %eax,C(r_bmodelactive)
+ jnz LNoEmit // surface isn't on top, just remove
+
+// emit a span (current top going away)
+ movl et_u(%ebx),%eax
+ shrl $20,%eax // iu = integral pixel u
+ movl st_last_u(%esi),%edx
+ movl st_next(%esi),%ecx
+ cmpl %edx,%eax
+ jle LNoEmit2 // iu <= surf->last_u, so nothing to emit
+
+ movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
+ subl %edx,%eax
+ movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
+
+ movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
+ movl C(current_iv),%eax
+ movl %eax,espan_t_v(%ebp) // span->v = current_iv;
+ movl st_spans(%esi),%eax
+ movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
+ movl %ebp,st_spans(%esi) // surf->spans = span;
+ addl $(espan_t_size),%ebp // span_p++
+
+ movl st_next(%esi),%edx // remove the surface from the surface
+ movl st_prev(%esi),%esi // stack
+
+ movl %edx,st_next(%esi)
+ movl %esi,st_prev(%edx)
+ ret
+
+LNoEmit2:
+ movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
+ movl st_next(%esi),%edx // remove the surface from the surface
+ movl st_prev(%esi),%esi // stack
+
+ movl %edx,st_next(%esi)
+ movl %esi,st_prev(%edx)
+ ret
+
+LNoEmit:
+ movl st_next(%esi),%edx // remove the surface from the surface
+ movl st_prev(%esi),%esi // stack
+
+ movl %edx,st_next(%esi)
+ movl %esi,st_prev(%edx)
+ ret
+
+LInverted:
+ movl %eax,st_spanstate(%esi) // store the decremented spanstate
+ ret
+
+//--------------------------------------------------------------------
+
+// trailing edge only
+Lgs_trailing:
+ pushl $Lgs_nextedge // fake return address: TrailingEdge's ret resumes at Lgs_nextedge
+ jmp TrailingEdge
+
+// R_GenerateSpans: walks the edge list from edge_head.next to edge_tail and emits spans at span_p
+.globl C(R_GenerateSpans)
+C(R_GenerateSpans):
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+// clear active surfaces to just the background surface
+ movl C(surfaces),%eax
+ movl C(edge_head_u_shift20),%edx
+ addl $(st_size),%eax // %eax = &surfaces[1]
+// %ebp = span_p throughout
+ movl C(span_p),%ebp
+
+ movl $0,C(r_bmodelactive)
+
+ movl %eax,st_next(%eax) // surfaces[1] links to itself in both directions
+ movl %eax,st_prev(%eax)
+ movl %edx,st_last_u(%eax)
+ movl C(edge_head)+et_next,%ebx // edge=edge_head.next
+
+// generate spans
+ cmpl $(C(edge_tail)),%ebx // done if empty list
+ jz Lgs_lastspan
+
+Lgs_edgeloop:
+
+ movl et_surfs(%ebx),%edi // one 32-bit load fetches both 16-bit surfs entries
+ movl C(surfaces),%eax
+ movl %edi,%esi
+ andl $0xFFFF0000,%edi // %edi = surfs[1] << 16
+ andl $0xFFFF,%esi // %esi = surfs[0]
+ jz Lgs_leading // not a trailing edge
+
+// it has a left surface, so a surface is going away for this span
+ shll $(SURF_T_SHIFT),%esi // surface index -> byte offset
+ addl %eax,%esi // %esi = &surfaces[surfs[0]]
+ testl %edi,%edi
+ jz Lgs_trailing
+
+// both leading and trailing
+ call TrailingEdge
+ movl C(surfaces),%eax
+
+// ---------------------------------------------------------------
+// handle a leading edge
+// ---------------------------------------------------------------
+
+Lgs_leading:
+ shrl $16-SURF_T_SHIFT,%edi // (surfs[1] << 16) -> surfs[1] << SURF_T_SHIFT, a byte offset
+ movl C(surfaces),%eax
+ addl %eax,%edi // %edi = surf = &surfaces[surfs[1]]
+ movl 0x12345678,%esi // surf2 = surfaces[1].next;
+LPatch2:
+ movl st_spanstate(%edi),%edx
+ movl st_insubmodel(%edi),%eax
+ testl %eax,%eax
+ jnz Lbmodel_leading
+
+// handle a leading non-bmodel edge
+
+// don't start a span if this is an inverted span, with the end edge preceding
+// the start edge (that is, we've already seen the end edge)
+ testl %edx,%edx
+ jnz Lxl_done
+
+
+// if (surf->key < surf2->key)
+// goto newtop;
+ incl %edx
+ movl st_key(%edi),%eax
+ movl %edx,st_spanstate(%edi) // spanstate = 1
+ movl st_key(%esi),%ecx
+ cmpl %ecx,%eax
+ jl Lnewtop
+
+// main sorting loop to search through surface stack until insertion point
+// found. Always terminates because background surface is sentinel
+// do
+// {
+// surf2 = surf2->next;
+// } while (surf->key >= surf2->key);
+Lsortloopnb:
+ movl st_next(%esi),%esi
+ movl st_key(%esi),%ecx
+ cmpl %ecx,%eax
+ jge Lsortloopnb
+
+ jmp LInsertAndExit
+
+
+// handle a leading bmodel edge
+ .align 4
+Lbmodel_leading:
+
+// don't start a span if this is an inverted span, with the end edge preceding
+// the start edge (that is, we've already seen the end edge)
+ testl %edx,%edx
+ jnz Lxl_done
+
+ movl C(r_bmodelactive),%ecx
+ incl %edx
+ incl %ecx
+ movl %edx,st_spanstate(%edi) // spanstate = 1
+ movl %ecx,C(r_bmodelactive) // r_bmodelactive++
+
+// if (surf->key < surf2->key)
+// goto newtop;
+ movl st_key(%edi),%eax
+ movl st_key(%esi),%ecx
+ cmpl %ecx,%eax
+ jl Lnewtop
+
+// if ((surf->key == surf2->key) && surf->insubmodel)
+// {
+ jz Lzcheck_for_newtop // still using the flags of the key compare above
+
+// main sorting loop to search through surface stack until insertion point
+// found. Always terminates because background surface is sentinel
+// do
+// {
+// surf2 = surf2->next;
+// } while (surf->key > surf2->key);
+Lsortloop:
+ movl st_next(%esi),%esi
+ movl st_key(%esi),%ecx
+ cmpl %ecx,%eax
+ jg Lsortloop
+
+ jne LInsertAndExit // keys differ: insertion point found without a 1/z test
+
+// Do 1/z sorting to see if we've arrived in the right position
+ movl et_u(%ebx),%eax
+ subl $0xFFFFF,%eax
+ movl %eax,Ltemp
+ fildl Ltemp
+
+ fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
+ // (1.0 / 0x100000);
+
+ fld %st(0) // fu | fu
+ fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
+ flds C(fv) // fv | fu*surf->d_zistepu | fu
+ fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
+ fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
+ fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+
+ flds st_d_zistepu(%esi) // surf2->d_zistepu |
+ // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+ fmul %st(3),%st(0) // fu*surf2->d_zistepu |
+ // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+ fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fu*surf2->d_zistepu |
+ // fv*surf->d_zistepv | fu
+ faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
+
+ flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
+ fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+ fld %st(2) // newzi | fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+ fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+
+ fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
+ // newzibottom | newzi | fu
+ fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
+ // fv*surf2->d_zistepv | newzibottom | newzi |
+ // fu
+ faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
+ fxch %st(1) // newzibottom | testzi | newzi | fu
+
+// if (newzibottom >= testzi)
+// goto Lgotposition;
+
+ fcomp %st(1) // testzi | newzi | fu
+
+ fxch %st(1) // newzi | testzi | fu
+ fmuls float_1_point_001 // newzitop | testzi | fu
+ fxch %st(1) // testzi | newzitop | fu
+
+ fnstsw %ax // status of the fcomp above; the fxch/fmuls in between are scheduled into the gap
+ testb $0x01,%ah
+ jz Lgotposition_fpop3
+
+// if (newzitop >= testzi)
+// {
+
+ fcomp %st(1) // newzitop | fu
+ fnstsw %ax
+ testb $0x45,%ah // C3|C2|C0 all clear only when testzi > newzitop
+ jz Lsortloop_fpop2
+
+// if (surf->d_zistepu >= surf2->d_zistepu)
+// goto gotposition;
+
+ flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop| fu
+ fcomps st_d_zistepu(%esi) // newzitop | fu
+ fnstsw %ax
+ testb $0x01,%ah
+ jz Lgotposition_fpop2
+
+ fstp %st(0) // clear the FPstack
+ fstp %st(0)
+ movl st_key(%edi),%eax // %eax was clobbered by fnstsw; Lsortloop needs the key in it
+ jmp Lsortloop
+
+
+Lgotposition_fpop3:
+ fstp %st(0)
+Lgotposition_fpop2:
+ fstp %st(0)
+ fstp %st(0)
+ jmp LInsertAndExit
+
+
+// emit a span (obscures current top)
+
+Lnewtop_fpop3:
+ fstp %st(0)
+Lnewtop_fpop2:
+ fstp %st(0)
+ fstp %st(0)
+ movl st_key(%edi),%eax // reload the sorting key
+
+Lnewtop:
+ movl et_u(%ebx),%eax // NOTE(review): this overwrites the key reloaded just above
+ movl st_last_u(%esi),%edx
+ shrl $20,%eax // iu = integral pixel u
+ movl %eax,st_last_u(%edi) // surf->last_u = iu;
+ cmpl %edx,%eax
+ jle LInsertAndExit // iu <= surf->last_u, so nothing to emit
+
+ subl %edx,%eax
+ movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
+
+ movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
+ movl C(current_iv),%eax
+ movl %eax,espan_t_v(%ebp) // span->v = current_iv;
+ movl st_spans(%esi),%eax
+ movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
+ movl %ebp,st_spans(%esi) // surf->spans = span;
+ addl $(espan_t_size),%ebp // span_p++
+
+LInsertAndExit:
+// insert before surf2
+ movl %esi,st_next(%edi) // surf->next = surf2;
+ movl st_prev(%esi),%eax
+ movl %eax,st_prev(%edi) // surf->prev = surf2->prev;
+ movl %edi,st_prev(%esi) // surf2->prev = surf;
+ movl %edi,st_next(%eax) // surf2->prev->next = surf;
+
+// ---------------------------------------------------------------
+// leading edge done
+// ---------------------------------------------------------------
+
+// ---------------------------------------------------------------
+// see if there are any more edges
+// ---------------------------------------------------------------
+
+Lgs_nextedge:
+ movl et_next(%ebx),%ebx
+ cmpl $(C(edge_tail)),%ebx
+ jnz Lgs_edgeloop
+
+// clean up at the right edge
+Lgs_lastspan:
+
+// now that we've reached the right edge of the screen, we're done with any
+// unfinished surfaces, so emit a span for whatever's on top
+ movl 0x12345678,%esi // surfaces[1].st_next
+LPatch3:
+ movl C(edge_tail_u_shift20),%eax
+ xorl %ecx,%ecx // %ecx = 0, stored into spanstate by the reset loop below
+ movl st_last_u(%esi),%edx
+ subl %edx,%eax // count = edge_tail_u_shift20 - surf->last_u
+ jle Lgs_resetspanstate
+
+ movl %edx,espan_t_u(%ebp)
+ movl %eax,espan_t_count(%ebp)
+ movl C(current_iv),%eax
+ movl %eax,espan_t_v(%ebp)
+ movl st_spans(%esi),%eax
+ movl %eax,espan_t_pnext(%ebp)
+ movl %ebp,st_spans(%esi)
+ addl $(espan_t_size),%ebp
+
+// reset spanstate for all surfaces in the surface stack
+Lgs_resetspanstate:
+ movl %ecx,st_spanstate(%esi)
+ movl st_next(%esi),%esi
+ cmpl $0x12345678,%esi // &surfaces[1]
+LPatch4:
+ jnz Lgs_resetspanstate
+
+// store the final span_p
+ movl %ebp,C(span_p)
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+
+// ---------------------------------------------------------------
+// 1/z sorting for bmodels in the same leaf
+// ---------------------------------------------------------------
+ .align 4
+Lxl_done:
+ incl %edx // reached when spanstate was nonzero at a leading edge
+ movl %edx,st_spanstate(%edi) // surf->spanstate++
+
+ jmp Lgs_nextedge
+
+
+ .align 4
+Lzcheck_for_newtop:
+ movl et_u(%ebx),%eax
+ subl $0xFFFFF,%eax
+ movl %eax,Ltemp
+ fildl Ltemp
+
+ fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
+ // (1.0 / 0x100000);
+
+ fld %st(0) // fu | fu
+ fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
+ flds C(fv) // fv | fu*surf->d_zistepu | fu
+ fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
+ fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
+ fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+
+ flds st_d_zistepu(%esi) // surf2->d_zistepu |
+ // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+ fmul %st(3),%st(0) // fu*surf2->d_zistepu |
+ // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fv*surf->d_zistepv | fu
+ fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
+ // fu*surf2->d_zistepu |
+ // fv*surf->d_zistepv | fu
+ faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
+
+ flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
+ fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+ fld %st(2) // newzi | fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+ fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
+ // fu*surf2->d_zistepu | newzi | fu
+
+ fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
+ // newzibottom | newzi | fu
+ fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
+ // fv*surf2->d_zistepv | newzibottom | newzi |
+ // fu
+ faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
+ fxch %st(1) // newzibottom | testzi | newzi | fu
+
+// if (newzibottom >= testzi)
+// goto newtop;
+
+ fcomp %st(1) // testzi | newzi | fu
+
+ fxch %st(1) // newzi | testzi | fu
+ fmuls float_1_point_001 // newzitop | testzi | fu
+ fxch %st(1) // testzi | newzitop | fu
+
+ fnstsw %ax // status of the fcomp above; the fxch/fmuls in between are scheduled into the gap
+ testb $0x01,%ah
+ jz Lnewtop_fpop3
+
+// if (newzitop >= testzi)
+// {
+
+ fcomp %st(1) // newzitop | fu
+ fnstsw %ax
+ testb $0x45,%ah // C3|C2|C0 all clear only when testzi > newzitop
+ jz Lsortloop_fpop2
+
+// if (surf->d_zistepu >= surf2->d_zistepu)
+// goto newtop;
+
+ flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop | fu
+ fcomps st_d_zistepu(%esi) // newzitop | fu
+ fnstsw %ax
+ testb $0x01,%ah
+ jz Lnewtop_fpop2
+
+Lsortloop_fpop2:
+ fstp %st(0) // clear the FP stack
+ fstp %st(0)
+ movl st_key(%edi),%eax // %eax was clobbered by fnstsw; Lsortloop needs the key in it
+ jmp Lsortloop
+
+
+.globl C(R_EdgeCodeEnd)
+C(R_EdgeCodeEnd):
+
+
+//----------------------------------------------------------------------
+// Surface array address code patching routine
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(R_SurfacePatch)
+C(R_SurfacePatch):
+// each LPatchN label directly follows an instruction with a 0x12345678 placeholder, so LPatchN-4 is that operand
+ movl C(surfaces),%eax
+ addl $(st_size),%eax // %eax = &surfaces[1]
+ movl %eax,LPatch4-4 // immediate of the cmpl that ends the spanstate reset loop
+// the remaining three placeholders are absolute load addresses of surfaces[1].next
+ addl $(st_next),%eax // %eax = &surfaces[1].next
+ movl %eax,LPatch3-4 // load at Lgs_lastspan
+ movl %eax,LPatch2-4 // load at Lgs_leading
+ movl %eax,LPatch0-4 // load in TrailingEdge
+
+ ret
+
+#endif // id386
+
--- /dev/null
+++ b/u/r_varsa.s
@@ -1,0 +1,45 @@
+//
+// r_varsa.s
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+
+//-------------------------------------------------------
+// ASM-only variables: single-precision constants exported with .globl
+//-------------------------------------------------------
+.globl float_1, float_particle_z_clip, float_point5
+.globl float_minus_1, float_0
+float_0: .single 0.0
+float_1: .single 1.0
+float_minus_1: .single -1.0
+float_particle_z_clip: .single PARTICLE_Z_CLIP
+float_point5: .single 0.5
+
+.globl fp_16, fp_64k, fp_1m, fp_64kx64k
+.globl fp_1m_minus_1
+.globl fp_8
+fp_1m: .single 1048576.0 // 2^20
+fp_1m_minus_1: .single 1048575.0 // 2^20 - 1
+fp_64k: .single 65536.0 // 2^16
+fp_8: .single 8.0
+fp_16: .single 16.0
+fp_64kx64k: .long 0x4f000000 // raw IEEE-754 single bits for 2^31, i.e. (float)0x8000*0x10000
+
+
+.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
+FloatZero: .long 0 // all-zero bits = 0.0f
+Float2ToThe31nd: .long 0x4f000000 // raw single-precision bits for 2^31 (same pattern as fp_64kx64k)
+FloatMinus2ToThe31nd: .long 0xcf000000 // same value with the sign bit set: -2^31
+
+.globl C(r_bmodelactive)
+C(r_bmodelactive): .long 0 // 32-bit variable exported under its C name, initially 0
+
+#endif // id386
+
--- /dev/null
+++ b/u/snd_mixa.s
@@ -1,0 +1,199 @@
+//
+// snd_mixa.s
+// x86 assembly-language sound code
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+#ifdef id386
+
+ .text
+
+//----------------------------------------------------------------------
+// 8-bit sound-mixing code
+//----------------------------------------------------------------------
+
+#define ch 4+16 // argument offsets from %esp once the four pushl's below (16 bytes) are done
+#define sc 8+16
+#define count 12+16
+
+.globl C(SND_PaintChannelFrom8)
+C(SND_PaintChannelFrom8): // mixes count 8-bit samples of sc into paintbuffer through snd_scaletable; cdecl stack args (ch, sc, count)
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+ pushl %ebp
+
+// int data;
+// short *lscale, *rscale;
+// unsigned char *sfx;
+// int i;
+
+ movl ch(%esp),%ebx
+ movl sc(%esp),%esi
+
+// if (ch->leftvol > 255)
+// ch->leftvol = 255;
+// if (ch->rightvol > 255)
+// ch->rightvol = 255;
+// (the asm clamps only the register copies; nothing is stored back into ch)
+ movl ch_leftvol(%ebx),%eax
+ movl ch_rightvol(%ebx),%edx
+ cmpl $255,%eax
+ jna LLeftSet // unsigned compare: a negative volume also counts as "above 255"
+ movl $255,%eax
+LLeftSet:
+ cmpl $255,%edx
+ jna LRightSet
+ movl $255,%edx
+LRightSet:
+
+// lscale = snd_scaletable[ch->leftvol >> 3];
+// rscale = snd_scaletable[ch->rightvol >> 3];
+// sfx = (signed char *)sc->data + ch->pos;
+// ch->pos += count;
+ andl $0xF8,%eax // (vol & 0xF8) << 7 == (vol >> 3) * 1024: byte offset of the table row
+ addl $(sfxc_data),%esi // esi = address of the data field inside *sc
+ andl $0xF8,%edx
+ movl ch_pos(%ebx),%edi
+ movl count(%esp),%ecx
+ addl %edi,%esi // esi = sfx
+ shll $7,%eax
+ addl %ecx,%edi
+ shll $7,%edx
+ movl %edi,ch_pos(%ebx)
+ addl $(C(snd_scaletable)),%eax // eax = lscale
+ addl $(C(snd_scaletable)),%edx // edx = rscale
+ subl %ebx,%ebx // ebx = 0 so that %bl alone forms a zero-extended table index
+ movb -1(%esi,%ecx,1),%bl // preload sfx[count-1]; samples are consumed from the end backwards
+
+ testl $1,%ecx
+ jz LMix8Loop // even count: go straight to the two-at-a-time loop (count == 0 is not checked)
+
+ movl (%eax,%ebx,4),%edi // odd count: mix one sample first; table entries are read as 32-bit values
+ movl (%edx,%ebx,4),%ebp
+ addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+ movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+ movb -2(%esi,%ecx,1),%bl // preload the next sample (for count == 1 this reads sfx[-1]; the value is unused)
+
+ decl %ecx
+ jz LDone
+
+// for (i=0 ; i<count ; i++)
+// {
+LMix8Loop:
+
+// data = sfx[i];
+// paintbuffer[i].left += lscale[data];
+// paintbuffer[i].right += rscale[data];
+ movl (%eax,%ebx,4),%edi
+ movl (%edx,%ebx,4),%ebp
+ addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
+ movb -2(%esi,%ecx,1),%bl
+ movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
+
+ movl (%eax,%ebx,4),%edi
+ movl (%edx,%ebx,4),%ebp
+ movb -3(%esi,%ecx,1),%bl // preload for the next pass (on the last pass, ecx == 2, this reads sfx[-1])
+ addl C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
+ addl C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
+ movl %edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
+ movl %ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
+
+// }
+ subl $2,%ecx
+ jnz LMix8Loop // terminates only when ecx reaches exactly 0, so ecx must be positive and even here
+
+LDone:
+ popl %ebp
+ popl %ebx
+ popl %edi
+ popl %esi
+
+ ret
+
+
+//----------------------------------------------------------------------
+// Transfer of stereo buffer to 16-bit DMA buffer code
+//----------------------------------------------------------------------
+
+.globl C(Snd_WriteLinearBlastStereo16)
+C(Snd_WriteLinearBlastStereo16): // scales snd_p[] by snd_vol, clamps to 16 bits and packs pairs into snd_out[]; inputs are globals
+ pushl %esi // preserve register variables
+ pushl %edi
+ pushl %ebx
+
+// int i;
+// int val;
+ movl C(snd_linear_count),%ecx // ecx = element count, used as a descending index
+ movl C(snd_p),%ebx
+ movl C(snd_vol),%esi
+ movl C(snd_out),%edi
+
+// for (i=0 ; i<snd_linear_count ; i+=2)
+// {
+LWLBLoopTop:
+
+// val = (snd_p[i]*snd_vol)>>8;
+// if (val > 0x7fff)
+// snd_out[i] = 0x7fff;
+// else if (val < (short)0x8000)
+// snd_out[i] = (short)0x8000;
+// else
+// snd_out[i] = val;
+ movl -8(%ebx,%ecx,4),%eax // snd_p[ecx-2]
+ imull %esi,%eax
+ sarl $8,%eax
+ cmpl $0x7FFF,%eax
+ jg LClampHigh
+ cmpl $0xFFFF8000,%eax // signed compare against -32768
+ jnl LClampDone
+ movl $0xFFFF8000,%eax
+ jmp LClampDone
+LClampHigh:
+ movl $0x7FFF,%eax
+LClampDone:
+
+// val = (snd_p[i+1]*snd_vol)>>8;
+// if (val > 0x7fff)
+// snd_out[i+1] = 0x7fff;
+// else if (val < (short)0x8000)
+// snd_out[i+1] = (short)0x8000;
+// else
+// snd_out[i+1] = val;
+ movl -4(%ebx,%ecx,4),%edx // snd_p[ecx-1]
+ imull %esi,%edx
+ sarl $8,%edx
+ cmpl $0x7FFF,%edx
+ jg LClampHigh2
+ cmpl $0xFFFF8000,%edx
+ jnl LClampDone2
+ movl $0xFFFF8000,%edx
+ jmp LClampDone2
+LClampHigh2:
+ movl $0x7FFF,%edx
+LClampDone2:
+ shll $16,%edx // second sample goes into the high 16 bits
+ andl $0xFFFF,%eax // first sample stays in the low 16 bits
+ orl %eax,%edx
+ movl %edx,-4(%edi,%ecx,2) // one 32-bit store covers the 16-bit slots snd_out[ecx-2] and snd_out[ecx-1]
+
+// }
+ subl $2,%ecx
+ jnz LWLBLoopTop // exits only at exactly 0: snd_linear_count must be non-zero and even
+
+// snd_p += snd_linear_count; (no instruction here performs this; snd_p is not written)
+
+ popl %ebx
+ popl %edi
+ popl %esi
+
+ ret
+
+
+#endif // id386
+
--- /dev/null
+++ b/u/surf16.s
@@ -1,0 +1,153 @@
+//
+// surf16.s
+// x86 assembly-language 16 bpp surface block drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+//----------------------------------------------------------------------
+// Surface block drawer
+//----------------------------------------------------------------------
+
+ .data
+
+k: .long 0 // rows left to draw; set from blocksize and counted down by R_DrawSurfaceBlock16
+loopentry: .long 0 // entry address fetched from blockjumptable16, target of "jmp *loopentry"
+
+ .align 4
+blockjumptable16: // read as blockjumptable16-4(,blocksize,2): byte offsets 0, 4, 12, 28 for blocksize 2, 4, 8, 16
+ .long LEnter2_16
+ .long LEnter4_16
+ .long 0, LEnter8_16
+ .long 0, 0, 0, LEnter16_16
+
+
+ .text
+
+ .align 4
+.globl C(R_Surf16Start)
+C(R_Surf16Start): // global label in front of the drawer; no code of its own
+
+ .align 4
+.globl C(R_DrawSurfaceBlock16)
+C(R_DrawSurfaceBlock16): // draws blocksize rows; all inputs come from globals, the per-row pixel code comes from block16.h
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+ movl C(blocksize),%eax
+ movl C(prowdestbase),%edi
+ movl C(pbasesource),%esi
+ movl C(sourcesstep),%ebx
+ movl blockjumptable16-4(,%eax,2),%ecx // pick the row-code entry point for this blocksize
+ movl %eax,k // k = number of rows still to draw
+ movl %ecx,loopentry
+ movl C(lightleft),%edx
+ movl C(lightright),%ebp
+
+Lblockloop16:
+
+ subl %edx,%ebp // ebp = lightright - lightleft
+ movb C(blockdivshift),%cl
+ sarl %cl,%ebp // arithmetic shift right by blockdivshift
+ jns Lp1_16
+ testl C(blockdivmask),%ebp // negative result: add 1 when it has any blockdivmask bit set
+ jz Lp1_16
+ incl %ebp
+Lp1_16:
+
+ subl %eax,%eax
+ subl %ecx,%ecx // high words must be 0 in loop for addressing
+
+ jmp *loopentry
+
+ .align 4
+
+#include "block16.h"
+
+ movl C(pbasesource),%esi // row finished: reload the per-row state and advance it by one row
+ movl C(lightleft),%edx
+ movl C(lightright),%ebp
+ movl C(sourcetstep),%eax
+ movl C(lightrightstep),%ecx
+ movl C(prowdestbase),%edi
+
+ addl %eax,%esi // pbasesource += sourcetstep
+ addl %ecx,%ebp // lightright += lightrightstep
+
+ movl C(lightleftstep),%eax
+ movl C(surfrowbytes),%ecx
+
+ addl %eax,%edx // lightleft += lightleftstep
+ addl %ecx,%edi // prowdestbase += surfrowbytes
+
+ movl %esi,C(pbasesource)
+ movl %ebp,C(lightright)
+ movl k,%eax
+ movl %edx,C(lightleft)
+ decl %eax // sets ZF for the jnz below; the movl's in between leave the flags alone
+ movl %edi,C(prowdestbase)
+ movl %eax,k
+ jnz Lblockloop16
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+.globl C(R_Surf16End)
+C(R_Surf16End): // global label directly after the drawer
+
+//----------------------------------------------------------------------
+// Code patching routines
+//----------------------------------------------------------------------
+ .data
+
+ .align 4
+LPatchTable16: // 16 addresses, each the dword that ends at an LBPatchN label (labels are not defined in the visible part of this file)
+ .long LBPatch0-4
+ .long LBPatch1-4
+ .long LBPatch2-4
+ .long LBPatch3-4
+ .long LBPatch4-4
+ .long LBPatch5-4
+ .long LBPatch6-4
+ .long LBPatch7-4
+ .long LBPatch8-4
+ .long LBPatch9-4
+ .long LBPatch10-4
+ .long LBPatch11-4
+ .long LBPatch12-4
+ .long LBPatch13-4
+ .long LBPatch14-4
+ .long LBPatch15-4
+
+ .text
+
+ .align 4
+.globl C(R_Surf16Patch)
+C(R_Surf16Patch): // writes the current colormap value into every address listed in LPatchTable16
+ pushl %ebx
+
+ movl C(colormap),%eax // value to store at each patch site
+ movl $LPatchTable16,%ebx // ebx walks the table
+ movl $16,%ecx // number of table entries
+LPatchLoop16:
+ movl (%ebx),%edx // edx = address to patch
+ addl $4,%ebx
+ movl %eax,(%edx)
+ decl %ecx
+ jnz LPatchLoop16
+
+ popl %ebx
+
+ ret
+
+
+#endif // id386
--- /dev/null
+++ b/u/surf8.s
@@ -1,0 +1,764 @@
+//
+// surf8.s
+// x86 assembly-language 8 bpp surface block drawing code.
+//
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "asm_draw.h"
+
+#ifdef id386
+
+ .data
+
+sb_v: .long 0 // vertical-block countdown: each mip drawer stores r_numvblocks here and decrements it per block
+
+ .text
+
+ .align 4
+.globl C(R_Surf8Start)
+C(R_Surf8Start): // global label in front of the 8 bpp drawers; no code of its own
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 0
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(R_DrawSurfaceBlock8_mip0)
+C(R_DrawSurfaceBlock8_mip0): // draws r_numvblocks blocks of 16 rows x 16 bytes; all inputs come from globals
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+// for (v=0 ; v<numvblocks ; v++)
+// {
+ movl C(r_lightptr),%ebx
+ movl C(r_numvblocks),%eax
+
+ movl %eax,sb_v
+ movl C(prowdestbase),%edi
+
+ movl C(pbasesource),%esi
+
+Lv_loop_mip0:
+
+// lightleft = lightptr[0];
+// lightright = lightptr[1];
+// lightdelta = (lightleft - lightright) & 0xFFFFF;
+ movl (%ebx),%eax // lightleft
+ movl 4(%ebx),%edx // lightright
+
+ movl %eax,%ebp
+ movl C(r_lightwidth),%ecx
+
+ movl %edx,C(lightright)
+ subl %edx,%ebp
+
+ andl $0xFFFFF,%ebp
+ leal (%ebx,%ecx,4),%ebx
+
+// lightptr += lightwidth;
+ movl %ebx,C(r_lightptr)
+
+// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+// 0xF0000000;
+ movl 4(%ebx),%ecx // lightptr[1]
+ movl (%ebx),%ebx // lightptr[0]
+
+ subl %eax,%ebx
+ subl %edx,%ecx
+
+ sarl $4,%ecx // the shift is hard-coded to 4 in this mip-0 drawer
+ orl $0xF0000000,%ebp // top nibble of lightdelta = 0xF: row counter, gives 16 passes of Lblockloop8_mip0
+
+ sarl $4,%ebx
+ movl %ecx,C(lightrightstep)
+
+ subl %ecx,%ebx
+ andl $0xFFFFF,%ebx
+
+ orl $0xF0000000,%ebx // top nibble 0xF: adding it lowers lightdelta's top nibble by one per row
+ subl %ecx,%ecx // high word must be 0 in loop for addressing
+
+ movl %ebx,C(lightdeltastep)
+ subl %ebx,%ebx // high word must be 0 in loop for addressing
+
+Lblockloop8_mip0:
+ movl %ebp,C(lightdelta)
+ movb 14(%esi),%cl // texels are fetched from offset 15 down to 0, two per step (ebx and ecx)
+
+ sarl $4,%ebp // per-texel light increment for this row
+ movb %dh,%bh // index = (bits 8-15 of the light value << 8) | texel
+
+ movb 15(%esi),%bl
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah // 0x12345678 is a placeholder; R_Surf8Patch stores colormap over it (LBPatch0-4)
+LBPatch0:
+ movb 13(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch1:
+ movb 12(%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax // move the two finished pixels to the high half of eax
+ movb %dh,%ch
+
+ addl %ebp,%edx
+ movb 0x12345678(%ebx),%ah
+LBPatch2:
+
+ movb 11(%esi),%bl
+ movb 0x12345678(%ecx),%al
+LBPatch3:
+
+ movb 10(%esi),%cl
+ movl %eax,12(%edi) // store destination bytes 12..15
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah
+LBPatch4:
+ movb 9(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch5:
+ movb 8(%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax
+ movb %dh,%ch
+
+ addl %ebp,%edx
+ movb 0x12345678(%ebx),%ah
+LBPatch6:
+
+ movb 7(%esi),%bl
+ movb 0x12345678(%ecx),%al
+LBPatch7:
+
+ movb 6(%esi),%cl
+ movl %eax,8(%edi) // store destination bytes 8..11
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah
+LBPatch8:
+ movb 5(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch9:
+ movb 4(%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax
+ movb %dh,%ch
+
+ addl %ebp,%edx
+ movb 0x12345678(%ebx),%ah
+LBPatch10:
+
+ movb 3(%esi),%bl
+ movb 0x12345678(%ecx),%al
+LBPatch11:
+
+ movb 2(%esi),%cl
+ movl %eax,4(%edi) // store destination bytes 4..7
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah
+LBPatch12:
+ movb 1(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch13:
+ movb (%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax
+ movb %dh,%ch
+
+ movb 0x12345678(%ebx),%ah
+LBPatch14:
+ movl C(lightright),%edx // reload the row's starting light value
+
+ movb 0x12345678(%ecx),%al
+LBPatch15:
+ movl C(lightdelta),%ebp
+
+ movl %eax,(%edi) // store destination bytes 0..3
+
+ addl C(sourcetstep),%esi
+ addl C(surfrowbytes),%edi
+
+ addl C(lightrightstep),%edx
+ addl C(lightdeltastep),%ebp // carry out of bit 31 is set until the top-nibble counter has run down past 0
+
+ movl %edx,C(lightright) // movl leaves CF from the addl above intact
+ jc Lblockloop8_mip0
+
+// if (pbasesource >= r_sourcemax)
+// pbasesource -= stepback;
+
+ cmpl C(r_sourcemax),%esi
+ jb LSkip_mip0 // unsigned (address) compare
+ subl C(r_stepback),%esi
+LSkip_mip0:
+
+ movl C(r_lightptr),%ebx
+ decl sb_v
+
+ jnz Lv_loop_mip0
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 1
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(R_DrawSurfaceBlock8_mip1)
+C(R_DrawSurfaceBlock8_mip1): // draws r_numvblocks blocks of 8 rows x 8 bytes; all inputs come from globals
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+// for (v=0 ; v<numvblocks ; v++)
+// {
+ movl C(r_lightptr),%ebx
+ movl C(r_numvblocks),%eax
+
+ movl %eax,sb_v
+ movl C(prowdestbase),%edi
+
+ movl C(pbasesource),%esi
+
+Lv_loop_mip1:
+
+// lightleft = lightptr[0];
+// lightright = lightptr[1];
+// lightdelta = (lightleft - lightright) & 0xFFFFF;
+ movl (%ebx),%eax // lightleft
+ movl 4(%ebx),%edx // lightright
+
+ movl %eax,%ebp
+ movl C(r_lightwidth),%ecx
+
+ movl %edx,C(lightright)
+ subl %edx,%ebp
+
+ andl $0xFFFFF,%ebp
+ leal (%ebx,%ecx,4),%ebx
+
+// lightptr += lightwidth;
+ movl %ebx,C(r_lightptr)
+
+// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+// 0xF0000000;
+ movl 4(%ebx),%ecx // lightptr[1]
+ movl (%ebx),%ebx // lightptr[0]
+
+ subl %eax,%ebx
+ subl %edx,%ecx
+
+ sarl $3,%ecx // the shift is hard-coded to 3 in this mip-1 drawer
+ orl $0x70000000,%ebp // top nibble of lightdelta = 0x7: row counter, gives 8 passes of Lblockloop8_mip1
+
+ sarl $3,%ebx
+ movl %ecx,C(lightrightstep)
+
+ subl %ecx,%ebx
+ andl $0xFFFFF,%ebx
+
+ orl $0xF0000000,%ebx // top nibble 0xF: adding it lowers lightdelta's top nibble by one per row
+ subl %ecx,%ecx // high word must be 0 in loop for addressing
+
+ movl %ebx,C(lightdeltastep)
+ subl %ebx,%ebx // high word must be 0 in loop for addressing
+
+Lblockloop8_mip1:
+ movl %ebp,C(lightdelta)
+ movb 6(%esi),%cl // texels are fetched from offset 7 down to 0, two per step (ebx and ecx)
+
+ sarl $3,%ebp // per-texel light increment for this row
+ movb %dh,%bh // index = (bits 8-15 of the light value << 8) | texel
+
+ movb 7(%esi),%bl
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah // 0x12345678 is a placeholder; R_Surf8Patch stores colormap over it (LBPatch22-4)
+LBPatch22:
+ movb 5(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch23:
+ movb 4(%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax // move the two finished pixels to the high half of eax
+ movb %dh,%ch
+
+ addl %ebp,%edx
+ movb 0x12345678(%ebx),%ah
+LBPatch24:
+
+ movb 3(%esi),%bl
+ movb 0x12345678(%ecx),%al
+LBPatch25:
+
+ movb 2(%esi),%cl
+ movl %eax,4(%edi) // store destination bytes 4..7
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah
+LBPatch26:
+ movb 1(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch27:
+ movb (%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax
+ movb %dh,%ch
+
+ movb 0x12345678(%ebx),%ah
+LBPatch28:
+ movl C(lightright),%edx // reload the row's starting light value
+
+ movb 0x12345678(%ecx),%al
+LBPatch29:
+ movl C(lightdelta),%ebp
+
+ movl %eax,(%edi) // store destination bytes 0..3
+ movl C(sourcetstep),%eax
+
+ addl %eax,%esi
+ movl C(surfrowbytes),%eax
+
+ addl %eax,%edi
+ movl C(lightrightstep),%eax
+
+ addl %eax,%edx
+ movl C(lightdeltastep),%eax
+
+ addl %eax,%ebp // carry out of bit 31 is set until the top-nibble counter has run down past 0
+ movl %edx,C(lightright) // movl leaves CF from the addl above intact
+
+ jc Lblockloop8_mip1
+
+// if (pbasesource >= r_sourcemax)
+// pbasesource -= stepback;
+
+ cmpl C(r_sourcemax),%esi
+ jb LSkip_mip1 // unsigned (address) compare
+ subl C(r_stepback),%esi
+LSkip_mip1:
+
+ movl C(r_lightptr),%ebx
+ decl sb_v
+
+ jnz Lv_loop_mip1
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 2
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(R_DrawSurfaceBlock8_mip2)
+C(R_DrawSurfaceBlock8_mip2): // draws r_numvblocks blocks of 4 rows x 4 bytes; all inputs come from globals
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+// for (v=0 ; v<numvblocks ; v++)
+// {
+ movl C(r_lightptr),%ebx
+ movl C(r_numvblocks),%eax
+
+ movl %eax,sb_v
+ movl C(prowdestbase),%edi
+
+ movl C(pbasesource),%esi
+
+Lv_loop_mip2:
+
+// lightleft = lightptr[0];
+// lightright = lightptr[1];
+// lightdelta = (lightleft - lightright) & 0xFFFFF;
+ movl (%ebx),%eax // lightleft
+ movl 4(%ebx),%edx // lightright
+
+ movl %eax,%ebp
+ movl C(r_lightwidth),%ecx
+
+ movl %edx,C(lightright)
+ subl %edx,%ebp
+
+ andl $0xFFFFF,%ebp
+ leal (%ebx,%ecx,4),%ebx
+
+// lightptr += lightwidth;
+ movl %ebx,C(r_lightptr)
+
+// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+// 0xF0000000;
+ movl 4(%ebx),%ecx // lightptr[1]
+ movl (%ebx),%ebx // lightptr[0]
+
+ subl %eax,%ebx
+ subl %edx,%ecx
+
+ sarl $2,%ecx // the shift is hard-coded to 2 in this mip-2 drawer
+ orl $0x30000000,%ebp // top nibble of lightdelta = 0x3: row counter, gives 4 passes of Lblockloop8_mip2
+
+ sarl $2,%ebx
+ movl %ecx,C(lightrightstep)
+
+ subl %ecx,%ebx
+
+ andl $0xFFFFF,%ebx
+
+ orl $0xF0000000,%ebx // top nibble 0xF: adding it lowers lightdelta's top nibble by one per row
+ subl %ecx,%ecx // high word must be 0 in loop for addressing
+
+ movl %ebx,C(lightdeltastep)
+ subl %ebx,%ebx // high word must be 0 in loop for addressing
+
+Lblockloop8_mip2:
+ movl %ebp,C(lightdelta)
+ movb 2(%esi),%cl // texels are fetched from offset 3 down to 0, two per step (ebx and ecx)
+
+ sarl $2,%ebp // per-texel light increment for this row
+ movb %dh,%bh // index = (bits 8-15 of the light value << 8) | texel
+
+ movb 3(%esi),%bl
+ addl %ebp,%edx
+
+ movb %dh,%ch
+ addl %ebp,%edx
+
+ movb 0x12345678(%ebx),%ah // 0x12345678 is a placeholder; R_Surf8Patch stores colormap over it (LBPatch18-4)
+LBPatch18:
+ movb 1(%esi),%bl
+
+ movb 0x12345678(%ecx),%al
+LBPatch19:
+ movb (%esi),%cl
+
+ movb %dh,%bh
+ addl %ebp,%edx
+
+ rorl $16,%eax // move the two finished pixels to the high half of eax
+ movb %dh,%ch
+
+ movb 0x12345678(%ebx),%ah
+LBPatch20:
+ movl C(lightright),%edx // reload the row's starting light value
+
+ movb 0x12345678(%ecx),%al
+LBPatch21:
+ movl C(lightdelta),%ebp
+
+ movl %eax,(%edi) // store the row's 4 destination bytes
+ movl C(sourcetstep),%eax
+
+ addl %eax,%esi
+ movl C(surfrowbytes),%eax
+
+ addl %eax,%edi
+ movl C(lightrightstep),%eax
+
+ addl %eax,%edx
+ movl C(lightdeltastep),%eax
+
+ addl %eax,%ebp // carry out of bit 31 is set until the top-nibble counter has run down past 0
+ movl %edx,C(lightright) // movl leaves CF from the addl above intact
+
+ jc Lblockloop8_mip2
+
+// if (pbasesource >= r_sourcemax)
+// pbasesource -= stepback;
+
+ cmpl C(r_sourcemax),%esi
+ jb LSkip_mip2 // unsigned (address) compare
+ subl C(r_stepback),%esi
+LSkip_mip2:
+
+ movl C(r_lightptr),%ebx
+ decl sb_v
+
+ jnz Lv_loop_mip2
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+
+//----------------------------------------------------------------------
+// Surface block drawer for mip level 3
+//----------------------------------------------------------------------
+
+ .align 4
+.globl C(R_DrawSurfaceBlock8_mip3)
+C(R_DrawSurfaceBlock8_mip3): // draws r_numvblocks blocks of 2 rows x 2 bytes, both rows unrolled (no inner row loop)
+ pushl %ebp // preserve caller's stack frame
+ pushl %edi
+ pushl %esi // preserve register variables
+ pushl %ebx
+
+// for (v=0 ; v<numvblocks ; v++)
+// {
+ movl C(r_lightptr),%ebx
+ movl C(r_numvblocks),%eax
+
+ movl %eax,sb_v
+ movl C(prowdestbase),%edi
+
+ movl C(pbasesource),%esi
+
+Lv_loop_mip3:
+
+// lightleft = lightptr[0];
+// lightright = lightptr[1];
+// lightdelta = (lightleft - lightright) & 0xFFFFF;
+ movl (%ebx),%eax // lightleft
+ movl 4(%ebx),%edx // lightright
+
+ movl %eax,%ebp
+ movl C(r_lightwidth),%ecx
+
+ movl %edx,C(lightright)
+ subl %edx,%ebp
+
+ andl $0xFFFFF,%ebp
+ leal (%ebx,%ecx,4),%ebx
+
+ movl %ebp,C(lightdelta) // stored without a counter nibble: this drawer has no carry-driven row loop
+// lightptr += lightwidth;
+ movl %ebx,C(r_lightptr)
+
+// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+// 0xF0000000;
+ movl 4(%ebx),%ecx // lightptr[1]
+ movl (%ebx),%ebx // lightptr[0]
+
+ subl %eax,%ebx
+ subl %edx,%ecx
+
+ sarl $1,%ecx // the shift is hard-coded to 1 in this mip-3 drawer
+
+ sarl $1,%ebx
+ movl %ecx,C(lightrightstep)
+
+ subl %ecx,%ebx
+ andl $0xFFFFF,%ebx
+
+ sarl $1,%ebp // per-texel light increment for the first row
+ orl $0xF0000000,%ebx
+
+ movl %ebx,C(lightdeltastep)
+ subl %ebx,%ebx // high word must be 0 in loop for addressing
+
+ movb 1(%esi),%bl // first row: texel 1 via ebx, texel 0 via ecx
+ subl %ecx,%ecx // high word must be 0 in loop for addressing
+
+ movb %dh,%bh // index = (bits 8-15 of the light value << 8) | texel
+ movb (%esi),%cl
+
+ addl %ebp,%edx
+ movb %dh,%ch
+
+ movb 0x12345678(%ebx),%al // 0x12345678 is a placeholder; R_Surf8Patch stores colormap over it (LBPatch16-4)
+LBPatch16:
+ movl C(lightright),%edx
+
+ movb %al,1(%edi)
+ movb 0x12345678(%ecx),%al
+LBPatch17:
+
+ movb %al,(%edi)
+ movl C(sourcetstep),%eax
+
+ addl %eax,%esi // advance source and destination to the second row
+ movl C(surfrowbytes),%eax
+
+ addl %eax,%edi
+ movl C(lightdeltastep),%eax
+
+ movl C(lightdelta),%ebp
+ movb (%esi),%cl
+
+ addl %eax,%ebp // second row: lightdelta + lightdeltastep
+ movl C(lightrightstep),%eax
+
+ sarl $1,%ebp
+ addl %eax,%edx // second row starts from lightright + lightrightstep
+
+ movb %dh,%bh
+ movb 1(%esi),%bl
+
+ addl %ebp,%edx
+ movb %dh,%ch
+
+ movb 0x12345678(%ebx),%al
+LBPatch30:
+ movl C(sourcetstep),%edx
+
+ movb %al,1(%edi)
+ movb 0x12345678(%ecx),%al
+LBPatch31:
+
+ movb %al,(%edi)
+ movl C(surfrowbytes),%ebp
+
+ addl %edx,%esi // step past the second row
+ addl %ebp,%edi
+
+// if (pbasesource >= r_sourcemax)
+// pbasesource -= stepback;
+
+ cmpl C(r_sourcemax),%esi
+ jb LSkip_mip3 // unsigned (address) compare
+ subl C(r_stepback),%esi
+LSkip_mip3:
+
+ movl C(r_lightptr),%ebx
+ decl sb_v
+
+ jnz Lv_loop_mip3
+
+ popl %ebx // restore register variables
+ popl %esi
+ popl %edi
+ popl %ebp // restore the caller's stack frame
+ ret
+
+
+.globl C(R_Surf8End)
+C(R_Surf8End):
+
+//----------------------------------------------------------------------
+// Code patching routines
+//----------------------------------------------------------------------
+ .data
+
+ .align 4
+LPatchTable8: // 32 addresses: the 4-byte displacement that ends at each LBPatchN label in the four mip drawers
+ .long LBPatch0-4
+ .long LBPatch1-4
+ .long LBPatch2-4
+ .long LBPatch3-4
+ .long LBPatch4-4
+ .long LBPatch5-4
+ .long LBPatch6-4
+ .long LBPatch7-4
+ .long LBPatch8-4
+ .long LBPatch9-4
+ .long LBPatch10-4
+ .long LBPatch11-4
+ .long LBPatch12-4
+ .long LBPatch13-4
+ .long LBPatch14-4
+ .long LBPatch15-4
+ .long LBPatch16-4
+ .long LBPatch17-4
+ .long LBPatch18-4
+ .long LBPatch19-4
+ .long LBPatch20-4
+ .long LBPatch21-4
+ .long LBPatch22-4
+ .long LBPatch23-4
+ .long LBPatch24-4
+ .long LBPatch25-4
+ .long LBPatch26-4
+ .long LBPatch27-4
+ .long LBPatch28-4
+ .long LBPatch29-4
+ .long LBPatch30-4
+ .long LBPatch31-4
+
+ .text
+
+ .align 4
+.globl C(R_Surf8Patch)
+C(R_Surf8Patch): // replaces the 0x12345678 placeholders in the drawers with the current colormap value
+ pushl %ebx
+
+ movl C(colormap),%eax // value to store at each patch site
+ movl $LPatchTable8,%ebx // ebx walks the table
+ movl $32,%ecx // number of table entries
+LPatchLoop8:
+ movl (%ebx),%edx // edx = address to patch
+ addl $4,%ebx
+ movl %eax,(%edx)
+ decl %ecx
+ jnz LPatchLoop8
+
+ popl %ebx
+
+ ret
+
+#endif // id386
--- /dev/null
+++ b/u/sys_dosa.s
@@ -1,0 +1,95 @@
+//
+// sys_dosa.s
+// x86 assembly-language DOS-dependent routines.
+
+#include "asm_i386.h"
+#include "quakeasm.h"
+
+
+ .data
+
+ .align 4
+fpenv: // 32-byte scratch buffer for the x87 environment image
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+
+ .text
+
+.globl C(MaskExceptions)
+C(MaskExceptions): // sets the low six bits of the saved x87 control word and reloads the environment
+ fnstenv fpenv // save the FPU environment; the control word is its first field
+ orl $0x3F,fpenv // bits 0-5 of the control word are the six exception-mask bits
+ fldenv fpenv // load the modified environment back
+
+ ret
+
+/*
+.globl C(unmaskexceptions)
+C(unmaskexceptions):
+ fnstenv fpenv
+ andl $0xFFFFFFE0,fpenv
+ fldenv fpenv
+
+ ret
+*/
+
+ .data
+
+ .align 4
+.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
+ceil_cw: .long 0 // the three *_cw presets are filled in by Sys_SetFPCW
+single_cw: .long 0
+full_cw: .long 0
+cw: .long 0 // control word as read by Sys_SetFPCW
+pushed_cw: .long 0 // single save slot used by Sys_PushFPCW_SetHigh / Sys_PopFPCW
+
+ .text
+
+.globl C(Sys_LowFPPrecision)
+C(Sys_LowFPPrecision): // load the single_cw preset into the x87 control word
+ fldcw single_cw
+
+ ret
+
+.globl C(Sys_HighFPPrecision)
+C(Sys_HighFPPrecision): // load the full_cw preset into the x87 control word
+ fldcw full_cw
+
+ ret
+
+.globl C(Sys_PushFPCW_SetHigh)
+C(Sys_PushFPCW_SetHigh): // save the current control word, then switch to full_cw
+ fnstcw pushed_cw // one slot only: a second call overwrites the saved value
+ fldcw full_cw
+
+ ret
+
+.globl C(Sys_PopFPCW)
+C(Sys_PopFPCW): // restore the control word saved by Sys_PushFPCW_SetHigh
+ fldcw pushed_cw
+
+ ret
+
+.globl C(Sys_SetFPCW)
+C(Sys_SetFPCW): // derives full_cw, single_cw and ceil_cw from the current control word; uses %eax
+ fnstcw cw
+ movl cw,%eax // %ah = control word bits 8-15; bits 8-9 are precision control, bits 10-11 rounding control
+#ifdef id386
+ andb $0xF0,%ah // clear the precision and rounding fields, keep bits 12-15
+ orb $0x03,%ah // round-to-nearest, 64-bit mantissa precision
+#endif
+ movl %eax,full_cw
+
+#ifdef id386
+ andb $0xF0,%ah
+ orb $0x0C,%ah // chop (truncate) mode, single precision
+#endif
+ movl %eax,single_cw
+
+#ifdef id386
+ andb $0xF0,%ah
+ orb $0x08,%ah // ceil (round-up) mode, single precision
+#endif
+ movl %eax,ceil_cw // without id386 all three presets equal the unmodified control word
+
+ ret
+
--- /dev/null
+++ b/u/worlda.s
@@ -1,0 +1,125 @@
+//
+// worlda.s
+// x86 assembly-language server testing stuff
+//
+
+#define GLQUAKE 1 // don't include unneeded defs
+#include "asm_i386.h"
+#include "quakeasm.h"
+#include "d_ifacea.h"
+
+#ifdef id386
+
+ .data
+
+Ltemp: .long 0 // scratch dword: d is stored here so its sign bit can be read with integer code
+
+ .text
+
+//----------------------------------------------------------------------
+// hull-point test
+//----------------------------------------------------------------------
+
+#define hull 4+8 // arg 1, read after two pushes (edi, ebx) = 8 bytes
+#define num 8+4 // arg 2, read after one push (edi) = 4 bytes
+#define p 12+12 // arg 3, read after three pushes (edi, ebx, ebp) = 12 bytes
+
+ .align 4
+.globl C(SV_HullPointContents)
+C(SV_HullPointContents): // walks the clipnode tree from num until num is negative; result is returned in %eax
+ pushl %edi // preserve register variables
+ movl num(%esp),%eax
+ testl %eax,%eax
+ js Lhquickout // num already negative: return it unchanged
+
+// float d;
+// dclipnode_t *node;
+// mplane_t *plane;
+
+ pushl %ebx
+ movl hull(%esp),%ebx
+
+ pushl %ebp
+ movl p(%esp),%edx
+
+ movl hu_clipnodes(%ebx),%edi
+ movl hu_planes(%ebx),%ebp
+
+ subl %ebx,%ebx // ebx = 0; only %bl is written afterwards, so ebx is a zero-extended plane type
+ pushl %esi
+
+// %ebx: 0
+// %eax: num
+// %edx: p
+// %edi: hull->clipnodes
+// %ebp: hull->planes
+
+// while (num >= 0)
+// {
+
+Lhloop:
+
+// node = hull->clipnodes + num;
+// plane = hull->planes + node->planenum;
+// !!! if the size of dclipnode_t changes, the scaling of %eax needs to be
+// changed !!!
+ movl nd_planenum(%edi,%eax,8),%ecx // clipnodes are indexed with an 8-byte stride
+ movl nd_children(%edi,%eax,8),%eax // one 32-bit load fetches both 16-bit children
+ movl %eax,%esi // esi keeps the original order: children[1] in the high half
+ rorl $16,%eax // eax now has children[0] in the high half
+ leal (%ecx,%ecx,4),%ecx // planenum*5; with the *4 scale below this is a 20-byte plane stride
+
+// if (plane->type < 3)
+// d = p[plane->type] - plane->dist;
+ movb pl_type(%ebp,%ecx,4),%bl
+ cmpb $3,%bl
+ jb Lnodot
+
+// else
+// d = DotProduct (plane->normal, p) - plane->dist;
+ flds pl_normal(%ebp,%ecx,4)
+ fmuls 0(%edx) // n0*p0
+ flds pl_normal+4(%ebp,%ecx,4)
+ fmuls 4(%edx) // n1*p1 | n0*p0
+ flds pl_normal+8(%ebp,%ecx,4)
+ fmuls 8(%edx) // n2*p2 | n1*p1 | n0*p0
+ fxch %st(1) // n1*p1 | n2*p2 | n0*p0
+ faddp %st(0),%st(2) // n2*p2 | n0*p0+n1*p1
+ faddp %st(0),%st(1) // dot
+ fsubs pl_dist(%ebp,%ecx,4) // d
+ jmp Lsub
+
+Lnodot:
+ flds pl_dist(%ebp,%ecx,4)
+ fsubrs (%edx,%ebx,4) // d = p[type] - dist (reverse subtract)
+
+Lsub:
+ sarl $16,%eax // eax = sign-extended children[0]
+ sarl $16,%esi // esi = sign-extended children[1]
+
+// if (d < 0)
+// num = node->children[1];
+// else
+// num = node->children[0];
+ fstps Ltemp // pop d to memory as a 32-bit float
+ movl Ltemp,%ecx
+ sarl $31,%ecx // ecx = all ones when d's sign bit is set (so -0.0 also selects children[1])
+ andl %ecx,%esi
+ xorl $0xFFFFFFFF,%ecx
+ andl %ecx,%eax
+ orl %esi,%eax // branch-free select; the orl also sets SF for the jns
+ jns Lhloop
+
+// return num;
+Lhdone:
+ popl %esi
+ popl %ebp
+ popl %ebx // restore register variables
+
+Lhquickout:
+ popl %edi
+
+ ret
+
+#endif // id386
+
--- a/worlda.s
+++ /dev/null
@@ -1,125 +1,0 @@
-//
-// worlda.s
-// x86 assembly-language server testing stuff
-//
-
-#define GLQUAKE 1 // don't include unneeded defs
-#include "asm_i386.h"
-#include "quakeasm.h"
-#include "d_ifacea.h"
-
-#ifdef id386
-
- .data
-
-Ltemp: .long 0
-
- .text
-
-//----------------------------------------------------------------------
-// hull-point test
-//----------------------------------------------------------------------
-
-#define hull 4+8 // because only partially pushed
-#define num 8+4 // because only partially pushed
-#define p 12+12 // because only partially pushed
-
- .align 4
-.globl C(SV_HullPointContents)
-C(SV_HullPointContents):
- pushl %edi // preserve register variables
- movl num(%esp),%eax
- testl %eax,%eax
- js Lhquickout
-
-// float d;
-// dclipnode_t *node;
-// mplane_t *plane;
-
- pushl %ebx
- movl hull(%esp),%ebx
-
- pushl %ebp
- movl p(%esp),%edx
-
- movl hu_clipnodes(%ebx),%edi
- movl hu_planes(%ebx),%ebp
-
- subl %ebx,%ebx
- pushl %esi
-
-// %ebx: 0
-// %eax: num
-// %edx: p
-// %edi: hull->clipnodes
-// %ebp: hull->planes
-
-// while (num >= 0)
-// {
-
-Lhloop:
-
-// node = hull->clipnodes + num;
-// plane = hull->planes + node->planenum;
-// !!! if the size of dclipnode_t changes, the scaling of %eax needs to be
-// changed !!!
- movl nd_planenum(%edi,%eax,8),%ecx
- movl nd_children(%edi,%eax,8),%eax
- movl %eax,%esi
- rorl $16,%eax
- leal (%ecx,%ecx,4),%ecx
-
-// if (plane->type < 3)
-// d = p[plane->type] - plane->dist;
- movb pl_type(%ebp,%ecx,4),%bl
- cmpb $3,%bl
- jb Lnodot
-
-// else
-// d = DotProduct (plane->normal, p) - plane->dist;
- flds pl_normal(%ebp,%ecx,4)
- fmuls 0(%edx)
- flds pl_normal+4(%ebp,%ecx,4)
- fmuls 4(%edx)
- flds pl_normal+8(%ebp,%ecx,4)
- fmuls 8(%edx)
- fxch %st(1)
- faddp %st(0),%st(2)
- faddp %st(0),%st(1)
- fsubs pl_dist(%ebp,%ecx,4)
- jmp Lsub
-
-Lnodot:
- flds pl_dist(%ebp,%ecx,4)
- fsubrs (%edx,%ebx,4)
-
-Lsub:
- sarl $16,%eax
- sarl $16,%esi
-
-// if (d < 0)
-// num = node->children[1];
-// else
-// num = node->children[0];
- fstps Ltemp
- movl Ltemp,%ecx
- sarl $31,%ecx
- andl %ecx,%esi
- xorl $0xFFFFFFFF,%ecx
- andl %ecx,%eax
- orl %esi,%eax
- jns Lhloop
-
-// return num;
-Lhdone:
- popl %esi
- popl %ebp
- popl %ebx // restore register variables
-
-Lhquickout:
- popl %edi
-
- ret
-
-#endif // id386
-