;
; GS Lib v2.0
; manage the graphic processor
;
; (c) Duke / Napalm 2001
; (c) wiRe / Napalm 2002
;


; base address of the memory mapped gs registers written directly by this file
; (offsets 0x00, 0x90, 0xA0 and 0xE0 are used by the routines below)
GS_REG          EQU     0x12000000
; offset of the CSR register, always combined as GS_REG|GS_REG_CSR in this file
GS_REG_CSR      EQU     0x1000

; some flags for GsInit
; a0: interlace mode
NON_INTERLACED  EQU     0
INTERLACED      EQU     1
; a1: video standard (same values as returned by GsDetectMode)
NTSC            EQU     2
PAL             EQU     3
; a2: frame/field mode
FRAME           EQU     1
FIELD           EQU     2

; gs general purpose registers
; these are the register ids passed as first argument of GIF_DATA
; (eg. GIF_DATA GS_PRIM, PRIM_SPRITE)
GS_PRIM         EQU 0x00
GS_RGBAQ        EQU 0x01
GS_ST           EQU 0x02
GS_UV           EQU 0x03
GS_XYZF2        EQU 0x04
GS_XYZ2         EQU 0x05
GS_TEX0_1       EQU 0x06
GS_TEX0_2       EQU 0x07
GS_CLAMP_1      EQU 0x08
GS_CLAMP_2      EQU 0x09
GS_FOG          EQU 0x0A
GS_XYZF3        EQU 0x0C
GS_XYZ3         EQU 0x0D

GS_TEX1_1       EQU 0x14
GS_TEX1_2       EQU 0x15
GS_TEX2_1       EQU 0x16
GS_TEX2_2       EQU 0x17
GS_XYOFFSET_1   EQU 0x18
GS_XYOFFSET_2   EQU 0x19
GS_PRMODECONT   EQU 0x1A
GS_PRMODE       EQU 0x1B

GS_TEXA         EQU 0x3B
GS_FOGCOL       EQU 0x3D
GS_TEXFLUSH     EQU 0x3F

GS_SCISSOR_1    EQU 0x40
GS_SCISSOR_2    EQU 0x41
GS_ALPHA_1      EQU 0x42
GS_ALPHA_2      EQU 0x43
GS_DTHE         EQU 0x45
GS_COLCLAMP     EQU 0x46
GS_TEST_1       EQU 0x47
GS_TEST_2       EQU 0x48
GS_PABE         EQU 0x49
GS_FRAME_1      EQU 0x4C
GS_FRAME_2      EQU 0x4D
GS_ZBUF_1       EQU 0x4E
GS_ZBUF_2       EQU 0x4F

GS_BITBLTBUF    EQU 0x50
GS_TRXPOS       EQU 0x51
GS_TRXREG       EQU 0x52
GS_TRXDIR       EQU 0x53
GS_HWREG        EQU 0x54

GS_SIGNAL       EQU 0x60
GS_FINISH       EQU 0x61
GS_LABEL        EQU 0x62

; prim register constants (values written to GS_PRIM)
; primitive type, occupies the low bits (values 0..6)
PRIM_POINT      EQU 0
PRIM_LINE       EQU 1
PRIM_LINE_STRIP EQU 2
PRIM_TRI        EQU 3
PRIM_TRI_STRIP  EQU 4
PRIM_TRI_FAN    EQU 5
PRIM_SPRITE     EQU 6

; single bit attribute flags (bits 3..10), disjoint from the type values above
PRIM_GOURAUD    EQU (1<<3)
PRIM_TEXTURE    EQU (1<<4)
PRIM_FOGGING    EQU (1<<5)
PRIM_BLENDING   EQU (1<<6)
PRIM_ANTIALIAS  EQU (1<<7)
PRIM_TEXT_UV    EQU (1<<8)
PRIM_CONTEXT_2  EQU (1<<9)
PRIM_FVC_FIXED  EQU (1<<10)

; storage formats
; pixel storage formats (eg. t1 of GsEE2Vram)
PSMCT32         EQU 0
PSMCT24         EQU 1
PSMCT16         EQU 2
PSMCT16S        EQU 10
PSMT8           EQU 19
PSMT4           EQU 20
PSMT8H          EQU 27

; zbuffer storage formats (eg. a1 of GsInitZBuff)
PSMZ32          EQU 0
PSMZ24          EQU 1
PSMZ16          EQU 2
PSMZ16S         EQU 10



; desc: wait for _cntr vertical retraces (calls GsVSync once per loop pass)
; in: _cntr = number of retraces to wait for (immediate value)
; note: the counter is tested after the call, so GsVSync is called at least
;       once, even for _cntr <= 0
; waste: at,v0,v1,ra
GS_WAIT_FRAMES  MACRO   _cntr
                li          v1, _cntr
                jal         GsVSync
                subiu       v1, 1               ; delay slot: count this pass (GsVSync leaves v1 alone)
                bgtz        v1, $-8             ; $-8 = two instructions back = the jal above
                nop
GS_WAIT_FRAMES  ENDM



; desc: detect if u have PAL or NTSC
;       reads the byte at 0x1fc7ff52 (= 0x1fc80000-0xae); 'E' is reported as PAL,
;       every other value as NTSC
;       NOTE(review): presumably this byte is the region code of the bios rom - confirm
; out: v0 = 2(NTSC) or 3(PAL)
; waste: -
GsDetectMode    PROC
                lui         v0, 0x1fc8
                lbu         v0, -0xae(v0)
                subiu       v0, 'E'             ; v0 == 0 <=> byte was 'E'
                beql        v0, zero, $+12      ; branch likely to the jr below (3 instructions ahead)
                addiu       v0, zero, PAL       ; delay slot, only executed when the branch is taken
                addiu       v0, zero, NTSC      ; fall through: byte was not 'E'
                jr          ra
                nop
GsDetectMode    ENDP


; desc: init graphic processor
;       resets the gs through CSR, then issues syscall 0x71 (set imr) and
;       syscall 2 (set gs crt); the interlace flag is remembered for GsSetVideoMode
; in:
;   a0 -> 0=NON, 1=INTERLACED
;   a1 -> 2=NTSC, 3=PAL
;   a2 -> 1=FRAME, 2=FIELD
; waste: at,v0,v1,a0,a1,a2,s0,s1,s2 (s0-s2 are overwritten without being saved)
;        plus whatever the two syscalls waste
GsInit          PROC
                la          at, interlaced      ; remember the flag, GsSetVideoMode reads GsInit.interlaced
                sb          a0, 0(at)

                move        s0, a0              ; keep the arguments, a0 is reused for the first syscall
                move        s1, a1
                move        s2, a2
                li          v1, GS_REG|GS_REG_CSR   ; reset gs
                li          v0, 1<<9
                sd          v0, 0(v1)
                sync.p
                nop
                li          a0, 0x0000FF00  ; set imr
                ld          v0, 0(v1)       ; v0 = CSR bits 16..23
                dsrl        v0, 16          ; NOTE(review): no visible code reads this value afterwards
                andi        v0, 0xFF
                li          v1, 0x71        ; syscall number (presumably GsPutIMR - confirm)
                syscall
                nop
                andi        a0, s0, 1       ; set gs crt
                andi        a1, s1, 0xFF
                andi        a2, s2, 1       ; FRAME(1) -> 1, FIELD(2) -> 0
                li          v1, 2           ; syscall number (presumably SetGsCrt - confirm)
                syscall
                jr          ra
                nop

interlaced      DB          0               ; interlaced flag (1=on, 0=off)
GsInit          ENDP


; desc: setup videomode
;       programs PMODE, DISPFB2, DISPLAY2 and BGCOLOR, precalculates the register
;       values of both screens for GsSwapScreen, patches the buffer width into the
;       GsEE2Vram packet and finally sends gif_packet (drawing setup + clear screen)
; in: a0 = width, a1 = height, a2 = magh
; note: call GsInit first, DISPLAY2.DY depends on GsInit.interlaced
; note: leaves through a jump to Dma02SendGif (a0 = packet, v0 = size in qwords);
;       presumably that routine returns to our caller via ra - confirm
; waste: at,a0,v0,v1 plus whatever SysFlushCache and Dma02SendGif waste
;
; following pal modes are possible:
;   a0  | a1  | a2
;  -----+-----+----
;   256 | 256 | 10
;   320 | 256 | 8
;   384 | 256 | 7
;   512 | 256 | 5
;   640 | 256 | 4
;
GsSetVideoMode  PROC
                la          at, GsSetVideoMode  ; store video parameters
                sw          a0, GsSetVideoMode.~width(at)
                sw          a1, GsSetVideoMode.~height(at)

                li          at, GS_REG
                li          v0, 0xFF62          ; PMODE: EN1=0,EN2=1,001=0,MMOD=1,AMOD=1,SLBG=0,ALP=0xFF
                sd          v0, 0x00(at)

               ;li          v0, 0x0002          ; SMODE2: already set by GsInit
               ;sd          v0, 0x20(at)

                srl         v1, a0, 6           ; DISPFB2: FBP=0,FBW=width/64,PSM=0,DBX=0,DBY=0
                sll         v0, v1, 9
                sd          v0, 0x90(at)

                la          at, GsEE2Vram.gif_packet.bb
                sb          v1, 6(at)           ; write FBW->DBW

                la          at, GsSwapScreen    ; remember register values for later display swapping
                sw          v0, GsSwapScreen.~dispfb2_1(at)
                sll         v0, 7               ; FBW moves from bit 9 to bit 16 -> frame value with FBP=0
                sw          v0, GsSwapScreen.~frame_2(at)
                la          v1, gif_packet      ; update gif packet too
                sw          v0, gif_packet.~frame(v1)

                multu       v1, a0, a1          ; calc and remember register values for second screen
                addiu       v1, (1<<11)-1       ; round up to the next multiple of 2048 pixels
                srl         v1, 11              ; 2nd screen address = v1 = (width*height*(32/8))/8192
                or          v0, v1              ; frame value of the second screen (FBW<<16 | FBP)
                sw          v0, GsSwapScreen.~frame_1(at)
                srl         v0, 16              ; extract FBW again and move it to bit 9
                sll         v0, 9
                or          v0, v1              ; dispfb2 value of the second screen (FBW<<9 | FBP)
                sw          v0, GsSwapScreen.~dispfb2_2(at)

                li          at, GS_REG          ; DISPLAY2: DX=653,DY=(interlaced+1)*36,MAGH=magh-1,MAGV=0,DW=width*magh-1,DH=height-1
                daddiu      v0, zero, 36
                la          v1, GsInit.interlaced
                lbu         v1, 0(v1)
                addiu       v1, 1
                multu       v0, v0, v1          ; DY = 36*(interlaced+1), goes to bit 12
                dsll        v0, 12
                ori         v0, 653             ; DX in the low bits
                subi        v1, a1, 1           ; (height-1)<<44
                dsll32      v1, 44-32
                daddu       v0, v1
                multu       v1, a0, a2          ; (width*magh-1)<<32
                subi        v1, 1
                dsll32      v1, 32-32
                daddu       v0, v1
                subi        v1, a2, 1           ; (magh-1)<<23
                dsll        v1, 23
                daddu       v0, v1
                sd          v0, 0xA0(at)

                sd          zero, 0xE0(at)      ; BGCOLOR: R=0, G=0, B=0


                la          at, gif_packet      ; alter gif_packet regs
                subi        v0, a0, 1           ; width
                sh          v0, gif_packet.~scissor+2(at)   ; scissor bits 16..31 = width-1
                sll         v0, a0, 4           ; width<<4 on top of the 0x8000 xyoffset
                ori         v0, 0x8000
                sh          v0, gif_packet.~cls_xy+0(at)
                subi        v0, a1, 1           ; height
                sh          v0, gif_packet.~scissor+6(at)   ; scissor bits 48..63 = height-1
                sll         v0, a1, 4           ; height<<4 on top of the 0x8000 xyoffset
                ori         v0, 0x8000
                sh          v0, gif_packet.~cls_xy+2(at)

                SysFlushCache                   ; syscall

                la          a0, gif_packet      ; send the gif
                j           Dma02SendGif
                addiu       v0, zero, gif_packet._size/16   ; delay slot: packet size in qwords


width           DW          0                   ; a0 of the last call
height          DW          0                   ; a1 of the last call


gif_packet      TABLE ALIGN 16
                GIF_TAG     7, 1, 0, 0, 0, 1, GIF_AD
                GIF_DATA    GS_PRMODECONT, 1                ; use primitive for mode control flags
                GIF_DATA    GS_ZBUF_1, 1<<32                ; turn off zbuffer
                GIF_DATA    GS_TEST_1, (15<<0)              ; skip pixels with A=0, DepthTest off
                GIF_DATA    GS_ALPHA_1, (1<<2)|(1<<6)       ; blend by Cv=(Csrc - Cframe)*Asrc >>7 + Cframe
                GIF_DATA    GS_PABE, 0                      ; not perform ABC in units of pixels
                GIF_DATA    GS_COLCLAMP, 1                  ; colclamp on
                GIF_DATA    GS_DTHE, 0                      ; dont perform dithering

                GIF_TAG     3, 1, 0, 0, 0, 1, GIF_AD        ; setup screen
  frame:        GIF_DATA    GS_FRAME_1, 0                   ; patched above (FBW<<16, FBP=0)
  xyoffset:     GIF_DATA    GS_XYOFFSET_1, 0x800000008000   ; upper/left screen corner is located at 32768/32768
  scissor:      GIF_DATA    GS_SCISSOR_1, 0                 ; patched above (width-1, height-1)

  cls:          GIF_TAG     4, 1, 0, 0, 0, 1, GIF_AD        ; clear screen (also sent alone by GsClearScreen)
                GIF_DATA    GS_PRIM, PRIM_SPRITE
                GIF_DATA    GS_RGBAQ, 128<<24
                GIF_DATA    GS_XYZ2, 0
  cls_xy:       GIF_DATA    GS_XYZ2, 0                      ; patched above (lower/right corner), copied by GsInitZBuff
gif_packet      ENDT

GsSetVideoMode  ENDP


; desc: fills active frame with black pixels
;       sends the cls part of GsSetVideoMode.gif_packet through Dma02SendGif
; note: call after GsSetVideoMode, which fills in the lower/right corner (cls_xy)
; waste: at,a0,v0,v1
GsClearScreen   PROC
                la          a0, GsSetVideoMode.gif_packet.cls
                b           Dma02SendGif
                addiu       v0, zero, 5         ; delay slot: 5 qwords = gif tag + 4 register writes
GsClearScreen   ENDP


; desc: activate zbuffer
;       patches base pointer and storage format into GsClearZBuff.gif_packet, copies
;       the lower/right screen corner from GsSetVideoMode and sends the whole packet
;       (turn zbuff on + clear screen and zbuff)
; in: a0 = zbuff base pointer (vmem_addr >> 11), a1 = zbuff storage format (eg PSMZ16)
; note: call after GsSetVideoMode, occupies vmem at a0*2048
; waste: at,a0,v0,v1
GsInitZBuff     PROC
                la          v0, GsClearZBuff.gif_packet
                la          v1, GsSetVideoMode.gif_packet.cls_xy
                ld          at, 0(v1)           ; lower/right corner as calculated by GsSetVideoMode
                sh          a0, GsClearZBuff.gif_packet.~zbp(v0)    ; ZBUF_1 bits 0..15 = base pointer
                sb          a1, GsClearZBuff.gif_packet.~zbp+3(v0)  ; ZBUF_1 bits 24..31 = storage format
                sd          at, GsClearZBuff.gif_packet.~cls_xy(v0)

                SysFlushCache

                la          a0, GsClearZBuff.gif_packet.zbuff_on
                b           Dma02SendGif
                addiu       v0, zero, 9         ; delay slot: 9 qwords = 2 gif tags + 7 register writes
GsInitZBuff     ENDP


; desc: clear screen and zbuff (black)
;       sends the zbuff_clear part of gif_packet: a sprite with z=0 is drawn with
;       the depth test passing all pixels, afterwards the depth test is set to skip Z<Zbuf
; note: first call GsInitZBuff (it fills in zbp and cls_xy)
; waste: at,a0,v0,v1
GsClearZBuff    PROC
                la          a0, gif_packet.zbuff_clear
                b           Dma02SendGif
                addiu       v0, zero, 7         ; delay slot: 7 qwords = gif tag + 6 register writes

gif_packet      TABLE ALIGN 16
  zbuff_on:     GIF_TAG     1, 1, 0, 0, 0, 1, GIF_AD        ; turn zbuff on
  zbp:          GIF_DATA    GS_ZBUF_1, 0                    ; patched by GsInitZBuff
  zbuff_clear:  GIF_TAG     6, 1, 0, 0, 0, 1, GIF_AD        ; clear screen
                GIF_DATA    GS_TEST_1, (3<<16)|(15<<0)      ; skip pixels with A=0, DepthTest on, pass all pixels
                GIF_DATA    GS_PRIM, PRIM_SPRITE
                GIF_DATA    GS_RGBAQ, 128<<24
                GIF_DATA    GS_XYZ2, 0
  cls_xy:       GIF_DATA    GS_XYZ2, 0                      ; patched by GsInitZBuff (lower/right corner)
                GIF_DATA    GS_TEST_1, (5<<16)|(15<<0)      ; skip pixels with A=0, DepthTest on, skip Z<Zbuf
gif_packet      ENDT

GsClearZBuff    ENDP


; desc: swap double buffered screen
;       exchanges frame_1<->frame_2 and dispfb2_1<->dispfb2_2; the old frame_1 becomes
;       the new draw buffer (sent as GS_FRAME_1), the old dispfb2_1 becomes the new
;       display buffer (written to DISPFB2 at GS_REG+0x90 immediately)
; note: call after GsSetVideoMode, which fills in the four values below
; note: the values are accessed with ld/sd, so the DD fields must be 8 byte aligned
;       NOTE(review): confirm that the assembler aligns DD, nothing here enforces it
; waste: at,a0,v0,v1
GsSwapScreen    PROC
                la          at, GsSwapScreen
                ld          v0, GsSwapScreen.~frame_1(at)
                ld          v1, GsSwapScreen.~frame_2(at)
                sd          v0, GsSwapScreen.~frame_2(at)
                sd          v1, GsSwapScreen.~frame_1(at)
                sd          v0, GsSwapScreen.~gif_packet.frame(at)  ; draw into the old frame_1 from now on

                ld          v0, GsSwapScreen.~dispfb2_1(at)
                ld          v1, GsSwapScreen.~dispfb2_2(at)
                sd          v0, GsSwapScreen.~dispfb2_2(at)
                sd          v1, GsSwapScreen.~dispfb2_1(at)
                
                li          at, GS_REG
                sd          v0, 0x90(at)        ; display the old dispfb2_1

                SysFlushCache

                la          a0, gif_packet
                j           Dma02SendGif
                addiu       v0, zero, gif_packet._size/16   ; delay slot: packet size in qwords

frame_1         DD 0                            ; frame value used for drawing after the next swap
dispfb2_1       DD 0                            ; dispfb2 value displayed after the next swap
frame_2         DD 0                            ; frame value currently used for drawing
dispfb2_2       DD 0                            ; dispfb2 value currently displayed (after the first swap)

gif_packet      TABLE ALIGN 16
                GIF_TAG     1, 1, 0, 0, 0, 1, GIF_AD    ; setup screen
  frame:        GIF_DATA    GS_FRAME_1, 0               ; patched on every swap
gif_packet      ENDT

GsSwapScreen    ENDP


; desc: change background color (writes the BGCOLOR register at GS_REG+0xE0)
; in: v0 = background color ((B<<16)|(G<<8)|(R<<0))
; waste: at
GsSetBgColor    PROC
                li          at, GS_REG
                jr          ra
                sd          v0, 0xE0(at)        ; delay slot: the store is done while returning
GsSetBgColor    ENDP


; desc: wait for finish event
;       sets bit 1 in CSR, sends a packet that writes GS_FINISH and then polls CSR
;       until bit 1 reads as set
; note: the return address is kept in t0 and CSR's address in t1 while Dma02SendGif
;       runs, so that routine must not change t0/t1
;       NOTE(review): Dma02SendGif is defined elsewhere - confirm
; note: the whole CSR value is read and written back, not only bit 1
; waste: at,a0,v0,v1,t0,t1,ra
GsFinish        PROC
                move        t0, ra              ; jal below overwrites ra
                li          t1, GS_REG|GS_REG_CSR
                ld          at, 0(t1)
                ori         at, 2
                sd          at, 0(t1)

                la          a0, gif_packet
                jal         Dma02SendGif
                addiu       v0, zero, gif_packet._size/16   ; delay slot: packet size in qwords

lp:             ld          at, 0(t1)           ; busy wait until bit 1 of CSR is set
                andi        at, 2
                beqz        at, lp
                nop
                jr          t0                  ; return through the saved address
                nop

gif_packet      TABLE ALIGN 16
                GIF_TAG     1, 1, 0, 0, 0, 1, GIF_AD
                GIF_DATA    GS_FINISH, 0
gif_packet      ENDT

GsFinish        ENDP


; desc: wait for vertical retrace
;       sets bit 3 in CSR and polls CSR until bit 3 reads as set
; note: the whole CSR value is read and written back, not only bit 3
; waste: at,v0
GsVSync         PROC
                li          at, GS_REG|GS_REG_CSR
                ld          v0, 0(at)
                ori         v0, 8
                sd          v0, 0(at)
lp:             ld          v0, 0(at)           ; busy wait until bit 3 of CSR is set
                andi        v0, 8
                beqz        v0, lp
                nop
                jr      ra
                nop
GsVSync         ENDP


; desc: wait for horizontal retrace
;       sets bit 2 in CSR and polls CSR until bit 2 reads as set
; note: the whole CSR value is read and written back, not only bit 2
; waste: at,v0
GsHSync         PROC
                li          at, GS_REG|GS_REG_CSR
                ld          v0, 0(at)
                ori         v0, 4
                sd          v0, 0(at)
lp:             ld          v0, 0(at)           ; busy wait until bit 2 of CSR is set
                andi        v0, 4
                beqz        v0, lp
                nop
                jr      ra
                nop
GsHSync         ENDP


; desc: transfer ee-memory to gs-mem (vram)
;       patches position, size, destination and storage format into gif_packet,
;       sends the packet over dma channel DMA02 and then starts the dma of the
;       picture data itself
; in:
;   a0 = (y<<16)|x           destination position
;   a1 = (h<<16)|w           size of the transferred area
;   a2 = (w*h*bpp)/16 = size in qwords (bpp = bytes per pixel, a qword has 16 bytes)
;        only bits 0..14 fit into the gif tag, so a2 must be below 32768
;   a3 = picture (ptr)
;   t0 = buffer base ptr
;   t1 = pixel storage format (eg. PSMCT32)
; note: the destination buffer width is patched into the packet by GsSetVideoMode
; note: returns right after the picture dma was started, without waiting for its end
; waste: at,t0,t1,t2 plus whatever SysFlushCache and Dma02Wait waste
GsEE2Vram       PROC
                push        ra

                ; init prim (ra is free now and used as pointer to the packet)
                la          ra, gif_packet
                sw          a0, gif_packet.~xy+4(ra)    ; TRXPOS bits 32..63 = dest x,y
                sh          a1, gif_packet.~wh+0(ra)    ; TRXREG bits 0..15 = width
                srl         at, a1, 16
                sh          at, gif_packet.~wh+4(ra)    ; TRXREG bits 32..47 = height
                ; the halfword at tg+0 holds nloop (bits 0..14) AND eop (bit 15).
                ; storing a2 alone would clear the eop=1 declared in the tag below
                ; and leave the gif packet open after the picture data.
                ori         at, a2, 0x8000
                sh          at, gif_packet.~tg+0(ra)
                sh          t0, gif_packet.~bb+4(ra)    ; BITBLTBUF bits 32..47 = dest base ptr
                sb          t1, gif_packet.~bb+7(ra)    ; BITBLTBUF bits 56..63 = dest storage format

                ; flush cache, and write out the cached changes to our gif_packet
                ; dma transfer wouldnt ask the cache what he contains, only reads memory banks
                SysFlushCache
                jal         Dma02Wait
                nop

                ; transfer packet
                la          t0, gif_packet
                addiu       t1, zero, gif_packet._size/16
                addiu       t2, zero, 101h              ; chcr value that starts the transfer, reused below
                li          at, DMA02
                sw          t0, DMA_MADR(at)
                sw          t1, DMA_QWC(at)
                sw          t2, DMA_CHCR(at)
                jal         Dma02Wait                   ; packet must be gone before the channel is reprogrammed
                nop
                ; transfer data
                li          at, DMA02
                sw          a3, DMA_MADR(at)
                sw          a2, DMA_QWC(at)
                sw          t2, DMA_CHCR(at)

                pop         ra
                jr          ra
                nop


gif_packet      TABLE ALIGN 16
                  GIF_TAG     4, 1, 0, 0, 0, 1, GIF_AD
  bb:             GIF_DATA    GS_BITBLTBUF, 0             ; dest buffer width / 64
  xy:             GIF_DATA    GS_TRXPOS,    0             ; dest_x,dest_y
  wh:             GIF_DATA    GS_TRXREG,    0             ; width,height
                  GIF_DATA    GS_TRXDIR,    0             ; direction: 0=ee->gs
  tg:             GIF_TAG     0, 1, 0, 0, 8, 1, 0         ; nloop = (w*h*bpp)/16 = size in qwords, eop stays 1
gif_packet      ENDT

GsEE2Vram       ENDP
