//2010-12-04 16:14:34 +00:00
//pbKit core functions
//see AFL license
//#define DBG
//#define LOG
# include <hal/video.h>
# include <hal/xbox.h>
# include <hal/io.h>
# include <xboxkrnl/xboxkrnl.h>
# include <openxdk/debug.h>
# include "pbKit.h"
# include "outer.h"
# include "nv_objects.h" //shared with renouveau files
# include "nv20_shader.h" //(search "nouveau" on wiki)
# include <string.h>
# include <stdio.h>
# include <stdlib.h>
# include <stdarg.h>
# define INSTANCE_MEM_MAXSIZE 0x5000 //20Kb
# define ADDR_SYSMEM 1
# define ADDR_FBMEM 2
# define ADDR_AGPMEM 3
# define DMA_CLASS_2 2
# define DMA_CLASS_3 3
# define DMA_CLASS_3D 0x3D
# define GR_CLASS_30 0x30
# define GR_CLASS_39 0x39
# define GR_CLASS_62 0x62
# define GR_CLASS_97 0x97
# define GR_CLASS_9F 0x9F
# define GPU_IRQ 3
# define XTAL_16MHZ 16.6667f
# define DW_XTAL_16MHZ 16666666
# define MAX_EXTRA_BUFFERS 8
# define MAXRAM 0x03FFAFFF
# define NONE -1
# define TICKSTIMEOUT 100 //if Dma doesn't react in that time, send a warning
# define PB_SETOUTER 0xB2A
# define PB_SETNOISE 0xBAA
# define PB_FINISHED 0xFAB
//Describes one context object (DMA or graphic) stored in the PRAMIN instance memory.
struct s_CtxDma
{
DWORD ChannelID ; //identifier of the channel/object (TODO confirm exact meaning against the creation code)
DWORD Inst ; //Addr in PRAMIN area, unit=16 bytes blocks, baseaddr=VIDEO_BASE+NV_PRAMIN
DWORD Class ; //presumably one of the DMA_CLASS_* or GR_CLASS_* values defined above (TODO confirm)
DWORD isGr ; //presumably non-zero when the object is a graphic (GR_CLASS_*) object (TODO confirm)
} ;
//One register operand of a shader pseudo-instruction.
//NOTE(review): field meanings below are inferred from the names only; confirm against pb_pcode2mcode().
struct s_PseudoReg
{
int reg ; //register kind/file
int num ; //register number inside that file
union {
int msk ; //presumably the write mask when the operand is a destination
int swz ; //presumably the swizzle when the operand is a source (shares storage with msk)
} ;
int mod ; //operand modifier
int idx ; //index (presumably for indexed addressing)
} ;
//Full operand set of one shader pseudo-instruction : one destination and up to three sources.
struct s_PseudoRegs
{
int n ; //presumably the number of operands actually used (TODO confirm)
struct s_PseudoReg dest ; //destination operand
struct s_PseudoReg src0 ; //first source operand
struct s_PseudoReg src1 ; //second source operand
struct s_PseudoReg src2 ; //third source operand
} ;
//Global run-state flags of the library.
static int pb_running = 0 ;
static DWORD pb_vbl_counter = 0 ; //incremented by pb_vbl_handler() at each vertical blank
//pb_trace_mode : if set, we wait after each block is sent (pb_end)
//so we are sure the GPU received all the data (slower)
//and that any GPU error comes from the last block sent.
//Defaults to 1 when DBG is defined, 0 otherwise.
# ifdef DBG
static int pb_trace_mode = 1 ;
# else
static int pb_trace_mode = 0 ;
# endif
//pb_disable_gpu : if set, prevents the GPU from delaying the CPU when the FIFO is
//full (allows measuring how fast the CPU code is on its own)
static int pb_disable_gpu = 0 ;
//(the two comment blocks above describe pb_trace_mode and pb_disable_gpu,
//not the declarations that follow)
//--- kernel objects used to service the GPU interrupt ---
static KINTERRUPT pb_InterruptObject ;
static KDPC pb_DPCObject ;
static HANDLE pb_VBlankEvent ; //pulsed by pb_vbl_handler() at each vertical blank
//--- presumably copies of hardware registers saved so they can be restored later (TODO confirm) ---
static DWORD pb_OldMCEnable ;
static DWORD pb_OldMCInterrupt ;
static DWORD pb_OldFBConfig0 ;
static DWORD pb_OldFBConfig1 ;
static DWORD pb_OldVideoStart ;
//--- small Dma buffers and the push buffer itself ---
static DWORD * pb_DmaBuffer8 ; //points at 32 contiguous bytes (Dma Channel ID 8 buffer)
static DWORD * pb_DmaBuffer2 ; //points at 32 contiguous bytes (Dma Channel ID 2 buffer)
static DWORD * pb_DmaBuffer7 ; //points at 32 contiguous bytes (Dma Channel ID 7 buffer)
static DWORD pb_Size = 512 * 1024 ; //push buffer size, must be >64Kb and a power of 2
static DWORD * pb_Head ; //points at push buffer head
static DWORD * pb_Tail ; //points at push buffer tail
static DWORD * pb_Put = NULL ; //where next command+params are to be written
static float pb_CpuFrequency ;
//--- layout of the GPU instance memory (PRAMIN) ---
static DWORD pb_GpuInstMem ;
static DWORD pb_PushBase ;
static DWORD pb_PushLimit ;
static DWORD pb_FifoHTAddr ;
static DWORD pb_FifoFCAddr ; //base of the per-channel save area used by pb_set_fifo_channel() (64 bytes per channel)
static DWORD pb_FifoU1Addr ;
static DWORD pb_3DGrCtxInst [ 2 ] = { 0 , 0 } ; //Address of the two 3D graphic contexts (addr=inst<<4+NV_PRAMIN)
static DWORD pb_GrCtxTableInst ; //Address of the table that points at the two graphic contexts
static DWORD pb_GrCtxInst [ 2 ] ; //Address of the two graphic contexts (addr=inst<<4+NV_PRAMIN)
static int pb_GrCtxID ; //Current context ID : 0,1 or NONE
static DWORD pb_FifoBigInst ; //graphic contexts are stored there, and much more (addr=inst<<4+NV_PRAMIN)
static DWORD pb_FreeInst ; //next free space in PRAMIN area (addr=inst<<4+NV_PRAMIN)
//--- gamma ramps : 3 queued ramps, each made of 3 tables of 256 bytes ---
static int pb_GammaRampIdx = 0 ; //ramp that pb_vbl_handler() will apply next (rotates modulo 3)
static int pb_GammaRampbReady [ 3 ] = { 0 , 0 , 0 } ; //non-zero when the matching ramp is waiting to be applied
static BYTE pb_GammaRamp [ 3 ] [ 3 ] [ 256 ] ;
//--- back buffer queue (3 slots) shared by pb_subprog() and pb_vbl_handler() ---
static int pb_BackBufferNxt = 0 ; //next slot filled by pb_subprog(PB_FINISHED)
static int pb_BackBufferNxtVBL = 0 ; //next slot consumed by pb_vbl_handler()
static int pb_BackBufferbReady [ 3 ] = { 0 , 0 , 0 } ; //1 when the slot holds a finished frame
static int pb_BackBufferIndex [ 3 ] ; //paramA received with PB_FINISHED for each slot
//--- FIFO channels state ---
static DWORD pb_FifoChannelsReady = 0 ;
static DWORD pb_FifoChannelsMode = NV_PFIFO_MODE_ALL_PIO ;
static DWORD pb_FifoChannelID = 0 ;
static DWORD pb_PutRunSize = 0 ;
static DWORD pb_GetRunSize ;
//--- frame buffers ---
static DWORD pb_FrameBuffersCount ;
static DWORD pb_FrameBuffersWidth ;
static DWORD pb_FrameBuffersHeight ;
static DWORD pb_FrameBuffersAddr ;
static DWORD pb_FrameBuffersPitch ;
static DWORD pb_FBAddr [ 3 ] ; //frame buffers addresses
static DWORD pb_FBSize ; //size of 1 buffer
static DWORD pb_FBGlobalSize ; //size of all buffers
static DWORD pb_FBVFlag ;
static DWORD pb_GPUFrameBuffersFormat ; //encoded format for GPU
static DWORD pb_EXAddr [ 8 ] ; //extra buffers addresses
static DWORD pb_ExtraBuffersCount = 0 ;
//--- depth stencil buffer ---
static DWORD pb_DepthStencilAddr ;
static DWORD pb_DepthStencilPitch ;
static int pb_DepthStencilLast ;
static DWORD pb_DSAddr ; //depth stencil address
static DWORD pb_DSSize ; //size of depth stencil buffer
static DWORD pb_GPUDepthStencilFormat ; //encoded format for GPU
static int pb_front_index ;
static int pb_back_index ;
//--- viewport and scaling ---
static DWORD pb_Viewport_x ;
static DWORD pb_Viewport_y ;
static DWORD pb_Viewport_width ;
static DWORD pb_Viewport_height ;
static DWORD pb_Viewport_zmin ;
static DWORD pb_Viewport_zmax ;
static float pb_XScale ;
static float pb_YScale ;
static float pb_ZScale ;
static float pb_GlobalScale ;
static float pb_Bias ;
static int pb_debug_screen_active ;
//--- instances of Dma channels 9, 10 and 11 and user push state ---
static DWORD pb_DmaChID9Inst ;
static DWORD pb_DmaChID10Inst ;
static DWORD pb_DmaChID11Inst ;
static DWORD * pb_DmaUserAddr ;
static DWORD pb_PushIndex ;
static DWORD * pb_PushStart ;
static DWORD * pb_PushNext ;
static int pb_BeginEndPair = 0 ;
//Constant tables.
//12 floats (3 rows of 4) ; presumably uploaded as constants for the fixed pipeline (TODO confirm at use site)
static float pb_FixedPipelineConstants [ 12 ] = {
0.0f , 0.5f , 1.0f , 2.0f ,
- 1.0f , 0.0f , 1.0f , 2.0f ,
0.0f , 0.0f , - 1.0f , 0.0f } ;
//4x4 identity matrix
static float pb_IdentityMatrix [ 16 ] = {
1.0f , 0.0f , 0.0f , 0.0f ,
0.0f , 1.0f , 0.0f , 0.0f ,
0.0f , 0.0f , 1.0f , 0.0f ,
0.0f , 0.0f , 0.0f , 1.0f } ;
//16 pitch values in increasing order ; presumably the pitches accepted for tiled surfaces (TODO confirm)
static DWORD pb_TilePitches [ 16 ] = {
0x0200 , 0x0400 , 0x0600 , 0x0800 ,
0x0A00 , 0x0C00 , 0x0E00 , 0x1000 ,
0x1400 , 0x1800 , 0x1C00 , 0x2800 ,
0x3000 , 0x3800 , 0x5000 , 0x7000 } ;
//7 bias values between 0 and 2
//NOTE(review): several entries equal log2 of 1, 1.5, 2, 2.5, 3 and 4 ; 1.907 does not fit an obvious log2 step - verify
static float pb_BiasTable [ 7 ] = {
0.0f ,
0.585f ,
1.0f ,
1.322f ,
1.585f ,
1.907f ,
2.0f } ;
//temporary storage for pb_pcode2mcode()
static DWORD pb_gpu_programnc [ 136 * 5 + 192 * 7 + 8 ] ; //vertex shader micro-code setup (max:136 instructions + 192 constants)
static DWORD pb_gpu_registers [ 6 * 8 + 7 ] ; //pixel shader registers values
static int pb_tmp_registers [ 16 ] ; //some vertex shader macros need to find free temp registers
static int pb_exp_constflag ;
static int pb_log_constflag ;
//forward references
//(pb_gr_handler() calls pb_load_gr_ctx() before its definition, and pb_load_gr_ctx() calls pb_gr_handler() back)
static void pb_load_gr_ctx ( int ctx_id ) ;
//private pb_text_screen functions
//The debug text screen is a ROWS x COLS array of characters filled by pb_print_char().
# define ROWS 16
# define COLS 60
static char pb_text_screen [ ROWS ] [ COLS ] ; //rows are not zero-terminated when completely filled
static int pb_next_row = 0 ; //row where the next character will be written (0..ROWS-1)
static int pb_next_col = 0 ; //column where the next character will be written (0..COLS-1)
static unsigned char systemFont [ ] =
{
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 56 , 56 , 56 , 56 , 56 , 0 , 56 , 56 ,
108 , 108 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 108 , 254 , 254 , 108 , 254 , 254 , 108 ,
48 , 126 , 224 , 124 , 14 , 254 , 252 , 48 , 98 , 230 , 204 , 24 , 48 , 102 , 206 , 140 ,
120 , 220 , 252 , 120 , 250 , 222 , 252 , 118 , 28 , 28 , 56 , 0 , 0 , 0 , 0 , 0 ,
14 , 28 , 28 , 28 , 28 , 28 , 28 , 14 , 112 , 56 , 56 , 56 , 56 , 56 , 56 , 112 ,
0 , 0 , 0 , 230 , 124 , 56 , 124 , 206 , 0 , 0 , 28 , 28 , 127 , 127 , 28 , 28 ,
0 , 0 , 0 , 0 , 0 , 28 , 28 , 56 , 0 , 0 , 0 , 0 , 124 , 124 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 56 , 56 , 28 , 28 , 56 , 56 , 112 , 112 , 224 , 224 ,
124 , 254 , 238 , 238 , 238 , 254 , 254 , 124 , 56 , 120 , 248 , 56 , 56 , 254 , 254 , 254 ,
252 , 254 , 14 , 60 , 112 , 254 , 254 , 254 , 252 , 254 , 14 , 60 , 14 , 254 , 254 , 252 ,
238 , 238 , 238 , 254 , 254 , 14 , 14 , 14 , 254 , 254 , 224 , 252 , 14 , 254 , 254 , 252 ,
124 , 252 , 224 , 252 , 238 , 254 , 254 , 124 , 252 , 254 , 14 , 14 , 28 , 28 , 56 , 56 ,
124 , 254 , 238 , 124 , 238 , 254 , 254 , 124 , 124 , 254 , 238 , 126 , 14 , 254 , 254 , 252 ,
0 , 0 , 28 , 28 , 0 , 28 , 28 , 28 , 0 , 0 , 28 , 28 , 0 , 28 , 28 , 56 ,
6 , 14 , 28 , 56 , 56 , 28 , 14 , 6 , 0 , 0 , 124 , 124 , 0 , 124 , 124 , 124 ,
112 , 56 , 28 , 14 , 14 , 28 , 56 , 112 , 124 , 254 , 206 , 28 , 56 , 0 , 56 , 56 ,
124 , 198 , 190 , 182 , 190 , 182 , 200 , 126 , 124 , 254 , 238 , 254 , 238 , 238 , 238 , 238 ,
252 , 254 , 206 , 252 , 206 , 254 , 254 , 252 , 124 , 254 , 238 , 224 , 238 , 254 , 254 , 124 ,
252 , 254 , 238 , 238 , 238 , 254 , 254 , 252 , 254 , 254 , 224 , 248 , 224 , 254 , 254 , 254 ,
126 , 254 , 224 , 248 , 224 , 224 , 224 , 224 , 126 , 254 , 224 , 238 , 238 , 254 , 254 , 124 ,
238 , 238 , 238 , 254 , 238 , 238 , 238 , 238 , 254 , 254 , 56 , 56 , 56 , 254 , 254 , 254 ,
254 , 254 , 14 , 14 , 238 , 254 , 254 , 124 , 238 , 238 , 252 , 248 , 252 , 238 , 238 , 238 ,
224 , 224 , 224 , 224 , 224 , 254 , 254 , 126 , 130 , 198 , 238 , 254 , 254 , 238 , 238 , 238 ,
206 , 238 , 254 , 254 , 254 , 254 , 238 , 230 , 124 , 254 , 238 , 238 , 238 , 254 , 254 , 124 ,
252 , 254 , 238 , 238 , 252 , 224 , 224 , 224 , 124 , 254 , 238 , 238 , 254 , 254 , 252 , 118 ,
252 , 254 , 238 , 238 , 252 , 238 , 238 , 238 , 126 , 254 , 224 , 124 , 14 , 254 , 254 , 252 ,
254 , 254 , 56 , 56 , 56 , 56 , 56 , 56 , 238 , 238 , 238 , 238 , 238 , 254 , 254 , 124 ,
238 , 238 , 238 , 238 , 238 , 238 , 124 , 56 , 238 , 238 , 238 , 254 , 254 , 238 , 198 , 130 ,
238 , 238 , 124 , 56 , 124 , 238 , 238 , 238 , 238 , 238 , 124 , 124 , 56 , 56 , 112 , 112 ,
254 , 254 , 28 , 56 , 112 , 254 , 254 , 254 , 124 , 124 , 112 , 112 , 112 , 124 , 124 , 124 ,
112 , 112 , 56 , 56 , 28 , 28 , 14 , 14 , 124 , 124 , 28 , 28 , 28 , 124 , 124 , 124 ,
56 , 124 , 238 , 198 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 254 , 254 , 254 ,
56 , 56 , 28 , 0 , 0 , 0 , 0 , 0 , 0 , 124 , 254 , 238 , 254 , 238 , 238 , 238 ,
0 , 252 , 254 , 206 , 252 , 206 , 254 , 252 , 0 , 124 , 254 , 238 , 224 , 238 , 254 , 124 ,
0 , 252 , 254 , 238 , 238 , 238 , 254 , 252 , 0 , 254 , 254 , 224 , 248 , 224 , 254 , 254 ,
0 , 126 , 254 , 224 , 248 , 224 , 224 , 224 , 0 , 126 , 254 , 224 , 238 , 238 , 254 , 124 ,
0 , 238 , 238 , 238 , 254 , 238 , 238 , 238 , 0 , 254 , 254 , 56 , 56 , 56 , 254 , 254 ,
0 , 254 , 254 , 14 , 14 , 238 , 254 , 124 , 0 , 238 , 238 , 252 , 248 , 252 , 238 , 238 ,
0 , 224 , 224 , 224 , 224 , 224 , 254 , 126 , 0 , 130 , 198 , 238 , 254 , 254 , 238 , 238 ,
0 , 206 , 238 , 254 , 254 , 254 , 238 , 230 , 0 , 124 , 254 , 238 , 238 , 238 , 254 , 124 ,
0 , 252 , 254 , 238 , 238 , 252 , 224 , 224 , 0 , 124 , 254 , 238 , 238 , 254 , 252 , 118 ,
0 , 252 , 254 , 238 , 238 , 252 , 238 , 238 , 0 , 126 , 254 , 224 , 124 , 14 , 254 , 252 ,
0 , 254 , 254 , 56 , 56 , 56 , 56 , 56 , 0 , 238 , 238 , 238 , 238 , 238 , 254 , 124 ,
0 , 238 , 238 , 238 , 238 , 238 , 124 , 56 , 0 , 238 , 238 , 238 , 254 , 238 , 198 , 130 ,
0 , 238 , 238 , 124 , 56 , 124 , 238 , 238 , 0 , 238 , 238 , 124 , 124 , 56 , 56 , 112 ,
0 , 254 , 254 , 28 , 56 , 112 , 254 , 254 , 60 , 124 , 112 , 112 , 112 , 124 , 124 , 60 ,
56 , 56 , 56 , 0 , 56 , 56 , 56 , 56 , 120 , 124 , 28 , 28 , 28 , 124 , 124 , 120 ,
236 , 254 , 118 , 0 , 0 , 0 , 0 , 0 , 0 , 16 , 56 , 124 , 254 , 254 , 254 , 254 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
} ;
//Scrolls the debug text screen up by one row : each row receives the content
//of the row below it, then the last row is cleared (filled with 0).
static void pb_scrollup(void)
{
    char *row = &pb_text_screen[0][0];
    char *last_row = &pb_text_screen[ROWS - 1][0];

    //rows are stored contiguously, so the row below is COLS bytes further
    while (row != last_row)
    {
        memcpy(row, row + COLS, COLS);
        row += COLS;
    }
    memset(last_row, 0, COLS);
}
static void pb_print_char ( char c )
{
if ( c = = ' \n ' )
{
pb_next_row + + ;
if ( pb_next_row > = ROWS ) { pb_next_row = ROWS - 1 ; pb_scrollup ( ) ; }
pb_next_col = 0 ;
}
else
if ( c = = ' \r ' )
{
pb_next_col = 0 ;
}
else
if ( c = = 8 )
{
pb_next_col - - ;
if ( pb_next_col < 0 ) pb_next_col = 0 ;
}
else
if ( c > = 32 )
{
pb_text_screen [ pb_next_row ] [ pb_next_col ] = c ;
pb_next_col + + ;
if ( pb_next_col > = COLS )
{
pb_next_row + + ;
if ( pb_next_row > = ROWS ) { pb_next_row = ROWS - 1 ; pb_scrollup ( ) ; }
pb_next_col = 0 ;
}
}
}
//private functions
//Uploads a gamma ramp into the DAC palette.
//pGammaRamp points at 3 consecutive tables of 256 bytes. For each of the 256
//entries, one byte of each table is written (table 0, then 1, then 2) to the
//palette data register, after the write address has been reset to 0.
static void pb_set_gamma_ramp(BYTE *pGammaRamp)
{
    BYTE *table0 = pGammaRamp;
    BYTE *table1 = pGammaRamp + 256;
    BYTE *table2 = pGammaRamp + 512;
    int remaining;

    VIDEOREG8(NV_USER_DAC_WRITE_MODE_ADDRESS) = 0; //&NV_USER_DAC_WRITE_MODE_ADDRESS_VALUE

    for (remaining = 256; remaining > 0; remaining--)
    {
        VIDEOREG8(NV_USER_DAC_PALETTE_DATA) = *table0++; //&NV_USER_DAC_PALETTE_DATA_VALUE
        VIDEOREG8(NV_USER_DAC_PALETTE_DATA) = *table1++; //&NV_USER_DAC_PALETTE_DATA_VALUE
        VIDEOREG8(NV_USER_DAC_PALETTE_DATA) = *table2++; //&NV_USER_DAC_PALETTE_DATA_VALUE
    }
}
//Vertical blank handler.
//Counts the vblank, and when the next queued back buffer is flagged ready :
//  - clears its ready flag,
//  - applies the pending gamma ramp (if any) and rotates the gamma ramp index,
//  - triggers NV_PGRAPH_INCREMENT_READ_3D,
//  - rotates the index of the next back buffer to consume.
//Then acknowledges the PCRTC vblank interrupt until it is no longer pending,
//pulses pb_VBlankEvent and restores the CRTC index register saved on entry.
static void pb_vbl_handler(void)
{
    BYTE old_color_addr; //important index to preserve if we are called from Dpc or Isr
    int next;
    int index;

    old_color_addr = VIDEOREG8(NV_PRMCIO_CRX__COLOR);

    pb_vbl_counter++;

    //Index of next back buffer to show up (0-2, the queue has 3 slots)
    next = pb_BackBufferNxtVBL;

    //Is the next back buffer to show up ready?
    if (pb_BackBufferbReady[next] == 1)
    {
        //screen swapping has been done already, theoretically, in ISR
        pb_BackBufferbReady[next] = 0;

        index = pb_GammaRampIdx;
        if (pb_GammaRampbReady[index])
        {
            pb_set_gamma_ramp(&pb_GammaRamp[index][0][0]);
            pb_GammaRampbReady[index] = 0;
            index = (index + 1) % 3;
            pb_GammaRampIdx = index;
        }

        VIDEOREG(NV_PGRAPH_INCREMENT) |= NV_PGRAPH_INCREMENT_READ_3D_TRIGGER;

        //rotate next back buffer & gamma ramp index
        next = (next + 1) % 3;
        pb_BackBufferNxtVBL = next;
    }

    //acknowledge the vblank interrupt until the master interrupt register agrees
    do
    {
        VIDEOREG(PCRTC_INTR) = PCRTC_INTR_VBLANK_RESET;
    } while (VIDEOREG(NV_PMC_INTR_0) & NV_PMC_INTR_0_PCRTC_PENDING);

    NtPulseEvent(pb_VBlankEvent, NULL);

    // if (UserCallback) UserCallback(); //user callback must be brief and preserve fpu state

    VIDEOREG8(NV_PRMCIO_CRX__COLOR) = old_color_addr; //restore color index
}
//Flushes pending CPU writes towards the GPU : issues an x86 "sfence" (store
//fence, orders the stores issued before it), then triggers a flush of the
//NV_PFB write-combining cache and spins until the flush is no longer in progress.
static void pb_cache_flush(void)
{
    __asm__ __volatile__ (" sfence ");

    VIDEOREG(NV_PFB_WC_CACHE) |= NV_PFB_WC_CACHE_FLUSH_TRIGGER;

    for (;;)
    {
        if ((VIDEOREG(NV_PFB_WC_CACHE) & NV_PFB_WC_CACHE_FLUSH_IN_PROGRESS) == 0) break;
    }
}
//Executes a CPU-side subprogram requested by the push buffer.
//
//inner registers 0x1D8C & 0x1D90 match 2 outer registers :
//[0x1D8C]=[NV20_TCL_PRIMITIVE_3D_PARAMETER_A]=VIDEOREG(NV_PGRAPH_PARAMETER_A)=[0xFD401A88]
//[0x1D90]=[NV20_TCL_PRIMITIVE_3D_PARAMETER_B]=VIDEOREG(NV_PGRAPH_PARAMETER_B)=[0xFD40186C]
//so a push buffer sequence can set parameters there before triggering a
//subprogram with command 0x0100, which throws an interrupt and has the CPU
//run the code below (avoid subprogID=0, it seems to be reserved).
//
//subprogID : PB_SETOUTER, PB_SETNOISE or PB_FINISHED (anything else is reported)
//paramA    : PB_SETOUTER: register offset / PB_SETNOISE: value / PB_FINISHED: back buffer index
//paramB    : PB_SETOUTER: value to write (unused by the other subprograms)
static void pb_subprog(DWORD subprogID, DWORD paramA, DWORD paramB)
{
    if (subprogID == PB_SETOUTER)
    {
        //sets an outer register
        VIDEOREG(paramA) = paramB;
        return;
    }

    if (subprogID == PB_SETNOISE)
    {
        //Dxt1NoiseEnable: copy paramA in NV_PGRAPH_RDI(sel 0xE0 adr 0x50 & sel 0xDF adr 0x08)
        VIDEOREG(NV_PGRAPH_RDI_INDEX) = ((0xE0 << 16) & NV_PGRAPH_RDI_INDEX_SELECT) | ((0x50) & NV_PGRAPH_RDI_INDEX_ADDRESS);
        VIDEOREG(NV_PGRAPH_RDI_DATA) = paramA;
        VIDEOREG(NV_PGRAPH_RDI_INDEX) = ((0xDF << 16) & NV_PGRAPH_RDI_INDEX_SELECT) | ((0x08) & NV_PGRAPH_RDI_INDEX_ADDRESS);
        VIDEOREG(NV_PGRAPH_RDI_DATA) = paramA;
        return;
    }

    if (subprogID == PB_FINISHED)
    {
        //warns that all drawing has been finished for the frame :
        //queue the buffer index in the current slot and move to the next slot
        int slot = pb_BackBufferNxt;

        pb_BackBufferIndex[slot] = paramA;
        pb_BackBufferbReady[slot] = 1;
        pb_BackBufferNxt = (slot + 1) % 3;
        return;
    }

    debugPrint(" Unknown subProgID %d has been detected by DPC (A=%x B=%x). \n ",
               subprogID,
               paramA,
               paramB);
}
static DWORD pb_gr_handler ( void )
{
DWORD status ;
DWORD trapped_address ;
int trapped_ctx_id ;
DWORD nsource ;
DWORD GrClass ;
DWORD DataLow ;
int i ;
DWORD * p ;
VIDEOREG ( NV_PGRAPH_FIFO ) = NV_PGRAPH_FIFO_ACCESS_DISABLE ;
status = VIDEOREG ( NV_PGRAPH_INTR ) ;
trapped_address = VIDEOREG ( NV_PGRAPH_TRAPPED_ADDR ) ;
nsource = VIDEOREG ( NV_PGRAPH_NSOURCE ) ;
trapped_ctx_id = ( trapped_address & NV_PGRAPH_TRAPPED_ADDR_CHID ) > > 20 ;
trapped_address & = NV_PGRAPH_TRAPPED_ADDR_MTHD ;
if ( status & NV_PGRAPH_INTR_CONTEXT_SWITCH_PENDING )
{
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_CONTEXT_SWITCH_RESET ;
while ( VIDEOREG ( NV_PGRAPH_STATUS ) ) ;
pb_load_gr_ctx ( trapped_ctx_id ) ;
}
if ( status & NV_PGRAPH_INTR_MISSING_HW_PENDING )
{
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_MISSING_HW_RESET ;
}
if ( ( status & NV_PGRAPH_INTR_NOTIFY_PENDING ) | |
( status & NV_PGRAPH_INTR_ERROR_PENDING ) )
{
if ( nsource & NV_PGRAPH_NSOURCE_ILLEGAL_MTHD_PENDING )
{
if ( status & NV_PGRAPH_INTR_NOTIFY_PENDING )
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_NOTIFY_RESET |
NV_PGRAPH_INTR_ERROR_RESET |
NV_PGRAPH_INTR_SINGLE_STEP_RESET |
NV_PGRAPH_INTR_MORE_RESET ;
else
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_ERROR_RESET ;
}
}
status = VIDEOREG ( NV_PGRAPH_INTR ) ;
if ( status )
{
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_CONTEXT_SWITCH_RESET ;
if ( ( status ! = NV_PGRAPH_INTR_CONTEXT_SWITCH_PENDING ) & &
( status ! = NV_PGRAPH_INTR_SINGLE_STEP_PENDING ) )
{
if ( status & NV_PGRAPH_INTR_MISSING_HW_PENDING )
{
while ( VIDEOREG ( NV_PGRAPH_STATUS ) ) { } ;
}
if ( nsource )
{
if ( ( status & NV_PGRAPH_INTR_NOTIFY_PENDING ) | |
( status & NV_PGRAPH_INTR_ERROR_PENDING ) )
{
GrClass = VIDEOREG ( NV_PGRAPH_CTX_SWITCH1 ) & NV_PGRAPH_CTX_SWITCH1_GRCLASS ;
DataLow = VIDEOREG ( NV_PGRAPH_TRAPPED_DATA_LOW ) ; //&NV_PGRAPH_TRAPPED_DATA_LOW_VALUE
if ( ( nsource & NV_PGRAPH_NSOURCE_ILLEGAL_MTHD_PENDING ) = = 0 )
{
if ( trapped_address = = 0x0100 )
{
//The following line may be a bad idea. But without it, interrupt fires permanently...
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_ERROR_RESET ;
//calls subprogram
pb_subprog ( DataLow , VIDEOREG ( NV_PGRAPH_PARAMETER_A ) , VIDEOREG ( NV_PGRAPH_PARAMETER_B ) ) ;
}
else
{
pb_show_debug_screen ( ) ;
debugPrint ( " \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING ) debugPrint ( " GPU Error : invalid data error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_PROTECTION_ERROR_PENDING ) debugPrint ( " GPU Error : protection error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_RANGE_EXCEPTION_PENDING ) debugPrint ( " GPU Error : range exception error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_LIMIT_COLOR_PENDING ) debugPrint ( " GPU Error : color buffer limit error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_LIMIT_ZETA_PENDING ) debugPrint ( " GPU Error : zeta buffer limit error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_DMA_R_PROTECTION_PENDING ) debugPrint ( " GPU Error : dma read protection error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_DMA_W_PROTECTION_PENDING ) debugPrint ( " GPU Error : dma write protection error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_FORMAT_EXCEPTION_PENDING ) debugPrint ( " GPU Error : format exception error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_PATCH_EXCEPTION_PENDING ) debugPrint ( " GPU Error : patch exception error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_STATE_INVALID_PENDING ) debugPrint ( " GPU Error : object state invalid error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_DOUBLE_NOTIFY_PENDING ) debugPrint ( " GPU Error : double notify error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_NOTIFY_IN_USE_PENDING ) debugPrint ( " GPU Error : notify in use error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_METHOD_CNT_PENDING ) debugPrint ( " GPU Error : method count error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_BFR_NOTIFICATION_PENDING ) debugPrint ( " GPU Error : buffer notification error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_DMA_VTX_PROTECTION_PENDING ) debugPrint ( " GPU Error : DMA vertex protection error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_IDX_INLINE_REUSE_PENDING ) debugPrint ( " Graphics index inline reuse error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_INVALID_OPERATION_PENDING ) debugPrint ( " GPU Error : invalid operation error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_FD_INVALID_OPERATION_PENDING ) debugPrint ( " GPU Error : FD invalid operation error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_TEX_A_PROTECTION_PENDING ) debugPrint ( " GPU Error : texture A protection error! \n " ) ;
if ( nsource & NV_PGRAPH_NSOURCE_TEX_B_PROTECTION_PENDING ) debugPrint ( " GPU Error : texture B protection error! \n " ) ;
debugPrint ( " Error binary flags : %08x \n "
" Channel ID : %d (0=3D) \n "
" Channel class : %x \n "
" Push buffer inner register target : %04x \n "
" Push buffer data (lo) or instance : %08x \n "
" Push buffer data (hi) or instance : %08x \n "
" Multi-purpose register A [0x1D8C] : %08x \n "
" Multi-purpose register B [0x1D90] : %08x \n \n " ,
nsource ,
trapped_ctx_id ,
GrClass ,
trapped_address ,
DataLow ,
VIDEOREG ( NV_PGRAPH_TRAPPED_DATA_HIGH ) ,
VIDEOREG ( NV_PGRAPH_PARAMETER_A ) ,
VIDEOREG ( NV_PGRAPH_PARAMETER_B ) ) ;
if ( pb_trace_mode = = 0 ) debugPrint ( " Report is accurate only if pb_trace_mode=1 (slower) \n " ) ;
debugPrint ( " System halted \n " ) ;
//calling XReboot() from here doesn't work well.
2010-12-27 01:01:25 +00:00
// Halt the system with these instructions, so the CPU can idle.
__asm__ (
" cli \n "
" hlt " ) ;
2010-12-04 16:14:34 +00:00
}
}
}
}
if ( status & NV_PGRAPH_INTR_BUFFER_NOTIFY_PENDING )
{
while ( VIDEOREG ( NV_PGRAPH_STATUS ) ) { } ;
}
}
}
VIDEOREG ( NV_PGRAPH_FIFO ) = NV_PGRAPH_FIFO_ACCESS_ENABLE ;
return VIDEOREG ( NV_PGRAPH_INTR ) ;
}
static void pb_wait_until_gr_not_busy ( void )
{
DWORD status ;
while ( VIDEOREG ( NV_PGRAPH_STATUS ) ! = NV_PGRAPH_STATUS_NOT_BUSY )
{
status = VIDEOREG ( NV_PMC_INTR_0 ) ;
if ( status & NV_PMC_INTR_0_PGRAPH_PENDING ) pb_gr_handler ( ) ;
if ( status & NV_PMC_INTR_0_PCRTC_PENDING ) pb_vbl_handler ( ) ;
}
}
//Makes ctx_id the current graphic context of PGRAPH and records it in pb_GrCtxID.
//ctx_id : index into pb_GrCtxInst[]/pb_3DGrCtxInst[] (both have 2 entries), or NONE
//         to leave PGRAPH without a valid channel.
//NOTE(review): ctx_id is used as an array index without a range check, and
//pb_gr_handler() passes the channel id extracted from NV_PGRAPH_TRAPPED_ADDR as is.
static void pb_load_gr_ctx ( int ctx_id )
{
DWORD old_fifo_access ;
DWORD dummy ; //only receives read-backs of NV_PGRAPH_DEBUG_0, its value is never used
int i ;
//service any pending PGRAPH interrupt first, then stop FIFO access and wait for idle
if ( VIDEOREG ( NV_PGRAPH_INTR ) ! = NV_PGRAPH_INTR_NOT_PENDING ) pb_gr_handler ( ) ;
old_fifo_access = VIDEOREG ( NV_PGRAPH_FIFO ) ;
VIDEOREG ( NV_PGRAPH_FIFO ) = NV_PGRAPH_FIFO_ACCESS_DISABLE ;
pb_wait_until_gr_not_busy ( ) ;
//switching to a different, valid context : set the context pointer with the UNLOADED status
//NOTE(review): the pointer written here is the one of the NEW context (ctx_id) - confirm this is intended
if ( ( ctx_id ! = pb_GrCtxID ) & & ( ctx_id ! = NONE ) )
{
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_POINTER ) = pb_GrCtxInst [ ctx_id ] & NV_PGRAPH_CHANNEL_CTX_POINTER_INST ;
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_STATUS ) = NV_PGRAPH_CHANNEL_CTX_STATUS_UNLOADED ;
pb_wait_until_gr_not_busy ( ) ;
VIDEOREG ( NV_PGRAPH_CTX_CONTROL ) = NV_PGRAPH_CTX_CONTROL_DEVICE_ENABLED ;
}
pb_GrCtxID = ctx_id ;
if ( ctx_id = = NONE )
{
//no context : device enabled without CHID_VALID, and FIFO access re-enabled
VIDEOREG ( NV_PGRAPH_CTX_CONTROL ) = NV_PGRAPH_CTX_CONTROL_DEVICE_ENABLED | NV_PGRAPH_CTX_CONTROL_TIME_NOT_EXPIRED ;
VIDEOREG ( NV_PGRAPH_FFINTFC_ST2 ) = NV_PGRAPH_FFINTFC_ST2_CHID_STATUS_VALID ;
VIDEOREG ( NV_PGRAPH_FIFO ) = old_fifo_access | NV_PGRAPH_FIFO_ACCESS_ENABLE ;
}
else
{
//a 3D context exists for this id : pulse the IDX/VTX/CAS state resets (each write is read back),
//then write 15 zeros through the RDI data port starting at select 0x3D, address 0
if ( pb_3DGrCtxInst [ ctx_id ] )
{
VIDEOREG ( NV_PGRAPH_DEBUG_0 ) = NV_PGRAPH_DEBUG_0_IDX_STATE_RESET |
NV_PGRAPH_DEBUG_0_VTX_STATE_RESET |
NV_PGRAPH_DEBUG_0_CAS_STATE_RESET ;
dummy = VIDEOREG ( NV_PGRAPH_DEBUG_0 ) ;
VIDEOREG ( NV_PGRAPH_DEBUG_0 ) = NV_PGRAPH_DEBUG_0_NO_RESET ;
dummy = VIDEOREG ( NV_PGRAPH_DEBUG_0 ) ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( 0x00 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0x3D < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
for ( i = 0 ; i < 15 ; i + + ) VIDEOREG ( NV_PGRAPH_RDI_DATA ) = 0 ;
}
//declare the new channel id and its context instance with the LOADED status, then wait for idle
VIDEOREG ( NV_PGRAPH_DEBUG_1 ) | = NV_PGRAPH_DEBUG_1_CACHE_INVALIDATE ;
VIDEOREG ( NV_PGRAPH_CTX_USER ) = ( ctx_id < < 24 ) & NV_PGRAPH_CTX_USER_CHID ;
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_POINTER ) = pb_GrCtxInst [ ctx_id ] & NV_PGRAPH_CHANNEL_CTX_POINTER_INST ;
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_STATUS ) = NV_PGRAPH_CHANNEL_CTX_STATUS_LOADED ;
pb_wait_until_gr_not_busy ( ) ;
//rewrite only the channel id field of CTX_USER, keeping its other bits as they are now
VIDEOREG ( NV_PGRAPH_CTX_USER ) = ( VIDEOREG ( NV_PGRAPH_CTX_USER ) & ~ NV_PGRAPH_CTX_USER_CHID ) | ( ( ctx_id < < 24 ) & NV_PGRAPH_CTX_USER_CHID ) ;
VIDEOREG ( NV_PGRAPH_CTX_CONTROL ) = NV_PGRAPH_CTX_CONTROL_TIME_NOT_EXPIRED |
NV_PGRAPH_CTX_CONTROL_CHID_VALID |
NV_PGRAPH_CTX_CONTROL_DEVICE_ENABLED ;
VIDEOREG ( NV_PGRAPH_FFINTFC_ST2 ) & = ( NV_PGRAPH_FFINTFC_ST2_CHSWITCH_CLEAR & NV_PGRAPH_FFINTFC_ST2_FIFOHOLD_CLEAR ) ;
//NOTE: NV_PGRAPH_FIFO is not restored in this branch ; pb_gr_handler() re-enables it before returning
}
}
static DWORD pb_fifo_handler ( void )
{
DWORD i ;
DWORD status ;
DWORD pull ;
DWORD get_address ;
int skip_waiting ;
skip_waiting = 0 ;
status = VIDEOREG ( NV_PFIFO_INTR_0 ) ;
if ( status & NV_PFIFO_INTR_0_SEMAPHORE_PENDING )
{
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_SEMAPHORE_RESET ;
}
if ( status & NV_PFIFO_INTR_0_ACQUIRE_TIMEOUT_PENDING )
{
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_ACQUIRE_TIMEOUT_RESET ;
}
status = VIDEOREG ( NV_PFIFO_INTR_0 ) ;
if ( status & NV_PFIFO_INTR_0_CACHE_ERROR_PENDING )
{
pull = VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) ;
get_address = VIDEOREG ( NV_PFIFO_CACHE1_GET ) ; //&NV_PFIFO_CACHE1_GET_ADDRESS (0x3FC)
get_address > > = 2 ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_CACHE_ERROR_RESET ;
for ( i = 0 ; i < 65535 ; i + + )
{
if ( ( pull & NV_PFIFO_CACHE1_PULL0_HASH_STATE_BUSY ) = = 0 ) break ;
pull = VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) ;
}
if ( ( pull & NV_PFIFO_CACHE1_PULL0_DEVICE_SOFTWARE ) | |
( pull & NV_PFIFO_CACHE1_PULL0_HASH_FAILED ) )
{
VIDEOREG ( NV_PFIFO_CACHE1_GET ) = ( ( get_address + 1 ) < < 2 ) & NV_PFIFO_CACHE1_GET_ADDRESS ;
}
VIDEOREG ( NV_PFIFO_CACHE1_HASH ) = 0 ; //&NV_PFIFO_CACHE1_HASH_INSTANCE
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_REASSIGN_ENABLED ;
}
if ( status & NV_PFIFO_INTR_0_DMA_PUSHER_PENDING )
{
pb_show_debug_screen ( ) ;
debugPrint ( " Software Put=%08x \n " , pb_Put ) ;
debugPrint ( " Hardware Put=%08x \n " , VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) ) ;
debugPrint ( " Hardware Get=%08x \n " , VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) ) ;
debugPrint ( " Dma push buffer engine encountered invalid data at these addresses. \n " ) ;
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_DMA_PUSHER_RESET ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_STATE ) = NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_0 ;
if ( VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) ! = VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) )
VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) + = ( 1 < < 2 ) ;
}
if ( status & NV_PFIFO_INTR_0_DMA_PT_PENDING )
{
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_DMA_PT_RESET ;
}
if ( VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) & NV_PFIFO_CACHE1_DMA_PUSH_STATE_BUSY )
{
if ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) = = 0 )
do
{
if ( VIDEOREG ( NV_PFIFO_INTR_0 ) = = NV_PFIFO_INTR_0_NOT_PENDING )
{
if ( VIDEOREG ( NV_PGRAPH_INTR ) ) pb_fifo_handler ( ) ;
if ( VIDEOREG ( NV_PMC_INTR_0 ) & NV_PMC_INTR_0_PCRTC_PENDING ) pb_vbl_handler ( ) ;
if ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) = = 0 )
continue ; //jump to loop start
}
if ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) = = 0 )
{
skip_waiting = 1 ;
break ;
}
} while ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) ;
if ( skip_waiting = = 0 )
{
//wait
while ( VIDEOREG8 ( NV_PFIFO_CACHES ) & NV_PFIFO_CACHES_DMA_SUSPEND_BUSY ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) & = NV_PFIFO_CACHE1_DMA_PUSH_STATUS_RUNNING ;
}
}
if ( VIDEOREG ( NV_PFIFO_INTR_0 ) = = NV_PFIFO_INTR_0_NOT_PENDING )
{
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_REASSIGN_ENABLED ;
}
return VIDEOREG ( NV_PFIFO_INTR_0 ) | ( VIDEOREG ( NV_PFIFO_DEBUG_0 ) & NV_PFIFO_DEBUG_0_CACHE_ERROR0_PENDING ) ;
}
static void pb_set_fifo_channel ( int channel )
{
DWORD old_caches , old_push , old_pull , old_channel ;
DWORD * p ;
DWORD pending_flags ;
old_caches = VIDEOREG ( NV_PFIFO_CACHES ) ;
old_push = VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) ;
old_pull = VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
old_channel = VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) & NV_PFIFO_CACHE1_PUSH1_CHID ;
//backup old channel details into PRAMIN area
p = ( DWORD * ) ( VIDEO_BASE + pb_FifoFCAddr + old_channel * 64 ) ;
* ( p + 0 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) ; //&NV_PFIFO_CACHE1_DMA_PUT_OFFSET
* ( p + 1 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) ; //&NV_PFIFO_CACHE1_DMA_GET_OFFSET
* ( p + 2 ) = VIDEOREG ( NV_PFIFO_CACHE1_REF ) ; //&NV_PFIFO_CACHE1_REF_CNT
* ( p + 3 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_INSTANCE ) ; //&NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS
* ( p + 4 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_STATE ) ;
* ( p + 5 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_FETCH ) ;
* ( p + 6 ) = VIDEOREG ( NV_PFIFO_CACHE1_ENGINE ) ;
* ( p + 7 ) = VIDEOREG ( NV_PFIFO_CACHE1_PULL1 ) ;
* ( p + 8 ) = VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_2 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_2_VALUE
* ( p + 9 ) = VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_1 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_1_TIMESTAMP
* ( p + 10 ) = VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_0 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_0_TIMEOUT
* ( p + 11 ) = VIDEOREG ( NV_PFIFO_CACHE1_SEMAPHORE ) ;
* ( p + 12 ) = VIDEOREG ( NV_PFIFO_CACHE1_DMA_SUBROUTINE ) ;
if ( VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) & NV_PFIFO_CACHE1_PUSH1_MODE_DMA )
{
pending_flags = VIDEOREG ( NV_PFIFO_DMA ) ;
pending_flags & = ~ ( 1 < < old_channel ) ;
if ( VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) ! = VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) )
pending_flags | = ( 1 < < old_channel ) ;
VIDEOREG ( NV_PFIFO_DMA ) = pending_flags ;
}
//let's switch from old_channel to channel
VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) = channel & NV_PFIFO_CACHE1_PUSH1_CHID ;
if ( channel ! = 1 )
if ( pb_FifoChannelsMode & ( 1 < < channel ) ) //Channel mode was DMA?
VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) | = NV_PFIFO_CACHE1_PUSH1_MODE_DMA ;
//restore channel details from VRAM
p = ( DWORD * ) ( VIDEO_BASE + pb_FifoFCAddr + channel * 64 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) = * ( p + 0 ) ; //&NV_PFIFO_CACHE1_DMA_PUT_OFFSET
VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) = * ( p + 1 ) ; //&NV_PFIFO_CACHE1_DMA_GET_OFFSET
VIDEOREG ( NV_PFIFO_CACHE1_REF ) = * ( p + 2 ) ; //&NV_PFIFO_CACHE1_REF_CNT
VIDEOREG ( NV_PFIFO_CACHE1_DMA_INSTANCE ) = * ( p + 3 ) ; //&NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS
VIDEOREG ( NV_PFIFO_CACHE1_DMA_STATE ) = * ( p + 4 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_FETCH ) = * ( p + 5 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_ENGINE ) = * ( p + 6 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL1 ) = * ( p + 7 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_2 ) = * ( p + 8 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_2_VALUE
VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_1 ) = * ( p + 9 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_1_TIMESTAMP
VIDEOREG ( NV_PFIFO_CACHE1_ACQUIRE_0 ) = * ( p + 10 ) ; //&NV_PFIFO_CACHE1_ACQUIRE_0_TIMEOUT
VIDEOREG ( NV_PFIFO_CACHE1_SEMAPHORE ) = * ( p + 11 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_SUBROUTINE ) = * ( p + 12 ) ;
if ( channel ! = 1 )
if ( pb_FifoChannelsMode & ( 1 < < channel ) ) //Channel mode was DMA?
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_TIMESLICE ) = NV_PFIFO_TIMESLICE_TIMER_EXPIRED ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = old_pull ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = old_push ;
VIDEOREG ( NV_PFIFO_CACHES ) = old_caches ;
}
//Deferred Procedure Call (delayed treatment, queued by ISR).
//DPCs avoid crashes inside non reentrant user callbacks called by nested ISRs.
//CAUTION : if you use fpu in DPC you have to save & restore yourself fpu state!!!
//(fpu=floating point unit, i.e the coprocessor executing floating point opcodes)
//Loops over the pending GPU interrupt sources until no handler reports more
//work, then writes NV_PMC_INTR_EN_0_INTA_HARDWARE into NV_PMC_INTR_EN_0.
//The four parameters are not used.
static void __stdcall DPC(PKDPC Dpc, PVOID DeferredContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
    DWORD pending;
    DWORD again;

    for (;;)
    {
        again = 0;
        pending = VIDEOREG(NV_PMC_INTR_0);

        //timer alarm: acknowledge it, loop again if something is still flagged
        if (pending & NV_PMC_INTR_0_PTIMER_PENDING)
        {
            VIDEOREG(NV_PTIMER_INTR_0) = NV_PTIMER_INTR_0_ALARM_RESET;
            again = VIDEOREG(NV_PTIMER_INTR_0);
        }

        if (pending & NV_PMC_INTR_0_PCRTC_PENDING)
        {
            pb_vbl_handler();
        }

        if (pending & NV_PMC_INTR_0_PGRAPH_PENDING)
        {
            again |= pb_gr_handler();
        }

        //fifo handler also runs when a cache error is flagged in NV_PFIFO_DEBUG_0
        if ((VIDEOREG8(NV_PFIFO_DEBUG_0) & NV_PFIFO_DEBUG_0_CACHE_ERROR0_PENDING) ||
            (pending & NV_PMC_INTR_0_PFIFO_PENDING))
        {
            again |= pb_fifo_handler();
        }

        if ((VIDEOREG8(NV_PVIDEO_INTR) & NV_PVIDEO_INTR_BUFFER_0_PENDING) ||
            (pending & NV_PMC_INTR_0_PVIDEO_PENDING))
        {
            VIDEOREG(NV_PVIDEO_INTR) = NV_PVIDEO_INTR_BUFFER_0_RESET;
        }

        if (!again) break;
    }

    VIDEOREG(NV_PMC_INTR_EN_0) = NV_PMC_INTR_EN_0_INTA_HARDWARE;
}
//Interruption Service Routine (triggered by interrupt signal IRQ3).
//Returns FALSE when pbKit is not running or when NV_PMC_INTR_0 shows nothing
//pending. Otherwise writes NV_PMC_INTR_EN_0_INTA_DISABLED into NV_PMC_INTR_EN_0,
//queues the DPC for the longer treatments and returns TRUE.
//Both parameters are not used.
static BOOLEAN __stdcall ISR(PKINTERRUPT Interrupt, PVOID ServiceContext)
{
    if (pb_running == 0)
    {
        return FALSE;
    }

    //really, not for us at all
    if (VIDEOREG(NV_PMC_INTR_0) == NV_PMC_INTR_0_NOT_PENDING)
    {
        return FALSE;
    }

    //is it the VBlank event? (if so, proceed screen swapping immediately & in DPC)
    if (VIDEOREG(NV_PMC_INTR_0) & NV_PMC_INTR_0_PCRTC_PENDING)
    {
        //a back buffer is ready for this VBlank: show it now, it's urgent
        //(not done while the debug screen is active)
        if ((pb_debug_screen_active == 0) &&
            (pb_BackBufferbReady[pb_BackBufferNxtVBL] == 1))
        {
            VIDEOREG(PCRTC_START) = pb_FBAddr[pb_BackBufferIndex[pb_BackBufferNxtVBL]] & 0x03FFFFFF;
        }
    }

    VIDEOREG(NV_PMC_INTR_EN_0) = NV_PMC_INTR_EN_0_INTA_DISABLED;

    //handle longer & non urgent stuff later with the Dpc
    KeInsertQueueDpc(&pb_DPCObject, NULL, NULL);

    return TRUE;
}
//Initializes the DPC object (routine DPC) and the interrupt object (routine ISR,
//level sensitive) for GPU_IRQ, then connects the interrupt.
//Returns the value returned by KeConnectInterrupt.
static int pb_install_gpu_interrupt(void)
{
    KIRQL irql;
    ULONG vector;

    vector = HalGetInterruptVector(GPU_IRQ, &irql);

    KeInitializeDpc(&pb_DPCObject, &DPC, NULL);
    KeInitializeInterrupt(&pb_InterruptObject, &ISR, NULL, vector, irql, LevelSensitive, TRUE);

    return KeConnectInterrupt(&pb_InterruptObject);
}
//Disconnects the interrupt object connected by pb_install_gpu_interrupt.
static void pb_uninstall_gpu_interrupt(void)
{
    KeDisconnectInterrupt(&pb_InterruptObject);
}
static DWORD pb_wait_until_tiles_not_busy ( void )
{
DWORD old_dma_push ;
while ( ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) = = 0 ) | |
( ( VIDEOREG8 ( NV_PFIFO_RUNOUT_STATUS ) & NV_PFIFO_RUNOUT_STATUS_LOW_MARK_EMPTY ) = = 0 ) | |
( ( VIDEOREG8 ( NV_PFIFO_CACHE1_DMA_PUSH ) & NV_PFIFO_CACHE1_DMA_PUSH_STATE_BUSY ) ! = 0 ) )
{
pb_fifo_handler ( ) ;
if ( VIDEOREG ( NV_PGRAPH_INTR ) ! = NV_PGRAPH_INTR_NOT_PENDING ) pb_gr_handler ( ) ;
if ( VIDEOREG ( NV_PMC_INTR_0 ) & NV_PMC_INTR_0_PCRTC_PENDING ) pb_vbl_handler ( ) ;
}
old_dma_push = VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_DISABLE ;
while ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_DMA_PUSH ) & NV_PFIFO_CACHE1_DMA_PUSH_STATE_BUSY ) ! = 0 ) ;
return old_dma_push ;
}
static void pb_release_tile ( int index , int clear_offset )
{
DWORD * pTile ;
DWORD * pZcomp ;
DWORD * p ;
DWORD addr ;
DWORD data ;
DWORD old_dma_push ;
old_dma_push = pb_wait_until_tiles_not_busy ( ) ;
//points tile in NV_PFB
pTile = ( DWORD * ) ( VIDEO_BASE + NV_PFB_TILE + index * 16 ) ;
//points tile in NV_PGRAPH
p = ( DWORD * ) ( VIDEO_BASE + NV_PGRAPH_TILE_XBOX + index * 16 ) ;
//points tile in NV_PGRAPH_RDI(0x10)
addr = ( ( index * 4 + 0x10 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
data = 0 ;
do
{
pb_wait_until_gr_not_busy ( ) ;
* ( pTile + 0 ) = 0 ;
* ( p + 0 ) = 0 ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = addr ; VIDEOREG ( NV_PGRAPH_RDI_DATA ) = data ;
} while ( * ( pTile + 0 ) ! = * ( p + 0 ) ) ;
//points tile Zcomp in NV_PFB
pZcomp = ( DWORD * ) ( VIDEO_BASE + NV_PFB_ZCOMP + index * 4 ) ;
//points tile Zcomp in NV_PGRAPH
p = ( DWORD * ) ( VIDEO_BASE + NV_PGRAPH_ZCOMP_XBOX + index * 4 ) ;
//points tile Zcomp in NV_PGRAPH_RDI(0x90)
addr = ( ( index * 4 + 0x90 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
data = 0 ;
* ( pZcomp + 0 ) = 0 ;
* ( p + 0 ) = 0 ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = addr ; VIDEOREG ( NV_PGRAPH_RDI_DATA ) = data ;
if ( clear_offset )
{
VIDEOREG ( NV_PFB_ZCOMP_OFFSET ) = 0 ;
VIDEOREG ( NV_PGRAPH_ZCOMP_OFFSET_XBOX ) = 0 ;
}
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = old_dma_push ;
}
//Programs tile 'tile_index' with a memory area and, optionally, Z compression.
// tile_addr        : start address of the area (DBG build warns if the 14 lower bits are not null)
// tile_size        : size of the area (same alignment check); last byte = tile_addr+tile_size-1
// tile_pitch       : value written in the pitch register of the tile
// tile_z_start_tag : start tag, stored shifted right by 2 in the Zcomp register
// tile_z_offset    : if not null, ored with tile_index and bit 31 then written in ZCOMP_OFFSET
// tile_flags       : bit 0 is ored into the tile address register,
//                    bit 31 enables the Zcomp programming (tag in use),
//                    bit 26 is copied into the Zcomp register
//Every value is written in NV_PFB, in NV_PGRAPH and in NV_PGRAPH_RDI (select 0xEA),
//and the writes are repeated until NV_PFB and NV_PGRAPH read back consistent.
//Dma push access is disabled during the operation and restored at the end.
//Registers are accessed through volatile pointers so the read-back comparisons
//really read the hardware instead of being folded by the compiler.
void pb_assign_tile(int tile_index,
                    DWORD tile_addr,
                    DWORD tile_size,
                    DWORD tile_pitch,
                    DWORD tile_z_start_tag,
                    DWORD tile_z_offset,
                    DWORD tile_flags)
{
    volatile DWORD *pfb_tile;
    volatile DWORD *gr_tile;
    volatile DWORD *pfb_zcomp;
    volatile DWORD *gr_zcomp;
    DWORD old_dma_push;
    DWORD rdi_addr;
    DWORD rdi_tail;
    DWORD rdi_pitch;
    DWORD rdi_zcomp;
    DWORD tile_word;
    DWORD tile_tail;
    DWORD EncodedZStartTag;
    DWORD EncodedZOffset;

#ifdef DBG
    if ((tile_addr & 0x3fff) || (tile_size & 0x3fff)) debugPrint(" pb_assign_tile: addr & size not well aligned \n ");
#endif

    old_dma_push = pb_wait_until_tiles_not_busy();

    //points at tile in NV_PGRAPH_RDI(0x10(Addr),0x30(Tail) & 0x50(Pitch))
    rdi_addr = ((tile_index * 4 + 0x10) & NV_PGRAPH_RDI_INDEX_ADDRESS) | ((0xEA << 16) & NV_PGRAPH_RDI_INDEX_SELECT);
    rdi_tail = ((tile_index * 4 + 0x30) & NV_PGRAPH_RDI_INDEX_ADDRESS) | ((0xEA << 16) & NV_PGRAPH_RDI_INDEX_SELECT);
    rdi_pitch = ((tile_index * 4 + 0x50) & NV_PGRAPH_RDI_INDEX_ADDRESS) | ((0xEA << 16) & NV_PGRAPH_RDI_INDEX_SELECT);

    tile_word = tile_addr | 2 | (tile_flags & 1);
    tile_tail = tile_addr + tile_size - 1;

    //points tile in NV_PFB
    pfb_tile = (volatile DWORD *)(VIDEO_BASE + NV_PFB_TILE + tile_index * 16);
    //points tile in NV_PGRAPH
    gr_tile = (volatile DWORD *)(VIDEO_BASE + NV_PGRAPH_TILE_XBOX + tile_index * 16);

    do
    {
        pb_wait_until_gr_not_busy();

        pfb_tile[0] = tile_word;
        gr_tile[0] = tile_word;
        VIDEOREG(NV_PGRAPH_RDI_INDEX) = rdi_addr;
        VIDEOREG(NV_PGRAPH_RDI_DATA) = tile_word;

        pfb_tile[1] = tile_tail;
        gr_tile[1] = tile_tail;
        VIDEOREG(NV_PGRAPH_RDI_INDEX) = rdi_tail;
        VIDEOREG(NV_PGRAPH_RDI_DATA) = tile_tail;

        pfb_tile[2] = tile_pitch;
        gr_tile[2] = tile_pitch;
        VIDEOREG(NV_PGRAPH_RDI_INDEX) = rdi_pitch;
        VIDEOREG(NV_PGRAPH_RDI_DATA) = tile_pitch;
    }
    //tail registers are compared without their 14 lower bits
    while ((pfb_tile[0] != gr_tile[0]) ||
           ((pfb_tile[1] & 0xFFFFC000) != (gr_tile[1] & 0xFFFFC000)) ||
           (pfb_tile[2] != gr_tile[2]));

    if (tile_flags & 0x80000000) //Tag in use?
    {
        EncodedZStartTag = (tile_z_start_tag >> 2) | 0x80000000;
        if (tile_flags & 0x04000000) EncodedZStartTag |= 0x04000000;

        //points tile Zcomp in NV_PFB
        pfb_zcomp = (volatile DWORD *)(VIDEO_BASE + NV_PFB_ZCOMP + tile_index * 4);
        //points tile Zcomp in NV_PGRAPH
        gr_zcomp = (volatile DWORD *)(VIDEO_BASE + NV_PGRAPH_ZCOMP_XBOX + tile_index * 4);
        //points tile Zcomp in NV_PGRAPH_RDI(0x90)
        rdi_zcomp = ((tile_index * 4 + 0x90) & NV_PGRAPH_RDI_INDEX_ADDRESS) | ((0xEA << 16) & NV_PGRAPH_RDI_INDEX_SELECT);

        do
        {
            pb_wait_until_gr_not_busy();
            pfb_zcomp[0] = EncodedZStartTag;
            gr_zcomp[0] = EncodedZStartTag;
            VIDEOREG(NV_PGRAPH_RDI_INDEX) = rdi_zcomp;
            VIDEOREG(NV_PGRAPH_RDI_DATA) = EncodedZStartTag;
        } while (pfb_zcomp[0] != gr_zcomp[0]);

        if (tile_z_offset)
        {
            EncodedZOffset = tile_z_offset | tile_index | 0x80000000;
            do
            {
                pb_wait_until_gr_not_busy();
                VIDEOREG(NV_PFB_ZCOMP_OFFSET) = EncodedZOffset;
                VIDEOREG(NV_PGRAPH_ZCOMP_OFFSET_XBOX) = EncodedZOffset;
            } while (VIDEOREG(NV_PFB_ZCOMP_OFFSET) != VIDEOREG(NV_PGRAPH_ZCOMP_OFFSET_XBOX));
        }
    }

    //restore Dma push access state
    VIDEOREG(NV_PFIFO_CACHE1_DMA_PUSH) = old_dma_push;
}
static void pb_prepare_tiles ( void )
{
DWORD * pTile ;
DWORD * pTlimit ;
DWORD * pTsize ;
DWORD * pZcomp ;
DWORD Tile ;
DWORD Tlimit ;
DWORD Tsize ;
DWORD Zcomp ;
DWORD Zcomp_offset ;
DWORD Config0 ;
DWORD Config1 ;
DWORD * p ;
int i ;
p = ( DWORD * ) ( VIDEO_BASE + NV_PGRAPH_TILE_XBOX ) ;
pTlimit = ( DWORD * ) ( VIDEO_BASE + NV_PFB_TLIMIT ) ;
pTsize = ( DWORD * ) ( VIDEO_BASE + NV_PFB_TSIZE ) ;
pTile = ( DWORD * ) ( VIDEO_BASE + NV_PFB_TILE ) ;
//Copy 8 Tiles details from NV_PFB to NV_PGRAPH and to NV_PGRAPH_RDI(0x10)
for ( i = 0x10 ; i < 0x30 ; i + = 4 )
{
Tile = * ( pTile + 0 ) ;
* ( p + 0 ) = Tile ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( i + 0x00 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Tile ;
Tlimit = * ( pTlimit + 0 ) ;
* ( p + 1 ) = Tlimit ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( i + 0x20 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Tlimit ;
Tsize = * ( pTsize + 0 ) ;
* ( p + 2 ) = Tsize ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( i + 0x40 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Tsize ;
p + = 4 ; //move 16 bytes forward
pTile + = 4 ;
pTlimit + = 4 ;
pTsize + = 4 ;
}
p = ( DWORD * ) ( VIDEO_BASE + NV_PGRAPH_ZCOMP_XBOX ) ;
pZcomp = ( DWORD * ) ( VIDEO_BASE + NV_PFB_ZCOMP ) ;
//Copy 8 Tiles Zcomp from NV_PFB to NV_PGRAPH and to NV_PGRAPH_RDI(0x90)
for ( i = 0x90 ; i < 0x110 ; i + = 4 )
{
Zcomp = * ( pZcomp + 0 ) ;
* ( p + 0 ) = Zcomp ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( i + 0x00 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Tsize ;
p + + ; //move 4 bytes forward
pZcomp + + ;
}
//Copy 3 parameters from NV_PFB to NV_PGRAPH and to NV_PGRAPH_RDI(sel 0xEA : 0xC, 0 & 4)
Zcomp_offset = VIDEOREG ( NV_PFB_ZCOMP_OFFSET ) ;
VIDEOREG ( NV_PGRAPH_ZCOMP_OFFSET_XBOX ) = Zcomp_offset ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( 0x0C ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Zcomp_offset ;
Config0 = VIDEOREG ( NV_PFB_CFG0 ) ;
VIDEOREG ( NV_PGRAPH_CFG0_XBOX ) = Config0 ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( 0x00 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Config0 ;
Config1 = VIDEOREG ( NV_PFB_CFG1 ) ;
VIDEOREG ( NV_PGRAPH_CFG1_XBOX ) = Config1 ;
VIDEOREG ( NV_PGRAPH_RDI_INDEX ) = ( ( 0x04 ) & NV_PGRAPH_RDI_INDEX_ADDRESS ) | ( ( 0xEA < < 16 ) & NV_PGRAPH_RDI_INDEX_SELECT ) ;
VIDEOREG ( NV_PGRAPH_RDI_DATA ) = Config1 ;
}
//Creates a Dma context object : reserves 1 instance block (16 bytes) in PRAMIN
//area (pb_FreeInst is advanced by 1), fills its 4 dwords and describes it in
//*pDmaObject (ChannelID, Inst, Class, isGr=0).
// ChannelID  : identifier stored in pDmaObject->ChannelID
// Class      : Dma class, stored in the flags dword and in pDmaObject->Class
// Base       : start address; when its top nibble is 8 the address is masked
//              with 0x03FFFFFF and flagged ADDR_SYSMEM, otherwise it is used
//              as is and flagged ADDR_FBMEM
// Limit      : value written in the second dword of the block
// pDmaObject : receives the description of the new object
static void pb_create_dma_ctx(DWORD ChannelID,
                              DWORD Class,
                              DWORD Base,
                              DWORD Limit,
                              struct s_CtxDma *pDmaObject)
{
    DWORD target_addr;
    DWORD target_space;
    DWORD inst;
    DWORD ctx_flags;
    DWORD ramin;

    if ((Base & 0xF0000000) == 0x80000000)
    {
        target_addr = Base & 0x03FFFFFF;
        target_space = ADDR_SYSMEM;
    }
    else
    {
        target_addr = Base;
        target_space = ADDR_FBMEM;
    }

    //reserve 1 block (16 bytes)
    inst = pb_FreeInst;
    pb_FreeInst += 1;

    ctx_flags = Class | 0x00003000;
    if (target_space == ADDR_AGPMEM) ctx_flags |= 0x00030000;
    if (target_space == ADDR_SYSMEM) ctx_flags |= 0x00020000;
    ctx_flags |= 0x00008000;

    //block is filled in this order : +8, +C, +0, +4
    ramin = NV_PRAMIN + (inst << 4);
    VIDEOREG(ramin + 0x08) = target_addr | 3;   //0x00000003|Addr
    VIDEOREG(ramin + 0x0C) = target_addr | 3;   //0x00000003|Addr
    VIDEOREG(ramin + 0x00) = ctx_flags;         //0x???sB0cl ???=Addr&0xFFF
    VIDEOREG(ramin + 0x04) = Limit;             //0x03FFAFFF (MAXRAM)

    memset(pDmaObject, 0, sizeof(struct s_CtxDma));
    pDmaObject->ChannelID = ChannelID;
    pDmaObject->Inst = inst;
    pDmaObject->Class = Class;
    pDmaObject->isGr = 0;
}
static void pb_bind_channel ( struct s_CtxDma * pCtxDmaObject )
{
DWORD entry ;
DWORD * p ;
//entry in hash table
entry = ( ( ( pCtxDmaObject - > ChannelID > > 11 ) ^ pCtxDmaObject - > ChannelID ) > > 11 ) ^ pCtxDmaObject - > ChannelID ;
//entry*8 max valid value is 0x1000
//points at entry in hash table (table element size is 8 bytes = 2 dwords)
p = ( DWORD * ) ( VIDEO_BASE + pb_FifoHTAddr + entry * 8 ) ;
* ( p + 0 ) = pCtxDmaObject - > ChannelID ;
* ( p + 1 ) = ( 0x80000000 ) |
( pb_FifoChannelID < < 24 ) |
( pCtxDmaObject - > isGr < < 16 ) |
( pCtxDmaObject - > Inst & 0xFFFF ) ;
}
static void pb_3D_init ( void )
{
DWORD Inst ;
int channel ;
int i ;
DWORD offset ;
DWORD offset_cmn ;
DWORD offset_pipe ;
DWORD offset_4dwords ;
DWORD offset_20dwords ;
//Initialization of 3 big structures in PRAMIN area
//At offset 0x0000 size=0x231C bytes=0x1A9C+0x0880
//At offset 0x231C size=0x0C00 bytes
//At offset 0x2F1C size=0x0784 bytes
//Padding 4 dwords (at offset 0x36A0 size=0x0010 bytes?)
channel = pb_FifoChannelID ;
Inst = pb_GrCtxInst [ channel ] ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x000 ) | = 1 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x33C ) = 0xFFFF0000 ;
for ( i = 0x340 ; i < = 0x39C ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x3A0 ) = 0x0FFF0000 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x3A4 ) = 0x0FFF0000 ;
for ( i = 0x3A8 ; i < = 0x478 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x47C ) = 0x00000101 ;
for ( i = 0x480 ; i < = 0x48C ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x490 ) = 0x00000111 ;
for ( i = 0x494 ; i < = 0x4A4 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x4A8 ) = 0x44400000 ;
for ( i = 0x4AC ; i < = 0x4D0 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
for ( i = 0x4D4 ; i < = 0x4E0 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x00030303 ;
for ( i = 0x4E4 ; i < = 0x4F0 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
for ( i = 0x4F4 ; i < = 0x500 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x00080000 ;
for ( i = 0x504 ; i < = 0x508 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
for ( i = 0x50C ; i < = 0x518 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x01012000 ;
for ( i = 0x51C ; i < = 0x528 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x000105B8 ;
for ( i = 0x52C ; i < = 0x538 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x00080008 ;
for ( i = 0x53C ; i < = 0x558 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
for ( i = 0x55C ; i < = 0x578 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x07FF0000 ; //8 dwords
for ( i = 0x57C ; i < = 0x598 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0x07FF0000 ; //8 dwords
for ( i = 0x59C ; i < = 0x5A0 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x5A4 ) = 0x4B7FFFFF ;
for ( i = 0x5A8 ; i < = 0x5F8 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x5FC ) = 0x00000001 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x600 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x604 ) = 0x00004000 ;
for ( i = 0x608 ; i < = 0x60C ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x610 ) = 0x00000001 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x614 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x618 ) = 0x00040000 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x61C ) = 0x00010000 ;
for ( i = 0x620 ; i < = 0x628 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ;
for ( i = 0x62C ; i < = 0x6B4 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //35 dwords
for ( i = 0x6B8 ; i < = 0x728 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //26 dwords
for ( i = 0x72C ; i < = 0x79C ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //26 dwords
for ( i = 0x7A0 ; i < = 0x810 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //26 dwords
for ( i = 0x814 ; i < = 0x818 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //2 dwords
for ( i = 0x81C ; i < = 0xA18 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //128 dwords
for ( i = 0xA1C ; i < = 0xC18 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //128 dwords
for ( i = 0xC1C ; i < = 0xE18 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //128 dwords
for ( i = 0xE1C ; i < = 0x1018 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //128 dwords
for ( i = 0x101C ; i < = 0x1318 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //192 dwords
for ( i = 0x131C ; i < = 0x1A98 ; i + = 4 ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + i ) = 0 ; //224 dwords
offset = 0x1A9C / 4 ; //number of dwords initialized so far = 0x6A7
for ( i = 0 ; i < 0x88 ; i + + ) //136 blocks (unit=16 bytes=4 dwords)
{
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 16 + 0x00 ) = 0x10700FF9 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 16 + 0x04 ) = 0x0436086C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 16 + 0x08 ) = 0x000C001B ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 16 + 0x0C ) = 0 ;
offset + = 4 ;
}
offset_cmn = offset ; //0x231C/4
for ( i = 0 ; i < 0x300 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //768 dwords
offset + = 0x300 ; //0xC00 bytes
offset_pipe = offset ; //0x2F1C/4
for ( i = 0 ; i < 0x68 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //104 dwords
offset + = 0x68 ;
for ( i = 0 ; i < 0xD0 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //208 dwords
offset + = 0xD0 ;
offset_4dwords = offset ;
for ( i = 0 ; i < 0x04 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //004 dwords
offset + = 0x04 ;
offset_20dwords = offset ;
for ( i = 0 ; i < 0x14 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //020 dwords
offset + = 0x14 ;
for ( i = 0 ; i < 0x0F ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //015 dwords
offset + = 0x0F ;
for ( i = 0 ; i < 0x0E ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //014 dwords
offset + = 0x0E ;
for ( i = 0 ; i < 0x44 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //068 dwords
offset + = 0x44 ;
for ( i = 0 ; i < 0x20 ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //032 dwords
offset + = 0x20 ;
for ( i = 0 ; i < 0x0F ; i + + ) VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 + i * 4 ) = 0 ; //015 dwords
offset + = 0x0F ;
//total: +0x1E0
//theoretically, offset=0x369C/4=0xDA7
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset * 4 ) = 0 ;
offset + + ;
//total: +0x1E1
//theoretically, offset=0x36A0/4=0xDA8
//Padding : 4 dwords?
//total: +0x1E5
//theoretically, offset=0x36B0/4=0xDAC
# ifdef DBG
if ( offset + 4 ! = 0x36B0 / 4 ) debugPrint ( " pb_3D_init: bad final value for offset \n " ) ;
# endif
//floating point post-initializations in cmn structure
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x380 ) = 0x3F800000 ; //1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x384 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x388 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x38C ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3C0 ) = 0x40000000 ; //2.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3C4 ) = 0x3F800000 ; //1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3C8 ) = 0x3F000000 ; //0.5f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3CC ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3D0 ) = 0x40000000 ; //2.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3D4 ) = 0x3F800000 ; //1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3D8 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3DC ) = 0xBF800000 ; //-1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3E0 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3E4 ) = 0xBF800000 ; //-1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3E8 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3EC ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x390 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x394 ) = 0x3F800000 ; //1.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x398 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x39C ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3F0 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3F4 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3F8 ) = 0x00000000 ; //0.0f
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_cmn * 4 + 0x3FC ) = 0x00000000 ; //0.0f
//post-initializations in pipe structure
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x160 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x164 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x168 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x16C ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x100 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x104 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x108 ) = 0x000FE000 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x10C ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x110 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x114 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x118 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x11C ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x130 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x134 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x138 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x13C ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x180 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x184 ) = 0x000003F8 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x188 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_pipe * 4 + 0x18C ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_4dwords * 4 ) = 0x002FE000 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x010 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x014 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x018 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x01C ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x020 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x024 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x028 ) = 0x001C527C ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + offset_20dwords * 4 + 0x02C ) = 0x001C527C ;
# ifdef DBG
//at this point pb_GrCtxID and pb_FifoChannelID must be different
//debugPrint("pb_3D_init: gr=%d fifo=%d\n",pb_GrCtxID,pb_FifoChannelID);
# endif
}
static void pb_create_gr_ctx ( int ChannelID ,
int Class ,
struct s_CtxDma * pGrObject )
{
DWORD flags ;
DWORD flags3D ;
int size ;
DWORD Inst ;
flags3D = 0 ;
if ( ( Class ! = GR_CLASS_30 ) & &
( Class ! = GR_CLASS_39 ) & &
( Class ! = GR_CLASS_62 ) & &
( Class ! = GR_CLASS_97 ) & &
( Class ! = GR_CLASS_9F ) )
{
//"CreateGrObject invalid class number"
size = Class ;
}
else
{
size = 16 ; //16 bytes
if ( Class = = GR_CLASS_97 )
{
size = 0x330 ; //816 bytes
flags3D = 1 ;
}
}
Inst = pb_FreeInst ; pb_FreeInst + = ( size > > 4 ) ;
if ( flags3D )
{
pb_3DGrCtxInst [ pb_FifoChannelID ] = Inst ;
pb_3D_init ( ) ;
}
flags = Class & 0x000000FF ;
flags3D = 0x00000000 ;
if ( Class = = GR_CLASS_39 ) flags | = 0x01000000 ;
if ( Class = = GR_CLASS_97 ) flags3D = 0x00000A00 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x00 ) = flags ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x04 ) = flags3D ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x08 ) = 0 ;
VIDEOREG ( NV_PRAMIN + ( Inst < < 4 ) + 0x0C ) = 0 ;
memset ( pGrObject , 0 , sizeof ( struct s_CtxDma ) ) ;
pGrObject - > ChannelID = ChannelID ;
pGrObject - > Class = Class ;
pGrObject - > isGr = 1 ;
pGrObject - > Inst = Inst ;
}
//Publishes the current write position (pb_Put) to the push buffer Dma engine.
//Does nothing when pb_disable_gpu is set.
static void pb_start(void)
{
    //do we really want to send data to GPU?
    if (pb_disable_gpu != 0)
    {
        return;
    }

    //asks push buffer Dma engine to detect incoming Dma data (written at pb_Put)
    pb_cache_flush();
    pb_DmaUserAddr[0x40 / 4] = ((DWORD)pb_Put) & 0x03FFFFFF;
    //from now any write will be detected

#ifdef DBG
    if (pb_DmaUserAddr[0x44 / 4] > 0x04000000)
    {
        debugPrint(" pb_start: wrong GetAddr \n ");
        return;
    }
#endif
}
//Have Dma engine pointer point at push buffer head again.
//(so we don't run into the tail of push buffer)
//The best method would be to call this once per frame since it costs time.
//Of course, avoid writing more data than push buffer size in 1 frame time.
//If it happens you will get a message suggesting to call pb_reset more often
//or to enlarge push buffer (with pb_size, before calling pb_init).
//Default size is 512Kb (128*1024 dwords)
//The function writes a jump command at pb_Put, moves pb_Put to pb_Head, calls
//pb_start and polls the Get address until it equals pb_Head. It gives up when
//the Get address is above 0x04000000 or after more than TICKSTIMEOUT ticks.
static void pb_jump_to_head(void)
{
    DWORD *get_ptr;
    DWORD start_ticks;

#ifdef DBG
    if (pb_BeginEndPair)
    {
        debugPrint(" pb_reset musn't be called inside a begin-end block. \n ");
        return;
    }
#endif

    //writes a jump command
    //forces GPU to jump at push buffer head address at next fetch
    pb_Put[0] = 1 + (((DWORD)pb_Head) & 0x0FFFFFFF);
    pb_Put = pb_Head;

    pb_start();

    start_ticks = KeTickCount;

    //wait for arrival of Gpu Get to push buffer head
    for (;;)
    {
        if (pb_DmaUserAddr[0x44 / 4] > 0x04000000)
        {
#ifdef DBG
            debugPrint(" pb_reset: bad getaddr \n ");
#endif
            return;
        }

        if (KeTickCount - start_ticks > TICKSTIMEOUT)
        {
            debugPrint(" pb_reset: too long \n ");
            break;
        }

        //converts physical address into virtual address
        get_ptr = (DWORD *)(pb_DmaUserAddr[0x44 / 4] | 0x80000000);
        if (get_ptr == pb_Head)
        {
            break;
        }
    }
}
//public functions
//Returns 1 while the Dma Get address differs from pb_Put (28 lower bits
//compared) or while NV_PGRAPH_STATUS is not null, 0 otherwise.
//In DBG build, a Get address above 0x04000000 is reported and 0 is returned.
int pb_busy(void)
{
    DWORD get_addr;
    DWORD put_addr;

    get_addr = pb_DmaUserAddr[0x44 / 4];

#ifdef DBG
    if (get_addr > 0x04000000)
    {
        debugPrint(" pb_busy: wrong GetAddr \n ");
        return 0;
    }
#endif

    put_addr = (DWORD)pb_Put;

    //different addresses : Dma engine has not fetched everything yet
    if (((get_addr ^ put_addr) & 0x0FFFFFFF) != 0)
    {
        return 1;
    }

    return VIDEOREG(NV_PGRAPH_STATUS) ? 1 : 0;
}
//Returns the frame buffers width (pb_FrameBuffersWidth).
DWORD pb_back_buffer_width(void)
{
    return pb_FrameBuffersWidth;
}
//Returns the frame buffers height (pb_FrameBuffersHeight).
DWORD pb_back_buffer_height(void)
{
    return pb_FrameBuffersHeight;
}
//Returns the frame buffers pitch (pb_FrameBuffersPitch).
DWORD pb_back_buffer_pitch(void)
{
    return pb_FrameBuffersPitch;
}
//Returns the address of the current back buffer (pb_FBAddr[pb_back_index]).
DWORD *pb_back_buffer(void)
{
    return (DWORD *)pb_FBAddr[pb_back_index];
}
//Returns the address of extra buffer 'index_buffer'.
//Valid indexes are 0..pb_ExtraBuffersCount-1. For any other value (negative,
//or equal to/above the count) a message is printed and the current back buffer
//address is returned instead.
DWORD *pb_extra_buffer(int index_buffer)
{
    if ((index_buffer < 0) || (index_buffer >= (int)pb_ExtraBuffersCount))
    {
        //message text kept as is (it names pb_target_extra_buffer)
        debugPrint(" pb_target_extra_buffer: buffer index out of range \n ");
        return pb_back_buffer();
    }

    return (DWORD *)pb_EXAddr[index_buffer];
}
void pb_target_back_buffer ( void )
{
DWORD * p ;
DWORD width ;
DWORD height ;
DWORD pitch ;
DWORD pitch_depth_stencil ;
DWORD dma_flags ;
DWORD dma_addr ;
DWORD dma_limit ;
int flag ;
int depth_stencil ;
width = pb_FrameBuffersWidth ;
height = pb_FrameBuffersHeight ;
pitch = pb_FrameBuffersPitch ;
pitch_depth_stencil = pb_DepthStencilPitch ;
//DMA channel 9 is used by GPU in order to render pixels
dma_addr = pb_FBAddr [ pb_back_index ] & 0x03FFFFFF ;
dma_limit = height * pitch - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT3 , 9 ) ; p + = 2 ;
pb_end ( p ) ;
//DMA channel 11 is used by GPU in order to bitblt images
dma_addr = pb_FBAddr [ pb_back_index ] & 0x03FFFFFF ;
dma_limit = height * pitch - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1to ( SUBCH_4 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT2 , 11 ) ; p + = 2 ;
pb_end ( p ) ;
depth_stencil = 1 ;
if ( depth_stencil ! = - 1 ) //don't care
if ( pb_DepthStencilLast ! = depth_stencil ) //changed?
{
//DMA channel 10 is used by GPU in order to render depth stencil
if ( depth_stencil )
{
dma_addr = pb_DSAddr & 0x03FFFFFF ;
dma_limit = height * pitch_depth_stencil - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
flag = 1 ;
}
else
{
dma_addr = 0 ;
dma_limit = 0 ;
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
flag = 0 ;
pitch_depth_stencil = pitch ;
}
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT4 , 10 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_DEPTH_TEST_ENABLE , flag ) ; p + = 2 ; //ZEnable=TRUE or FALSE (But don't use W, see below)
2010-12-27 01:01:25 +00:00
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_TEST_ENABLE , 1 ) ; p + = 2 ; //StencilEnable=TRUE
2010-12-04 16:14:34 +00:00
pb_end ( p ) ;
pb_DepthStencilLast = depth_stencil ;
}
p = pb_begin ( ) ;
pb_push3 ( p , NV20_TCL_PRIMITIVE_3D_BUFFER_PITCH , ( pitch_depth_stencil < < 16 ) | ( pitch & 0xFFFF ) , 0 , 0 ) ; p + = 4 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ , width < < 16 , height < < 16 ) ; p + = 3 ;
//Default (0x00100001)
//We use W (0x00010000)
//We don't enable YUV (0x10000000)
//We don't use floating point depth (0x00001000)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_W_YUV_FPZ_FLAGS , 0x00110001 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BUFFER_FORMAT , pb_GPUFrameBuffersFormat | pb_FBVFlag ) ; p + = 2 ;
pb_end ( p ) ;
}
void pb_target_extra_buffer ( int index_buffer )
{
DWORD * p ;
DWORD width ;
DWORD height ;
DWORD pitch ;
DWORD pitch_depth_stencil ;
DWORD dma_flags ;
DWORD dma_addr ;
DWORD dma_limit ;
int flag ;
int depth_stencil ;
if ( index_buffer > = pb_ExtraBuffersCount )
{
debugPrint ( " pb_target_extra_buffer: buffer index out of range \n " ) ;
return ;
}
width = pb_FrameBuffersWidth ;
height = pb_FrameBuffersHeight ;
pitch = pb_FrameBuffersPitch ;
pitch_depth_stencil = pb_DepthStencilPitch ;
//DMA channel 9 is used by GPU in order to render pixels
dma_addr = pb_EXAddr [ index_buffer ] & 0x03FFFFFF ;
dma_limit = height * pitch - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID9Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT3 , 9 ) ; p + = 2 ;
pb_end ( p ) ;
//DMA channel 11 is used by GPU in order to bitblt images
dma_addr = pb_EXAddr [ index_buffer ] & 0x03FFFFFF ;
dma_limit = height * pitch - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID11Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1to ( SUBCH_4 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT2 , 11 ) ; p + = 2 ;
pb_end ( p ) ;
depth_stencil = 1 ;
if ( depth_stencil ! = - 1 ) //don't care
if ( pb_DepthStencilLast ! = depth_stencil ) //changed?
{
//DMA channel 10 is used by GPU in order to render depth stencil
if ( depth_stencil )
{
dma_addr = pb_DSAddr & 0x03FFFFFF ;
dma_limit = height * pitch_depth_stencil - 1 ; //(last byte)
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
flag = 1 ;
}
else
{
dma_addr = 0 ;
dma_limit = 0 ;
dma_flags = DMA_CLASS_3D | 0x0000B000 ;
dma_addr | = 3 ;
flag = 0 ;
pitch_depth_stencil = pitch ;
}
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x08 , dma_addr ) ; p + = 3 ; //set params addr,data
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(addr)=data
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x0C , dma_addr ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x00 , dma_flags ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PRAMIN + ( pb_DmaChID10Inst < < 4 ) + 0x04 , dma_limit ) ; p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT4 , 10 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_DEPTH_TEST_ENABLE , flag ) ; p + = 2 ; //ZEnable=TRUE or FALSE (But don't use W, see below)
2010-12-27 01:01:25 +00:00
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_TEST_ENABLE , 1 ) ; p + = 2 ; //StencilEnable=TRUE
2010-12-04 16:14:34 +00:00
pb_end ( p ) ;
pb_DepthStencilLast = depth_stencil ;
}
p = pb_begin ( ) ;
pb_push3 ( p , NV20_TCL_PRIMITIVE_3D_BUFFER_PITCH , ( pitch_depth_stencil < < 16 ) | ( pitch & 0xFFFF ) , 0 , 0 ) ; p + = 4 ;
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ , width < < 16 , height < < 16 ) ; p + = 3 ;
//Default (0x00100001)
//We use W (0x00010000)
//We don't enable YUV (0x10000000)
//We don't use floating point depth (0x00001000)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_W_YUV_FPZ_FLAGS , 0x00110001 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BUFFER_FORMAT , pb_GPUFrameBuffersFormat | pb_FBVFlag ) ; p + = 2 ;
pb_end ( p ) ;
}
//Returns the current value of pb_vbl_counter without waiting.
//Comparing successive values allows caller to know if a frame has been missed.
DWORD pb_get_vbl_counter(void)
{
    DWORD counter = pb_vbl_counter;

    return counter;
}
//Waits on pb_VBlankEvent, then returns the value of pb_vbl_counter.
//Comparing successive values allows caller to know if a frame has been missed.
DWORD pb_wait_for_vbl(void)
{
    DWORD counter;

    NtWaitForSingleObject(pb_VBlankEvent, FALSE, NULL);
    counter = pb_vbl_counter;

    return counter;
}
//printf-like output to the text screen: formats the arguments and sends the
//resulting characters one by one to pb_print_char.
//The formatted text is limited to 511 characters; anything longer is truncated
//instead of overflowing the local buffer.
void pb_print(char *format, ...)
{
    char buffer[512];
    const char *s;
    va_list argList;

    buffer[0] = '\0'; //stays an empty string if formatting fails

    va_start(argList, format);
    vsnprintf(buffer, sizeof(buffer), format, argList);
    va_end(argList);

    for (s = buffer; *s != '\0'; s++)
    {
        pb_print_char(*s);
    }
}
void pb_printat ( int row , int col , char * format , . . . )
{
char buffer [ 512 ] ;
int i ;
if ( ( row > = 0 ) & & ( row < ROWS ) ) pb_next_row = row ;
if ( ( col > = 0 ) & & ( col < COLS ) ) pb_next_col = col ;
va_list argList ;
va_start ( argList , format ) ;
vsprintf ( buffer , format , argList ) ;
va_end ( argList ) ;
for ( i = 0 ; i < strlen ( buffer ) ; i + + ) pb_print_char ( buffer [ i ] ) ;
}
//Clears the whole pb_text_screen array and puts the text cursor
//(pb_next_row, pb_next_col) back at row 0, column 0.
void pb_erase_text_screen(void)
{
    memset(pb_text_screen, 0, sizeof(pb_text_screen));

    pb_next_col = 0;
    pb_next_row = 0;
}
void pb_draw_text_screen ( void )
{
int i , j , k , l , m , x1 , x2 , y ;
unsigned char c ;
for ( i = 0 ; i < ROWS ; i + + )
for ( j = 0 ; j < COLS ; j + + )
{
c = pb_text_screen [ i ] [ j ] ;
if ( ( c = = ' ' ) | | ( c = = ' \t ' ) ) pb_text_screen [ i ] [ j ] = 0 ;
}
//convert pb_text_screen characters into push buffer commands
//TODO: replace rectangle fill with texture copy when available!
for ( i = 0 ; i < ROWS ; i + + )
for ( j = 0 ; j < COLS ; j + + )
{
c = pb_text_screen [ i ] [ j ] ;
if ( c )
{
for ( l = 0 , x1 = - 1 , x2 = - 1 ; l < 8 ; l + + , x1 = - 1 , x2 = - 1 )
for ( k = 0 , m = 0x80 ; k < 8 ; k + + , m > > = 1 )
if ( systemFont [ c * 8 + l ] & m )
{
if ( x1 > = 0 )
x2 = 20 + j * 10 + k ;
else
x1 = 20 + j * 10 + k ;
}
else
{
if ( x2 > = 0 )
{
y = 25 + i * 25 + l * 2 ;
pb_fill ( x1 , y , x2 - x1 + 1 , 2 , 0xFFFFFF ) ;
x1 = x2 = - 1 ;
}
else
if ( x1 > = 0 )
{
y = 25 + i * 25 + l * 2 ;
pb_fill ( x1 , y , 1 , 2 , 0xFFFFFF ) ;
x1 = - 1 ;
}
}
}
}
}
//Sets the number of extra buffers (pb_ExtraBuffersCount).
//n must be in the range 0 .. MAX_EXTRA_BUFFERS; otherwise a message is printed
//and the current count is left unchanged.
void pb_extra_buffers(int n)
{
    if (n < 0)
    {
        debugPrint("pb_extra_buffers: negative count\n");
        return;
    }

    if (n > MAX_EXTRA_BUFFERS)
    {
        debugPrint(" Too many extra buffers \n ");
        return;
    }

    pb_ExtraBuffersCount = n;
}
//Sets the push buffer size (pb_Size), in bytes.
//The request is refused, with a message, when pb_running is set, when size is
//below 64Kb, or when size is not a power of 2.
void pb_size(DWORD size)
{
    if (pb_running)
    {
        debugPrint(" Can't set size while push buffer Dma engine is running. \n ");
        return;
    }

    if (size < 64 * 1024)
    {
        debugPrint(" Push buffer size must be equal or larger than 64Kb. \n ");
        return;
    }

    //a power of 2 has a single bit set, so it shares no bit with size-1
    if ((size - 1) & size)
    {
        debugPrint(" Push buffer size must be a power of 2. \n ");
        return;
    }

    pb_Size = size;
}
//Public entry point that simply calls pb_jump_to_head.
void pb_reset(void)
{
    pb_jump_to_head();
}
//Opens a begin-end block and returns the address (pb_Put) where the caller
//must start writing its commands.
//In DBG builds it also reports a push buffer overflow (pb_Put>=pb_Tail) or a
//block already opened, and resets the variables used by the pb_pushX checks.
DWORD *pb_begin(void)
{
#ifdef DBG
    if (pb_Put >= pb_Tail)
    {
        debugPrint(" ERROR! Push buffer overflow! Use pb_reset more often or enlarge push buffer! \n ");
    }
    if (pb_BeginEndPair == 1)
    {
        debugPrint(" pb_start without a pb_end earlier \n ");
    }

    pb_BeginEndPair = 1;
    pb_PushIndex = 0;
    pb_PushStart = pb_PushNext = pb_Put;
#endif

    return pb_Put;
}
#ifdef LOG
static FILE *fd; //log file, valid only while logging is set
static int logging = 0; //1 while pb_end must record the blocks it sends

//Opens the log file and turns logging on. Does nothing if already logging.
//If the file can't be opened a message is printed and logging stays off, so
//that pb_end never writes through a NULL file pointer.
void pb_start_log(void)
{
    if (logging) return;

    fd = fopen(" pbkit_record.txt ", "w");
    if (fd == NULL)
    {
        debugPrint("pb_start_log: can't open log file\n");
        return;
    }

    logging = 1;
}

//Turns logging off and closes the log file. Does nothing if not logging.
void pb_stop_log(void)
{
    if (logging == 0) return;

    logging = 0;
    fclose(fd);
    fd = NULL;
}
#endif
//Closes a begin-end block: pEnd (address following the last dword written)
//becomes the new pb_Put and pb_start is called to start (or continue) reading
//and sending data to GPU.
//With LOG defined and logging on, the block is first dumped to the log file,
//one command per line (header dword, then its parameters; the parameter count
//is taken from bits 18..28 of the header).
//When pb_trace_mode is set, waits until pb_busy returns 0, giving up with a
//message after more than TICKSTIMEOUT ticks.
void pb_end(DWORD *pEnd)
{
    DWORD wait_start;
#ifdef LOG
    DWORD *cursor;
    int param_count;
    int k;

    if (logging)
    {
        for (cursor = pb_PushStart; cursor != pEnd; )
        {
            param_count = (*cursor >> 18) & 0x7FF;
            fprintf(fd, " 0x%08x, ", *(cursor++));
            for (k = 0; k < param_count; k++)
            {
                fprintf(fd, " 0x%x, ", *(cursor++));
            }
            fprintf(fd, " \n ");
        }
    }
#endif

#ifdef DBG
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_end without a pb_start \n ");
    }
    pb_BeginEndPair = 0;
#endif

    pb_Put = pEnd;
    pb_start();

    //do we want to wait until block data has been sent (for debugging GPU errors)?
    if (!pb_trace_mode)
    {
        return;
    }

    wait_start = KeTickCount;
    while (pb_busy())
    {
        if (KeTickCount - wait_start > TICKSTIMEOUT)
        {
            debugPrint(" pb_end: Busy for too long (%d) (%08x) \n ",
                       ((DWORD)(pb_Put) - (DWORD)(pb_Head)),
                       VIDEOREG(NV_PFIFO_CACHE1_DMA_GET));
            break;
        }
    }
}
//Writes at p a command with 1 parameter for the given subchannel (2 dwords).
//The caller is responsible for advancing p by 2 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push1to(DWORD subchannel, DWORD *p, DWORD command, DWORD param1)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push1to: new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push1to: missing pb_begin earlier \n ");
    }
    pb_PushIndex += 2;
    pb_PushNext += 2;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push1to: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(subchannel, command, 1);
    p[1] = param1;
}
//Writes at p a command with 2 parameters for the given subchannel (3 dwords).
//The caller is responsible for advancing p by 3 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push2to(DWORD subchannel, DWORD *p, DWORD command, DWORD param1, DWORD param2)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push2to : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push2to : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 3;
    pb_PushNext += 3;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push2to: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(subchannel, command, 2);
    p[1] = param1;
    p[2] = param2;
}
//Writes at p a command with 3 parameters for the given subchannel (4 dwords).
//The caller is responsible for advancing p by 4 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push3to(DWORD subchannel, DWORD *p, DWORD command, DWORD param1, DWORD param2, DWORD param3)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push3to : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push3to : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 4;
    pb_PushNext += 4;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push3to: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(subchannel, command, 3);
    p[1] = param1;
    p[2] = param2;
    p[3] = param3;
}
//Writes at p a command with 4 parameters for the given subchannel (5 dwords).
//The caller is responsible for advancing p by 5 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push4to(DWORD subchannel, DWORD *p, DWORD command, DWORD param1, DWORD param2, DWORD param3, DWORD param4)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push4to : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push4to : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 5;
    pb_PushNext += 5;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push4to: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(subchannel, command, 4);
    p[1] = param1;
    p[2] = param2;
    p[3] = param3;
    p[4] = param4;
}
//Writes at p a command with 1 parameter for subchannel SUBCH_3D (2 dwords).
//The caller is responsible for advancing p by 2 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push1(DWORD *p, DWORD command, DWORD param1)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push1: new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push1: missing pb_begin earlier \n ");
    }
    pb_PushIndex += 2;
    pb_PushNext += 2;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push1: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 1);
    p[1] = param1;
}
//Writes at p a command with 2 parameters for subchannel SUBCH_3D (3 dwords).
//The caller is responsible for advancing p by 3 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push2(DWORD *p, DWORD command, DWORD param1, DWORD param2)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push2 : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push2 : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 3;
    pb_PushNext += 3;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push2: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 2);
    p[1] = param1;
    p[2] = param2;
}
//Writes at p a command with 3 parameters for subchannel SUBCH_3D (4 dwords).
//The caller is responsible for advancing p by 4 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push3(DWORD *p, DWORD command, DWORD param1, DWORD param2, DWORD param3)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push3 : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push3 : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 4;
    pb_PushNext += 4;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push3: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 3);
    p[1] = param1;
    p[2] = param2;
    p[3] = param3;
}
//Writes at p a command with 4 parameters for subchannel SUBCH_3D (5 dwords).
//The caller is responsible for advancing p by 5 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push4(DWORD *p, DWORD command, DWORD param1, DWORD param2, DWORD param3, DWORD param4)
{
#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push4 : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push4 : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 5;
    pb_PushNext += 5;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push4: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 4);
    p[1] = param1;
    p[2] = param2;
    p[3] = param3;
    p[4] = param4;
}
//Writes at p a command with 4 float parameters for subchannel SUBCH_3D
//(5 dwords; the floats are stored as floats in the dwords following the header).
//The caller is responsible for advancing p by 5 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push4f(DWORD *p, DWORD command, float param1, float param2, float param3, float param4)
{
    float *params = (float *)(p + 1);

#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push4f : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push4f : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 5;
    pb_PushNext += 5;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push4f: begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 4);
    params[0] = param1;
    params[1] = param2;
    params[2] = param3;
    params[3] = param4;
}
//Writes at p a command with 16 float parameters for subchannel SUBCH_3D
//(17 dwords). The 16 elements of m are stored in the order
//_11,_21,_31,_41, _12,_22,_32,_42, _13,_23,_33,_43, _14,_24,_34,_44.
//The caller is responsible for advancing p by 17 afterwards.
//In DBG builds, checks that p follows the previous write, that a block is
//opened and that the block doesn't exceed 128 dwords.
void pb_push_transposed_matrix(DWORD *p, DWORD command, float *m)
{
    float *out = (float *)(p + 1);

#ifdef DBG
    if (p != pb_PushNext)
    {
        debugPrint(" pb_push_transposed_matrix : new write address invalid or not following previous write addresses \n ");
    }
    if (pb_BeginEndPair == 0)
    {
        debugPrint(" pb_push_transposed_matrix : missing pb_begin earlier \n ");
    }
    pb_PushIndex += 17;
    pb_PushNext += 17;
    if (pb_PushIndex > 128)
    {
        debugPrint(" pb_push_transposed_matrix : begin-end block musn't exceed 128 dwords \n ");
    }
#endif

    p[0] = EncodeMethod(SUBCH_3D, command, 16);

    out[0] = m[_11];
    out[1] = m[_21];
    out[2] = m[_31];
    out[3] = m[_41];

    out[4] = m[_12];
    out[5] = m[_22];
    out[6] = m[_32];
    out[7] = m[_42];

    out[8] = m[_13];
    out[9] = m[_23];
    out[10] = m[_33];
    out[11] = m[_43];

    out[12] = m[_14];
    out[13] = m[_24];
    out[14] = m[_34];
    out[15] = m[_44];
}
void pb_show_front_screen ( void )
{
VIDEOREG ( PCRTC_START ) = pb_FBAddr [ pb_front_index ] & 0x03FFFFFF ;
pb_debug_screen_active = 0 ;
}
void pb_show_debug_screen ( void )
{
VIDEOREG ( PCRTC_START ) = ( ( DWORD ) XVideoGetFB ( ) ) & 0x0FFFFFFF ;
pb_debug_screen_active = 1 ;
}
void pb_show_depth_screen ( void )
{
VIDEOREG ( PCRTC_START ) = pb_DSAddr & 0x0FFFFFFF ;
pb_debug_screen_active = 1 ;
}
//Sets the viewport rectangle and depth range, and sends them to the GPU.
//dwx,dwy: top left corner (negative values are replaced by 0)
//width,height: size (reduced when the rectangle would exceed the frame buffer)
//zmin,zmax: depth range, multiplied by pb_ZScale before being sent
//The resulting values are also stored in the pb_Viewport_* variables.
void pb_set_viewport(int dwx, int dwy, int width, int height, float zmin, float zmax)
{
    DWORD *p;
    DWORD dwzminscaled;
    DWORD dwzmaxscaled;
    float zmin_scaled;
    float zmax_scaled;
    float x, y, w, h;

    if (dwx < 0) dwx = 0;
    if (dwy < 0) dwy = 0;
    if (dwx + width > pb_FrameBuffersWidth) width = pb_FrameBuffersWidth - dwx;
    if (dwy + height > pb_FrameBuffersHeight) height = pb_FrameBuffersHeight - dwy;

    pb_Viewport_x = dwx;
    pb_Viewport_y = dwy;
    pb_Viewport_width = width;
    pb_Viewport_height = height;
    pb_Viewport_zmin = zmin;
    pb_Viewport_zmax = zmax;

    x = 0.53125f + (float)dwx;
    y = 0.53125f + (float)dwy;
    w = 0.5f * ((float)pb_Viewport_width);
    h = -0.5f * ((float)pb_Viewport_height);

    //DEPTH_RANGE_NEAR takes the raw bits of the two floats as dword parameters.
    //memcpy is used (instead of writing through a casted pointer) so that the
    //compiler can't break the conversion under strict aliasing rules.
    //Relies on DWORD and float having the same size.
    zmin_scaled = zmin * pb_ZScale;
    zmax_scaled = zmax * pb_ZScale;
    memcpy(&dwzminscaled, &zmin_scaled, sizeof(dwzminscaled));
    memcpy(&dwzmaxscaled, &zmax_scaled, sizeof(dwzmaxscaled));

    p = pb_begin();
    pb_push4f(p, NV20_TCL_PRIMITIVE_3D_VIEWPORT_OX, x + w, y - h, zmin_scaled, 0.0f); p += 5;
    pb_push4f(p, NV20_TCL_PRIMITIVE_3D_VIEWPORT_PX_DIV2, w, h, (zmax - zmin) * pb_ZScale, 0.0f); p += 5;
    pb_push2(p, NV20_TCL_PRIMITIVE_3D_DEPTH_RANGE_NEAR, dwzminscaled, dwzmaxscaled); p += 3;
    pb_end(p);
}
//Fills the rectangle (x,y)-(x+w-1,y+h-1) of the current render target with
//color, using the HW rectangle fill.
//If you supply 32 bits color and res is 16 bits, convert color first with
//color=((color>>8)&0xF800)|((color>>5)&0x07E0)|((color>>3)&0x001F);
void pb_fill(int x, int y, int w, int h, DWORD color)
{
    DWORD *p;
    int right = x + w;
    int bottom = y + h;

    p = pb_begin();

    //sets rectangle coordinates
    pb_push(p++, NV20_TCL_PRIMITIVE_3D_CLEAR_VALUE_HORIZ, 2);
    p[0] = ((right - 1) << 16) | x;
    p[1] = ((bottom - 1) << 16) | y;
    p += 2;

    //sets data used to fill in rectangle
    pb_push(p++, NV20_TCL_PRIMITIVE_3D_CLEAR_VALUE_DEPTH, 3);
    p[0] = 0;     //(depth<<8)|stencil
    p[1] = color; //color
    p[2] = 0xF0;  //triggers the HW rectangle fill (0x03 for D&S)
    p += 3;

    pb_end(p);
}
//ALWAYS call this at the beginning of a frame, or you may lose one third of
//performance because the automatic compression algorithm for tile #1 can't
//afford any garbage left behind...
//Also, try to draw from closest distance to farthest distance to help the algorithm.
//Depth is set to max and stencil is set to 0. We assume D24S8 format is used.
//Implies that depth test function is set to "less or equal".
//The erased rectangle is (x,y)-(x+w-1,y+h-1).
void pb_erase_depth_stencil_buffer(int x, int y, int w, int h)
{
    DWORD *p;
    int right = x + w;
    int bottom = y + h;

    p = pb_begin();

    //sets rectangle coordinates
    pb_push(p++, NV20_TCL_PRIMITIVE_3D_CLEAR_VALUE_HORIZ, 2);
    p[0] = ((right - 1) << 16) | x;
    p[1] = ((bottom - 1) << 16) | y;
    p += 2;

    //sets data used to fill in rectangle
    pb_push(p++, NV20_TCL_PRIMITIVE_3D_CLEAR_VALUE_DEPTH, 3);
    p[0] = 0xffffff00; //(depth<<8)|stencil
    p[1] = 0;          //color
    p[2] = 0x03;       //triggers the HW rectangle fill (only on D&S)
    p += 3;

    pb_end(p);
}
//Declares the current frame finished.
//Returns 1 if we have to retry later (means no free buffer: the entry
//pb_BackBufferbReady[pb_BackBufferNxt] is still set; draw more details next time).
//Returns 0 once the swap request has been queued and the next back buffer
//(pb_back_index advanced modulo 3) has been selected as render target.
int pb_finished(void)
{
    DWORD *p;

    if (pb_BackBufferbReady[pb_BackBufferNxt])
    {
        return 1; //table is full, retry later
    }

    //insert in push buffer the commands to trigger screen swapping at next VBlank
    p = pb_begin();
    pb_push1(p, NV20_TCL_PRIMITIVE_3D_ASK_FOR_IDLE, 0);                p += 2; //ask for idle
    pb_push1(p, NV20_TCL_PRIMITIVE_3D_NOP, 0);                         p += 2; //wait for idle
    pb_push1(p, NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE, 0);              p += 2; //wait/makespace (obtains null status)
    pb_push1(p, NV20_TCL_PRIMITIVE_3D_PARAMETER_A, pb_back_index);     p += 2; //set param=back buffer index to show up
    pb_push1(p, NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT, PB_FINISHED);    p += 2; //subprogID PB_FINISHED: gets frame ready to show up soon
    //note: no NV20_TCL_PRIMITIVE_3D_STALL_PIPELINE is pushed here
    //(not sure it's needed in triple buffering technic)
    pb_end(p);

    //insert in push buffer the commands to trigger selection of next back buffer
    //(because previous ones may not have finished yet, so need to use 0x0100 call)
    pb_back_index = (pb_back_index + 1) % 3;
    pb_target_back_buffer();

    return 0;
}
void pb_kill ( void )
{
void * pSavedData ;
int i ;
DWORD old_caches , old_push , old_pull ;
DWORD * p ;
DWORD TimeStampTicks ;
int counter ;
# ifdef DBG
// debugPrint("Waiting until Dma is not busy\n");
# endif
if ( pb_Put )
{
pb_start ( ) ;
pb_wait_until_gr_not_busy ( ) ;
* ( pb_Put ) = ( ( ( DWORD ) pb_Head ) & 0x0FFFFFFF ) + 1 ; //writes a jump to push buffer head
pb_Put = pb_Head ;
pb_start ( ) ;
TimeStampTicks = KeTickCount ;
while ( 1 )
{
if ( ( * ( pb_DmaUserAddr + 0x44 / 4 ) ) > 0x04000000 )
{
debugPrint ( " pb_kill: Bad get addr \n " ) ;
break ;
}
//did GetAddr reach push buffer head as planned?
if ( ( ( * ( pb_DmaUserAddr + 0x44 / 4 ) ) & 0x0FFFFFFF ) = = ( ( ( DWORD ) pb_Head ) & 0x0FFFFFFF ) ) break ;
if ( KeTickCount - TimeStampTicks > TICKSTIMEOUT )
{
debugPrint ( " pb_kill: Dma busy for too long \n " ) ;
break ;
}
}
}
# ifdef DBG
// if (KeTickCount-TimeStampTicks<=TICKSTIMEOUT) debugPrint("Dma not busy. All is ok.\n");
# endif
//wait until screen swapping is finished (if one is on its way)
while ( pb_BackBufferbReady [ pb_BackBufferNxt ] ) ;
pb_running = 0 ;
if ( pb_ExtraBuffersCount ) MmFreeContiguousMemory ( ( PVOID ) pb_EXAddr [ 0 ] ) ;
if ( pb_DepthStencilAddr ) MmFreeContiguousMemory ( ( PVOID ) pb_DepthStencilAddr ) ;
if ( pb_FrameBuffersAddr ) MmFreeContiguousMemory ( ( PVOID ) pb_FrameBuffersAddr ) ;
if ( pb_DmaBuffer8 ) MmFreeContiguousMemory ( pb_DmaBuffer8 ) ;
if ( pb_DmaBuffer2 ) MmFreeContiguousMemory ( pb_DmaBuffer2 ) ;
if ( pb_DmaBuffer7 ) MmFreeContiguousMemory ( pb_DmaBuffer7 ) ;
if ( pb_Head ) MmFreeContiguousMemory ( pb_Head ) ;
//eventually restore a previously saved video mode
pSavedData = AvGetSavedDataAddress ( ) ;
if ( pSavedData = = 0 ) AvSendTVEncoderOption ( ( PVOID ) VIDEO_BASE , VIDEO_ENC_VIDEOENABLE , 1 , NULL ) ;
//restore system completely
for ( i = 0 ; i < 8 ; i + + ) pb_release_tile ( i , 1 ) ;
VIDEOREG ( NV_PFIFO_DMA_TIMESLICE ) = NV_PFIFO_DMA_TIMESLICE_ALL_DISABLE ;
while ( ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_STATUS ) & NV_PFIFO_CACHE1_STATUS_LOW_MARK_EMPTY ) = = 0 ) | |
( ( VIDEOREG8 ( NV_PFIFO_RUNOUT_STATUS ) & NV_PFIFO_RUNOUT_STATUS_LOW_MARK_EMPTY ) = = 0 ) | |
( ( VIDEOREG8 ( NV_PFIFO_CACHE1_DMA_PUSH ) & NV_PFIFO_CACHE1_DMA_PUSH_STATE_BUSY ) ! = 0 ) )
{
pb_fifo_handler ( ) ;
if ( VIDEOREG ( NV_PGRAPH_INTR ) ! = NV_PGRAPH_INTR_NOT_PENDING ) pb_gr_handler ( ) ;
if ( VIDEOREG ( NV_PMC_INTR_0 ) & NV_PMC_INTR_0_PCRTC_PENDING ) pb_vbl_handler ( ) ;
}
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_DISABLE ;
while ( ( VIDEOREG8 ( NV_PFIFO_CACHE1_DMA_PUSH ) & NV_PFIFO_CACHE1_DMA_PUSH_STATE_BUSY ) ! = 0 ) ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE0_PUSH0 ) = NV_PFIFO_CACHE0_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE0_PULL0 ) = NV_PFIFO_CACHE0_PULL0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
pb_set_fifo_channel ( 1 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_PUT ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_GET ) = 0 ;
old_caches = VIDEOREG ( NV_PFIFO_CACHES ) ;
old_push = VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) ;
old_pull = VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
//Neutralize DMA (for channels 0 and 1)
for ( i = 0 ; i < 2 ; i + + )
{
if ( pb_FifoChannelsReady ) //any active channel?
{
p = ( DWORD * ) ( VIDEO_BASE + pb_FifoFCAddr + i * 64 ) ;
* ( p + 1 ) = * ( p + 0 ) ; //DMA_GET=DMA_PUT
* ( p + 4 ) = 0 ; //DMA_STATE=0
}
}
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = old_pull ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = old_push ;
VIDEOREG ( NV_PFIFO_CACHES ) = old_caches ;
VIDEOREG ( NV_PFIFO_DMA ) = NV_PFIFO_DMA_NOT_PENDING ;
VIDEOREG ( NV_PFIFO_INTR_EN_0 ) = NV_PFIFO_INTR_EN_0_ALL_DISABLE ;
pb_load_gr_ctx ( NONE ) ;
//restore most essential outer registers
VIDEOREG ( NV_PFB_CFG0 ) = pb_OldFBConfig0 ;
VIDEOREG ( NV_PFB_CFG1 ) = pb_OldFBConfig1 ;
VIDEOREG ( NV_PMC_ENABLE ) = pb_OldMCEnable ;
VIDEOREG ( NV_PMC_INTR_EN_0 ) = pb_OldMCInterrupt ;
VIDEOREG ( PCRTC_START ) = pb_OldVideoStart ;
pb_uninstall_gpu_interrupt ( ) ;
NtClose ( pb_VBlankEvent ) ;
}
int pb_init ( void )
{
DWORD old ;
DWORD mdiv , ndiv , odiv , pdiv , result ;
BYTE old_color_31 ;
BYTE old_color_82 ;
DWORD baseaddr , baseaddr2 ;
int i , j , k ;
DWORD * p ;
struct s_CtxDma sDmaObject2 ;
struct s_CtxDma sDmaObject3 ;
struct s_CtxDma sDmaObject4 ;
struct s_CtxDma sDmaObject5 ;
struct s_CtxDma sDmaObject6 ;
struct s_CtxDma sDmaObject7 ;
struct s_CtxDma sDmaObject8 ;
struct s_CtxDma sDmaObject9 ;
struct s_CtxDma sDmaObject10 ;
struct s_CtxDma sDmaObject11 ;
struct s_CtxDma sDmaObject12 ;
struct s_CtxDma sGrObject13 ;
struct s_CtxDma sGrObject14 ;
struct s_CtxDma sGrObject16 ;
struct s_CtxDma sGrObject17 ;
DWORD UserAddr ;
DWORD TimeStamp1 ;
DWORD TimeStamp2 ;
DWORD GetAddr ;
DWORD PutAddr ;
//Dma channel properties
int dma_trig = 128 ; //min 8 max 256
int dma_size = 128 ; //min 32 max 256
int dma_max_reqs = 8 ; //min 0 max 15
DWORD dummy ;
DWORD channel ;
DWORD * pGrCtxTable ;
VIDEO_MODE vm ;
DWORD format ;
DWORD BackBufferCount ;
DWORD BackBufferFormat ;
DWORD DepthStencilFormat ;
DWORD Width ;
DWORD Height ;
DWORD FrameBufferCount ;
DWORD HScale ;
DWORD VScale ;
DWORD HSize ;
DWORD VSize ;
DWORD Pitch ;
DWORD Addr ;
DWORD Size ;
DWORD FBAddr ;
DWORD FBSize ;
DWORD DSAddr ;
DWORD DSSize ;
DWORD EXAddr ;
DWORD EXSize ;
int n ;
DWORD value ;
if ( pb_running ) return - 8 ;
//reset global vars (except pb_Size)
pb_3DGrCtxInst [ 0 ] = 0 ;
pb_3DGrCtxInst [ 1 ] = 0 ;
pb_FifoChannelsReady = 0 ;
pb_FifoChannelsMode = NV_PFIFO_MODE_ALL_PIO ;
pb_FifoChannelID = 0 ;
pb_GammaRampIdx = 0 ;
for ( i = 0 ; i < 3 ; i + + ) pb_GammaRampbReady [ i ] = 0 ;
for ( k = 0 ; k < 3 ; k + + ) for ( i = 0 ; i < 3 ; i + + ) for ( j = 0 ; j < 256 ; j + + ) pb_GammaRamp [ k ] [ i ] [ j ] = j ;
pb_BackBufferNxt = 0 ;
for ( i = 0 ; i < 5 ; i + + ) pb_BackBufferbReady [ i ] = 0 ;
pb_Put = NULL ;
pb_PutRunSize = 0 ;
pb_FrameBuffersAddr = 0 ;
pb_DmaBuffer8 = MmAllocateContiguousMemoryEx ( 32 , 0 , MAXRAM , 0 , 4 ) ;
pb_DmaBuffer2 = MmAllocateContiguousMemoryEx ( 32 , 0 , MAXRAM , 0 , 4 ) ;
pb_DmaBuffer7 = MmAllocateContiguousMemoryEx ( 32 , 0 , MAXRAM , 0 , 4 ) ;
//NumberOfBytes,LowestAcceptableAddress,HighestAcceptableAddress,Alignment,ProtectionType
if ( ( pb_DmaBuffer8 = = NULL ) | | ( pb_DmaBuffer2 = = NULL ) | | ( pb_DmaBuffer7 = = NULL ) ) return - 2 ;
memset ( pb_DmaBuffer8 , 0 , 32 ) ;
memset ( pb_DmaBuffer2 , 0 , 32 ) ;
memset ( pb_DmaBuffer7 , 0 , 32 ) ;
pb_Head = MmAllocateContiguousMemoryEx ( pb_Size + 8 * 1024 , 0 , MAXRAM , 0 , 0x404 ) ;
//NumberOfBytes,LowestAcceptableAddress,HighestAcceptableAddress,Alignment OPTIONAL,ProtectionType
if ( pb_Head = = NULL ) return - 3 ;
memset ( pb_Head , 0 , pb_Size + 8 * 1024 ) ;
pb_Tail = pb_Head + pb_Size / 4 ;
pb_Put = pb_Head ;
pb_BackBufferNxt = 0 ; //increments when we finish drawing a frame
pb_BackBufferbReady [ 0 ] = 0 ;
pb_BackBufferbReady [ 1 ] = 0 ;
pb_BackBufferbReady [ 2 ] = 0 ;
pb_BackBufferNxtVBL = 0 ; //increments when VBlank event fires
//initialize push buffer DMA engine
//DMA=Direct Memory Access (means CPU is not involved in the data transfer)
NtCreateEvent ( & pb_VBlankEvent , NULL , NotificationEvent , FALSE ) ;
VIDEOREG ( NV_PBUS_PCI_NV_1 ) | = NV_PBUS_PCI_NV_1_BUS_MASTER_ENABLED ;
VIDEOREG ( PCRTC_INTR_EN ) = PCRTC_INTR_EN_VBLANK_DISABLED ;
VIDEOREG ( NV_PTIMER_INTR_EN_0 ) = NV_PTIMER_INTR_EN_0_ALARM_DISABLED ;
if ( pb_install_gpu_interrupt ( ) = = 0 )
{
if ( pb_DmaBuffer8 ) MmFreeContiguousMemory ( pb_DmaBuffer8 ) ;
if ( pb_DmaBuffer2 ) MmFreeContiguousMemory ( pb_DmaBuffer2 ) ;
if ( pb_DmaBuffer7 ) MmFreeContiguousMemory ( pb_DmaBuffer7 ) ;
if ( pb_Head ) MmFreeContiguousMemory ( pb_Head ) ;
NtClose ( pb_VBlankEvent ) ;
return - 4 ; //OpenXDK probably hooked IRQ3 already
}
//backup of the most essential outer registers (pb_kill will restore them)
pb_OldMCEnable = VIDEOREG ( NV_PMC_ENABLE ) ;
pb_OldMCInterrupt = VIDEOREG ( NV_PMC_INTR_EN_0 ) ;
pb_OldFBConfig0 = VIDEOREG ( NV_PFB_CFG0 ) ;
pb_OldFBConfig1 = VIDEOREG ( NV_PFB_CFG1 ) ;
pb_OldVideoStart = ( ( DWORD ) XVideoGetFB ( ) ) & 0x03FFFFFF ;
VIDEOREG ( NV_PBUS_PCI_NV_12 ) = NV_PBUS_PCI_NV_12_ROM_DECODE_DISABLED ;
VIDEOREG ( NV_PBUS_PCI_NV_3 ) = NV_PBUS_PCI_NV_3_LATENCY_TIMER_248_CLOCKS ;
VIDEOREG ( NV_PMC_ENABLE ) = NV_PMC_ENABLE_ALL_ENABLE ;
VIDEOREG ( NV_PMC_INTR_EN_0 ) = NV_PMC_INTR_EN_0_INTA_HARDWARE ;
mdiv = ( VIDEOREG ( NV_PRAMDAC_NVPLL_COEFF ) & NV_PRAMDAC_NVPLL_COEFF_MDIV ) ;
ndiv = ( VIDEOREG ( NV_PRAMDAC_NVPLL_COEFF ) & NV_PRAMDAC_NVPLL_COEFF_NDIV ) > > 8 ;
odiv = 1 ;
pdiv = ( VIDEOREG ( NV_PRAMDAC_NVPLL_COEFF ) & NV_PRAMDAC_NVPLL_COEFF_PDIV ) > > 16 ;
if ( mdiv )
{
//Xtal in Xbox is at 16.666 Mhz but we want 31.25Mhz for GPU...
if ( ( ( DW_XTAL_16MHZ * ndiv ) / ( odiv < < pdiv ) ) / mdiv ! = 233333324 )
{
//This PLL configuration doesn't create a 233.33 Mhz freq from Xtal
//Have this issue reported so we can update source for that case
debugPrint ( " PLL=%d \n " , ( ( DW_XTAL_16MHZ * ndiv ) / ( odiv < < pdiv ) ) / mdiv ) ;
return - 5 ;
}
}
else
{
pb_kill ( ) ;
return - 5 ; //invalid GPU internal PLL (Phase Locked Loop=GPU freq generator)
}
//program GPU timer in order to obtain 31.25Mhz (we assume PLL creates 233.33Mhz)
VIDEOREG ( NV_PTIMER_NUMERATOR ) = 56968 ; //233333324/56968*7629=31247365 (31.25Mhz)
VIDEOREG ( NV_PTIMER_DENOMINATOR ) = 7629 ;
VIDEOREG ( NV_PTIMER_ALARM_0 ) = 0xFFFFFFFF ;
//The Gpu instance memory is a special place in PRAMIN area (VRAM attached to RAM?)
//Essential Gpu data will be stored there, for, I guess, top speed access.
if ( ( VIDEOREG ( NV_PFB_CFG0 ) & NV_PFB_CFG0_PART_3 ) ! = 3 )
{
pb_kill ( ) ;
return - 6 ;
}
pb_GpuInstMem = ( DWORD ) MmClaimGpuInstanceMemory ( 0xFFFFFFFF , & baseaddr ) ;
//returns 0x83FF0000 //0x10000
//physical_memory(0x83FF0000)=0x03FF0000
if ( pb_GpuInstMem = = 0 )
{
pb_kill ( ) ;
return - 7 ;
}
pb_GpuInstMem - = INSTANCE_MEM_MAXSIZE ; //-0x5000=-20480=-20Kb
// =0x83FEB000
//a hash table
pb_FifoHTAddr = baseaddr + NV_PRAMIN ; //0x10000+NV_PRAMIN(0x700000)
VIDEOREG ( NV_PFIFO_RAMHT ) = ( ( baseaddr > > 8 ) & NV_PFIFO_RAMHT_BASE_ADDRESS ) | NV_PFIFO_RAMHT_SEARCH_128 ;
// =NV_PFIFO_RAMHT_BASE_ADDRESS_10000
//FC (size 0x80)
pb_FifoFCAddr = baseaddr + NV_PRAMIN + 0x1000 ; //=0x11000+NV_PRAMIN
//U1 (size 0x20) Unknown1
pb_FifoU1Addr = baseaddr + NV_PRAMIN + 0x1080 ; //=0x11080+NV_PRAMIN
//FC (dwFifoFCAddr, but 128 bytes aligned, with flag 0x200)
baseaddr2 = ( ( pb_FifoFCAddr + 0x80 ) & 0x1FC00 ) | 0x200 ; //0x11200
VIDEOREG ( NV_PFIFO_RAMFC ) = baseaddr2 < < 7 | ( ( pb_FifoFCAddr > > 8 ) & NV_PFIFO_RAMFC_BASE_ADDRESS ) ;
// |NV_PFIFO_RAMFC_BASE_ADDRESS_11000
//=0x00890110 (theoretical value)
//=0x008A0110 (current value read under openxdk : |0x400 instead of |0x200)
pb_FreeInst = ( pb_FifoU1Addr - NV_PRAMIN + 0x20 ) > > 4 ;
// =0x110A (unit=16 bytes block)
VIDEOREG ( NV_PFB_NVM ) = VIDEOREG ( NV_PFB_NVM ) & NV_PFB_NVM_MODE_DISABLE ;
//zeroes whole GPU instance memory
for ( i = 0 ; i < INSTANCE_MEM_MAXSIZE ; i + = 4 ) VIDEOREG ( NV_PRAMIN + baseaddr + i ) = 0 ;
//reserve 8 blocks (128 bytes) for GrCtxTable
//(2 first dwords will point at the 2 graphic contexts for the 2 channels)
pb_GrCtxTableInst = pb_FreeInst ; pb_FreeInst + = 8 ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 31 ; old_color_31 = VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 31 ; VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) = 87 ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 82 ; old_color_82 = VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 82 ; VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) = old_color_82 + 4 ;
VIDEOREG ( NV_PVIDEO_DEBUG_2 ) = ( VIDEOREG ( NV_PVIDEO_DEBUG_2 ) & NV_PVIDEO_DEBUG_2_BURST1_CLEAR ) | NV_PVIDEO_DEBUG_2_BURST1_INIT ;
VIDEOREG ( NV_PVIDEO_DEBUG_2 ) = ( VIDEOREG ( NV_PVIDEO_DEBUG_2 ) & NV_PVIDEO_DEBUG_2_BURST2_CLEAR ) | NV_PVIDEO_DEBUG_2_BURST2_INIT ;
VIDEOREG ( NV_PVIDEO_DEBUG_3 ) = ( VIDEOREG ( NV_PVIDEO_DEBUG_3 ) & NV_PVIDEO_DEBUG_3_WATER_MARK1_CLEAR ) | NV_PVIDEO_DEBUG_3_WATER_MARK1_INIT ;
VIDEOREG ( NV_PVIDEO_DEBUG_3 ) = ( VIDEOREG ( NV_PVIDEO_DEBUG_3 ) & NV_PVIDEO_DEBUG_3_WATER_MARK2_CLEAR ) | NV_PVIDEO_DEBUG_3_WATER_MARK2_INIT ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 32 ; VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) = 41 ;
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 27 ; VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) = 5 ;
if ( old_color_31 = = 0 )
{
VIDEOREG8 ( NV_PRMCIO_CRX__COLOR ) = 31 ; VIDEOREG8 ( NV_PRMCIO_CR__COLOR ) = 153 ;
}
VIDEOREG ( NV_PCRTC_CONFIG ) = ( VIDEOREG ( NV_PCRTC_CONFIG ) & ~ NV_PCRTC_CONFIG_START_ADDRESS ) | NV_PCRTC_CONFIG_START_ADDRESS_HSYNC ;
//3 replaced with 2=(3&~7)|2
VIDEOREG ( NV_PVIDEO_LUMINANCE_0 ) = NV_PVIDEO_LUMINANCE_CONTRAST_UNITY | NV_PVIDEO_LUMINANCE_BRIGHTNESS_UNITY ;
VIDEOREG ( NV_PVIDEO_LUMINANCE_1 ) = NV_PVIDEO_LUMINANCE_CONTRAST_UNITY | NV_PVIDEO_LUMINANCE_BRIGHTNESS_UNITY ;
VIDEOREG ( NV_PVIDEO_CHROMINANCE_0 ) = NV_PVIDEO_CHROMINANCE_SAT_COS_UNITY | NV_PVIDEO_CHROMINANCE_SAT_SIN_UNITY ;
VIDEOREG ( NV_PVIDEO_CHROMINANCE_1 ) = NV_PVIDEO_CHROMINANCE_SAT_COS_UNITY | NV_PVIDEO_CHROMINANCE_SAT_SIN_UNITY ;
//maybe let's preserve previous setting
//VIDEOREG(NV_PVIDEO_OFFSET_0)=NV_PVIDEO_OFFSET_VALUE_ZERO;
//VIDEOREG(NV_PVIDEO_OFFSET_1)=NV_PVIDEO_OFFSET_VALUE_ZERO;
VIDEOREG ( NV_PVIDEO_SIZE_IN_0 ) = NV_PVIDEO_SIZE_IN_UNKNOWN_WIDTH | NV_PVIDEO_SIZE_IN_UNKNOWN_HEIGHT ;
VIDEOREG ( NV_PVIDEO_SIZE_IN_1 ) = NV_PVIDEO_SIZE_IN_UNKNOWN_WIDTH | NV_PVIDEO_SIZE_IN_UNKNOWN_HEIGHT ;
VIDEOREG ( NV_PVIDEO_POINT_IN_0 ) = NV_PVIDEO_POINT_IN_S_ORIGIN | NV_PVIDEO_POINT_IN_T_ORIGIN ;
VIDEOREG ( NV_PVIDEO_POINT_IN_1 ) = NV_PVIDEO_POINT_IN_S_ORIGIN | NV_PVIDEO_POINT_IN_T_ORIGIN ;
VIDEOREG ( NV_PVIDEO_DS_DX_0 ) = NV_PVIDEO_DS_DX_RATIO_UNITY ;
VIDEOREG ( NV_PVIDEO_DS_DX_1 ) = NV_PVIDEO_DS_DX_RATIO_UNITY ;
VIDEOREG ( NV_PVIDEO_DT_DY_0 ) = NV_PVIDEO_DT_DY_RATIO_UNITY ;
VIDEOREG ( NV_PVIDEO_DT_DY_1 ) = NV_PVIDEO_DT_DY_RATIO_UNITY ;
pb_GrCtxID = NONE ;
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_TABLE ) = pb_GrCtxTableInst & NV_PGRAPH_CHANNEL_CTX_TABLE_INST ;
p = ( DWORD * ) ( VIDEO_BASE + NV_PRAMIN + ( pb_GrCtxTableInst < < 4 ) ) ;
* ( p + 0 ) = 0 ; //we don't point at the 2 graphic contexts yet
* ( p + 1 ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_PUT ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_GET ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE0_HASH ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_HASH ) = 0 ;
VIDEOREG ( NV_PFIFO_MODE ) = NV_PFIFO_MODE_ALL_PIO ;
VIDEOREG ( NV_PFIFO_DMA ) = NV_PFIFO_DMA_NOT_PENDING ;
VIDEOREG ( NV_PFIFO_SIZE ) = 0 ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_STATE ) = 0 ;
VIDEOREG ( NV_PFIFO_RUNOUT_PUT ) = 0 ;
VIDEOREG ( NV_PFIFO_RUNOUT_GET ) = 0 ;
pb_running = 1 ;
old = VIDEOREG ( NV_PBUS_PCI_NV_19 ) ;
VIDEOREG ( NV_PBUS_PCI_NV_19 ) = old & NV_PBUS_PCI_NV_19_AGP_COMMAND_SBA_ENABLE_OFF & NV_PBUS_PCI_NV_19_AGP_COMMAND_AGP_ENABLE_OFF ;
VIDEOREG ( NV_PBUS_PCI_NV_19 ) = old ;
VIDEOREG ( PCRTC_INTR ) = PCRTC_INTR_VBLANK_RESET ;
VIDEOREG ( PCRTC_INTR_EN ) = PCRTC_INTR_EN_VBLANK_ENABLED ;
//VIDEOREG(NV_PTIMER_TIME_0)=0;
//VIDEOREG(NV_PTIMER_TIME_1)=ticks; //time & date in ticks (nasty calculation, let's skip it for now)
VIDEOREG ( NV_PGRAPH_FIFO ) = NV_PGRAPH_FIFO_ACCESS_DISABLE ;
VIDEOREG ( NV_PMC_ENABLE ) = VIDEOREG ( NV_PMC_ENABLE ) & NV_PMC_ENABLE_PGRAPH_DISABLED ;
VIDEOREG ( NV_PMC_ENABLE ) = VIDEOREG ( NV_PMC_ENABLE ) | NV_PMC_ENABLE_PGRAPH_ENABLED ;
VIDEOREG ( NV_PGRAPH_DEBUG_0 ) = NV_PGRAPH_DEBUG_0_NO_RESET ;
VIDEOREG ( NV_PGRAPH_DEBUG_1 ) = NV_PGRAPH_DEBUG_1_VTX_PTE_ENABLED |
NV_PGRAPH_DEBUG_1_VTX_CACHE_ENABLED |
NV_PGRAPH_DEBUG_1_VTX_FILE_ENABLED |
NV_PGRAPH_DEBUG_1_DRAWDIR_Y_INCR |
NV_PGRAPH_DEBUG_1_INSTANCE_ENABLED |
NV_PGRAPH_DEBUG_1_CTX_ENABLED ;
VIDEOREG ( NV_PGRAPH_DEBUG_7 ) = NV_PGRAPH_DEBUG_7_UNKNOWN_OPTIONS ;
VIDEOREG ( NV_PGRAPH_DEBUG_3 ) = NV_PGRAPH_DEBUG_3_FLUSHING_ENABLED |
NV_PGRAPH_DEBUG_3_SYNC_TO_CRTC_ENABLED |
NV_PGRAPH_DEBUG_3_FAST_DATA_STRTCH_ENABLED |
NV_PGRAPH_DEBUG_3_FAST_3D_SHADOW_DATA_ENABLED |
NV_PGRAPH_DEBUG_3_FAST_DMA_READ_ENABLED |
NV_PGRAPH_DEBUG_3_IDLE_FILTER_ENABLED |
NV_PGRAPH_DEBUG_3_SINGLE_CYCLE_LOAD_ENABLED |
NV_PGRAPH_DEBUG_3_BILINEAR_3D_ENABLED |
NV_PGRAPH_DEBUG_3_VOLATILE_RESET_ENABLED |
NV_PGRAPH_DEBUG_3_DATA_CHECK_ENABLED |
NV_PGRAPH_DEBUG_3_FORMAT_CHECK_ENABLED |
NV_PGRAPH_DEBUG_3_DMA_CHECK_ENABLED |
NV_PGRAPH_DEBUG_3_STATE_CHECK_ENABLED |
NV_PGRAPH_DEBUG_3_IMAGE_64BIT_ENABLED |
NV_PGRAPH_DEBUG_3_XFMODE_COALESCE_ENABLED |
NV_PGRAPH_DEBUG_3_CTX_METHODS_ENABLED |
NV_PGRAPH_DEBUG_3_OP_METHODS_ENABLED |
NV_PGRAPH_DEBUG_3_IGNORE_PATCHVALID_ENABLED ;
VIDEOREG ( NV_PGRAPH_DEBUG_4 ) = NV_PGRAPH_DEBUG_4_ALL_DISABLE ;
VIDEOREG ( NV_PGRAPH_DEBUG_5 ) = NV_PGRAPH_DEBUG_5_ZCULL_SPARE2_ENABLED ;
if ( VIDEOREG ( NV_PBUS_ROM_VERSION ) & NV_PBUS_ROM_VERSION_MASK )
VIDEOREG ( NV_PGRAPH_UNKNOWN_400B80 ) = 0x45EAD10F ;
else
VIDEOREG ( NV_PGRAPH_UNKNOWN_400B80 ) = 0x45EAD10E ;
VIDEOREG ( NV_PGRAPH_UNKNOWN_400B84 ) = 0 ;
VIDEOREG ( NV_PGRAPH_UNKNOWN_400B88 ) = 0 ;
VIDEOREG ( NV_PGRAPH_UNKNOWN_400098 ) = 0x78 ;
VIDEOREG ( NV_PGRAPH_UNKNOWN_40009C ) = 0x40 ;
VIDEOREG ( NV_PGRAPH_CHANNEL_CTX_TABLE ) = pb_GrCtxTableInst & NV_PGRAPH_CHANNEL_CTX_TABLE_INST ;
pb_wait_until_gr_not_busy ( ) ;
pb_prepare_tiles ( ) ;
VIDEOREG ( NV_PGRAPH_CTX_SWITCH1 ) = NV_PGRAPH_CTX_SWITCH1_ALL_DISABLE ;
VIDEOREG ( NV_PGRAPH_CTX_SWITCH2 ) = NV_PGRAPH_CTX_SWITCH2_ALL_DISABLE ;
VIDEOREG ( NV_PGRAPH_CTX_SWITCH3 ) = NV_PGRAPH_CTX_SWITCH3_ALL_DISABLE ;
VIDEOREG ( NV_PGRAPH_CTX_SWITCH4 ) = NV_PGRAPH_CTX_SWITCH4_ALL_DISABLE ;
VIDEOREG ( NV_PGRAPH_CTX_CONTROL ) = NV_PGRAPH_CTX_CONTROL_DEVICE_ENABLED ;
VIDEOREG ( NV_PGRAPH_FFINTFC_ST2 ) = NV_PGRAPH_FFINTFC_ST2_CHID_STATUS_VALID ;
pb_load_gr_ctx ( pb_GrCtxID ) ;
VIDEOREG ( NV_PGRAPH_INTR ) = NV_PGRAPH_INTR_ALL_ENABLE ;
VIDEOREG ( NV_PGRAPH_INTR_EN ) = NV_PGRAPH_INTR_EN_ALL_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_FETCH ) = NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
NV_PFIFO_CACHE1_DMA_FETCH_SIZE_32_BYTES |
NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_15 ;
VIDEOREG ( NV_PFIFO_DMA_TIMESLICE ) = NV_PFIFO_DMA_TIMESLICE_SELECT_128K |
NV_PFIFO_DMA_TIMESLICE_TIMEOUT_ENABLED ;
VIDEOREG ( NV_PFIFO_DELAY_0 ) = 255 & NV_PFIFO_DELAY_0_WAIT_RETRY ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE0_PUSH0 ) = NV_PFIFO_CACHE0_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE0_PULL0 ) = NV_PFIFO_CACHE0_PULL0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_DISABLE ;
pb_set_fifo_channel ( 1 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_PUT ) = 0 ; //&NV_PFIFO_CACHE1_PUT_ADDRESS
VIDEOREG ( NV_PFIFO_CACHE1_GET ) = 0 ; //&NV_PFIFO_CACHE1_GET_ADDRESS
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_REASSIGN_ENABLED ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_INTR_0 ) = NV_PFIFO_INTR_0_ALL_RESET ;
VIDEOREG ( NV_PFIFO_INTR_EN_0 ) = NV_PFIFO_INTR_EN_0_ALL_ENABLE ; ;
//calculate number of CPU cycles per second
HalReadWritePCISpace ( 0 , 0x60 , 0x6C , & value , 4 , FALSE ) ;
//BusNumber,SlotNumber,RegisterNumber,pBuffer,Length,bWritePCISpace
if ( value & 0xFF )
pb_CpuFrequency = 5.5f * ( ( float ) ( ( value > > 8 ) & 0xFF ) ) * ( XTAL_16MHZ / ( ( float ) ( value & 0xFF ) ) ) ;
else
pb_CpuFrequency = 733.33f ; //Mhz, theoretically
pb_create_dma_ctx ( 3 , DMA_CLASS_3D , 0 , MAXRAM , & sDmaObject3 ) ;
pb_create_dma_ctx ( 5 , DMA_CLASS_2 , 0 , MAXRAM , & sDmaObject5 ) ;
pb_create_dma_ctx ( 4 , DMA_CLASS_3 , 0 , MAXRAM , & sDmaObject4 ) ;
pb_create_dma_ctx ( 9 , DMA_CLASS_3D , 0 , MAXRAM , & sDmaObject9 ) ;
pb_create_dma_ctx ( 10 , DMA_CLASS_3D , 0 , MAXRAM , & sDmaObject10 ) ;
pb_create_dma_ctx ( 11 , DMA_CLASS_3D , 0 , MAXRAM , & sDmaObject11 ) ;
pb_DmaChID9Inst = sDmaObject9 . Inst ;
pb_DmaChID10Inst = sDmaObject10 . Inst ;
pb_DmaChID11Inst = sDmaObject11 . Inst ;
pb_create_dma_ctx ( 2 , DMA_CLASS_3 , ( DWORD ) pb_DmaBuffer2 , 0x1F , & sDmaObject2 ) ;
pb_create_dma_ctx ( 7 , DMA_CLASS_3D , ( DWORD ) pb_DmaBuffer7 , 0x1F , & sDmaObject7 ) ;
//this one is damn important. memory address 0x80000000 acts as a trigger.
pb_create_dma_ctx ( 12 , DMA_CLASS_3D , 0x80000000 , 0x10000000 , & sDmaObject12 ) ;
pb_create_dma_ctx ( 8 , DMA_CLASS_3D , ( DWORD ) pb_DmaBuffer8 , 0x20 , & sDmaObject8 ) ;
pb_create_dma_ctx ( 6 , DMA_CLASS_2 , 0 , MAXRAM , & sDmaObject6 ) ;
//we initialized channel 0 first, that will match graphic context 0
pb_FifoChannelID = 0 ;
pb_FifoChannelsMode = NV_PFIFO_MODE_ALL_PIO ;
pb_FifoBigInst = pb_FreeInst ; pb_FreeInst + = 0x37F ; //895 blocks=14320 bytes=0x37F0 bytes
dummy = VIDEOREG ( NV_PFIFO_CACHES ) ;
channel = pb_FifoChannelID ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_ALL_DISABLE ;
//zeroes 0x37F0 bytes (0xDFC/4=0x37F blocks, 4 dwords in 1 block)
for ( i = 0 ; i < 0xDFC ; i + + ) VIDEOREG ( NV_PRAMIN + ( pb_FifoBigInst < < 4 ) + i * 4 ) = 0 ;
//here we go, we initialize first graphic context pointer
pGrCtxTable = ( DWORD * ) ( VIDEO_BASE + NV_PRAMIN + ( pb_GrCtxTableInst < < 4 ) ) ;
* ( pGrCtxTable + channel ) = pb_FifoBigInst ;
pb_GrCtxInst [ channel ] = pb_FifoBigInst ;
//points at channel details in PRAMIN area
p = ( DWORD * ) ( VIDEO_BASE + pb_FifoFCAddr + channel * 64 ) ;
//zeroes details
for ( i = 0 ; i < 16 ; i + + ) * ( p + i ) = 0 ;
//set dma instance, future value for VIDEOREG(NV_PFIFO_CACHE1_DMA_INSTANCE)
* ( p + 3 ) = sDmaObject6 . Inst ;
//encode trig & size
dma_trig = ( dma_trig > > 3 ) - 1 ;
dma_size = ( dma_size > > 5 ) - 1 ;
//set dma fetch, future value for VIDEOREG(NV_PFIFO_CACHE1_DMA_FETCH)
* ( p + 5 ) = ( ( dma_trig < < 3 ) & NV_PFIFO_CACHE1_DMA_FETCH_TRIG ) |
( ( dma_size < < 13 ) & NV_PFIFO_CACHE1_DMA_FETCH_SIZE ) |
( ( dma_max_reqs < < 16 ) & NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS ) ;
pb_FifoChannelsMode | = ( 1 < < channel ) ;
VIDEOREG ( NV_PFIFO_MODE ) = pb_FifoChannelsMode ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) = channel & NV_PFIFO_CACHE1_PUSH1_CHID ;
if ( pb_FifoChannelsMode & ( 1 < < channel ) ) VIDEOREG ( NV_PFIFO_CACHE1_PUSH1 ) | = NV_PFIFO_CACHE1_PUSH1_MODE_DMA ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUT ) = 0 ; //&NV_PFIFO_CACHE1_DMA_PUT_OFFSET
VIDEOREG ( NV_PFIFO_CACHE1_DMA_GET ) = 0 ; //&NV_PFIFO_CACHE1_DMA_GET_OFFSET
VIDEOREG ( NV_PFIFO_CACHE1_DMA_INSTANCE ) = * ( p + 3 ) ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_CTL ) = NV_PFIFO_CACHE1_DMA_CTL_ALL_DISABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_STATE ) = NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT_0 ;
VIDEOREG ( NV_PFIFO_CACHE1_ENGINE ) = NV_PFIFO_CACHE1_ENGINE_ALL_SW ;
VIDEOREG ( NV_PFIFO_CACHE1_DMA_FETCH ) = * ( p + 5 ) ;
if ( pb_FifoChannelsMode & ( 1 < < channel ) ) VIDEOREG ( NV_PFIFO_CACHE1_DMA_PUSH ) = NV_PFIFO_CACHE1_DMA_PUSH_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PULL0 ) = NV_PFIFO_CACHE1_PULL0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHE1_PUSH0 ) = NV_PFIFO_CACHE1_PUSH0_ACCESS_ENABLE ;
VIDEOREG ( NV_PFIFO_CACHES ) = NV_PFIFO_CACHES_REASSIGN_ENABLED ;
pb_FifoChannelsReady | = ( 1 < < channel ) ;
UserAddr = VIDEO_BASE + NV_USER + ( pb_FifoChannelID < < 16 ) ;
pb_bind_channel ( & sDmaObject6 ) ;
pb_bind_channel ( & sDmaObject12 ) ;
pb_bind_channel ( & sDmaObject2 ) ;
pb_bind_channel ( & sDmaObject7 ) ;
pb_bind_channel ( & sDmaObject4 ) ;
pb_bind_channel ( & sDmaObject5 ) ;
pb_bind_channel ( & sDmaObject3 ) ;
pb_bind_channel ( & sDmaObject9 ) ;
pb_bind_channel ( & sDmaObject10 ) ;
pb_bind_channel ( & sDmaObject11 ) ;
pb_bind_channel ( & sDmaObject8 ) ;
//These objects match the GPU sub channels (3D, 2, 3, 4, in that order)
pb_create_gr_ctx ( 13 , GR_CLASS_97 , & sGrObject13 ) ;
pb_create_gr_ctx ( 14 , GR_CLASS_39 , & sGrObject14 ) ;
pb_create_gr_ctx ( 16 , GR_CLASS_9F , & sGrObject16 ) ;
pb_create_gr_ctx ( 17 , GR_CLASS_62 , & sGrObject17 ) ;
pb_bind_channel ( & sGrObject13 ) ;
pb_bind_channel ( & sGrObject14 ) ;
pb_bind_channel ( & sGrObject16 ) ;
pb_bind_channel ( & sGrObject17 ) ;
pb_DmaUserAddr = ( DWORD * ) UserAddr ; //VIDEOBASE+NV_USER+(0<<16)
pb_PushBase = ( DWORD ) pb_Head ;
pb_PushLimit = ( DWORD ) pb_Tail ;
//This is the magic part of the whole push buffer DMA engine thing...
//Both these instructions are necessary, remove one, then no dma engine!
* ( ( DWORD * ) 0x80000000 ) = ( ( ( DWORD ) pb_Head ) & 0x0FFFFFFF ) + 1 ;
__asm__ __volatile__ ( " wbinvd " ) ;
//assembler instruction wbinvd : write back and invalidate cache
pb_start ( ) ; //start checking if new data has been written and send it to GPU
//(nothing will be sent, since we sent nothing yet)
TimeStamp1 = KeTickCount ;
# ifdef DBG
// debugPrint("Waiting undil DMA is ready\n");
# endif
//wait until DMA is ready
while ( 1 )
{
GetAddr = * ( pb_DmaUserAddr + 0x44 / 4 ) ;
if ( GetAddr > 0x04000000 )
{
debugPrint ( " pb_init: Bad getaddr \n " ) ;
pb_kill ( ) ;
return - 9 ;
}
PutAddr = ( ( DWORD ) pb_Put ) ;
if ( ( ( GetAddr ^ PutAddr ) & 0x0FFFFFFF ) = = 0 ) break ; //means same addresses (Dma is ready)
TimeStamp2 = KeTickCount ;
if ( TimeStamp2 - TimeStamp1 > TICKSTIMEOUT )
{
debugPrint ( " pb_init: Dma didn't get ready in time \n " ) ;
pb_kill ( ) ;
return - 10 ;
}
}
# ifdef DBG
// debugPrint("Dma is ready!!!\n");
# endif
* ( ( DWORD * ) 0x80000000 ) = 0xFFFFFFFF ;
//Let's start initializing inner GPU registers!!!
//These commands assign DMA channels to push buffer subchannels
//and associate some specific GPU parts to specific Dma channels
p = pb_begin ( ) ;
pb_push1to ( SUBCH_2 , p , NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT , 14 ) ; p + = 2 ;
pb_push1to ( SUBCH_3 , p , NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT , 16 ) ; p + = 2 ;
pb_push1to ( SUBCH_4 , p , NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT , 17 ) ; p + = 2 ;
pb_push1to ( SUBCH_3D , p , NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT , 13 ) ; p + = 2 ;
pb_push1to ( SUBCH_2 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT0 , 7 ) ; p + = 2 ;
pb_push1to ( SUBCH_3 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT5 , 17 ) ; p + = 2 ;
pb_push1to ( SUBCH_3 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT_UNKNOWN , 3 ) ; p + = 2 ;
pb_push2to ( SUBCH_4 , p , NV20_TCL_PRIMITIVE_3D_SET_OBJECT1 , 3 , 11 ) ; p + = 3 ;
pb_end ( p ) ; //calls pb_start() which will trigger the reading and sending to GPU (asynchronous, no waiting)
//setup needed for color computations
p = pb_begin ( ) ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_SET_OBJECT0 , 3 ) ;
* ( p + + ) = 2 ;
* ( p + + ) = 3 ;
* ( p + + ) = 3 ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_SET_OBJECT2A , 6 ) ;
* ( p + + ) = 4 ;
* ( p + + ) = 9 ;
* ( p + + ) = 10 ;
* ( p + + ) = 3 ;
* ( p + + ) = 3 ;
* ( p + + ) = 8 ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_SET_OBJECT8 , 1 ) ;
* ( p + + ) = 12 ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_ACTIVATE_COLORS , 1 ) ;
* ( p + + ) = 0 ;
pb_end ( p ) ;
p = pb_begin ( ) ;
pb_push1 ( p , 0x09FC , 1 ) ; p + = 2 ;
pb_push4f ( p , 0x0A50 , 0.0f , 0.0f , 0.0f , 1.0f ) ; p + = 5 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_EDGE_FLAG , 1 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_TX_SHADER_PREVIOUS , 0x00210000 ) ; p + = 2 ; //(PSTextureInput) What previous stage is used at each stage
pb_push1 ( p , 0x1D80 , 1 ) ; p + = 2 ;
pb_push1 ( p , 0x1E68 , 0x7F800000 ) ; p + = 2 ;
pb_push1 ( p , 0x1D78 , 1 ) ; p + = 2 ;
pb_end ( p ) ;
p = pb_begin ( ) ;
pb_push_transposed_matrix ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_A ( 0 ) , pb_IdentityMatrix ) ; p + = 17 ;
pb_push_transposed_matrix ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_A ( 4 ) , pb_IdentityMatrix ) ; p + = 17 ;
pb_push_transposed_matrix ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_A ( 8 ) , pb_IdentityMatrix ) ; p + = 17 ;
pb_push_transposed_matrix ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_A ( 12 ) , pb_IdentityMatrix ) ; p + = 17 ;
/* pb_push1(p,NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE(0),0x2202); p+=2;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE ( 1 ) , 0x2202 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE ( 2 ) , 0x2202 ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE ( 3 ) , 0x2202 ) ; p + = 2 ;
*/ pb_push4f ( p , 0x09D0 , 0.0f , 0.0f , 1.0f , 0.0f ) ; p + = 5 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID , 0x0000003C ) ; p + = 2 ; //set shader constants cursor at C-36
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 12 ) ; //loads C-36, C-35 & C-34
memcpy ( p , pb_FixedPipelineConstants , 12 * 4 ) ; p + = 12 ; //used by common xbox shaders, but I doubt we will use them.
//(also usually C-37 is screen center offset Decals vector & c-38 is Scales vector)
pb_end ( p ) ;
//Frame buffers creation
//So far, tested only with 640*480 32 bits (default openxdk res)
//Even if it's a waste of memory, for now, we will leave the openxdk (& SDL)
//default frame buffer untouched. debugPrint (& SDL) will still target it.
//We will provide functions pb_show_debug_screen() and pb_show_front_screen()
//in order to let the user (developer) toggle between screens at will.
pb_FrameBuffersAddr = 0 ;
pb_DepthStencilAddr = 0 ;
pb_DepthStencilLast = - 2 ;
vm = XVideoGetMode ( ) ;
if ( vm . bpp = = 32 ) pb_GPUFrameBuffersFormat = 0x128 ; //A8R8G8B8
else pb_GPUFrameBuffersFormat = 0x113 ; //R5G6B5 (0x123 if D24S8 used, bpp 16 untested)
pb_ZScale = 16777215.0f ; //D24S8
Width = vm . width ;
Height = vm . height ;
BackBufferCount = 2 ; //triple buffering technic!
//allows dynamic details adjustment
pb_FrameBuffersCount = BackBufferCount + 1 ; //front buffer + back buffers
pb_FrameBuffersWidth = Width ;
pb_FrameBuffersHeight = Height ;
HScale = 1 ;
VScale = 1 ;
HSize = HScale * Width ; //Total width
VSize = VScale * Height ; //Total height
//Front and back buffers (tile #0)
FrameBufferCount = BackBufferCount + 1 ;
//pitch is the gap between start of a pixel line and start of next pixel line
//(not necessarily the size of a pixel line, because of hardware optimization)
Pitch = ( ( ( vm . bpp * HSize ) > > 3 ) + 0x3F ) & 0xFFFFFFC0 ; //64 units aligned
pb_FrameBuffersPitch = Pitch ;
//look for a standard listed pitch value greater or equal to theoretical one
for ( i = 0 ; i < 16 ; i + + )
{
if ( pb_TilePitches [ i ] > = Pitch )
{
Pitch = pb_TilePitches [ i ] ;
break ;
}
}
Size = Pitch * VSize ;
//verify 64 bytes alignment for size of a frame buffer
if ( Size & ( 64 - 1 ) ) debugPrint ( " pb_init: FBSize is not well aligned. \n " ) ;
pb_FBSize = Size ;
//multiply size by number of physical frame buffers in order to obtain global size
FBSize = Size * FrameBufferCount ;
//Huge alignment enforcement (16 Kb aligned!) for the global size
FBSize = ( FBSize + 0x3FFF ) & 0xFFFFC000 ;
FBAddr = ( DWORD ) MmAllocateContiguousMemoryEx ( FBSize , 0 , 0x03FFB000 , 0x4000 , 0x404 ) ;
//NumberOfBytes,LowestAcceptableAddress,HighestAcceptableAddress,Alignment OPTIONAL,ProtectionType
pb_FBGlobalSize = FBSize ;
pb_FrameBuffersAddr = FBAddr ;
if ( ! FBAddr )
{
pb_kill ( ) ;
return - 11 ;
}
for ( i = 0 ; i < FrameBufferCount ; i + + )
{
pb_FBAddr [ i ] = FBAddr ;
FBAddr + = Size ;
}
//8 separate contiguous memory zones can be assigned to 8 GPU 'tiles'
//simultaneously. GPU will apply automatic optimizations or caching on tiles.
//The most important one is the automatic compression of data (by chunks of
//16 dwords) in the depth stencil buffer. Reading and writing this buffer
//consumes most of the GPU time. By replacing the 16 dwords with a few dwords
//(2 or 4), potential performance gain is about one third of frame time (60fps).
//It is necessary to clear the depth stencil buffer entirely at the beginning of
//each frame and draw things from closest depth to farthest depth in order to
//take full benefit of this very important feature. All fast games use it.
//Compression is calculated by picking up the central value of a 4x4 block and
//coding global x & y variation, plus all adjustments necessary to
//fully recover original values. Compression is aborted if the 16 dwords have
//very different values (will occur at the edges of projected triangles).
pb_assign_tile ( 0 , //int tile_index,
pb_FrameBuffersAddr & 0x03FFFFFF , //DWORD tile_addr,
FBSize , //DWORD tile_size,
Pitch , //DWORD tile_pitch,
0 , //DWORD tile_z_start_tag,
0 , //DWORD tile_z_offset,
0 //DWORD tile_flags
) ;
//Depth stencil buffer (tile #1)
//pitch is the gap between start of a pixel line and start of next pixel line
//(not necessarily the size of a pixel line, because of hardware optimization)
Pitch = ( ( ( vm . bpp * HSize ) > > 3 ) + 0x3F ) & 0xFFFFFFC0 ; //64 units aligned
pb_DepthStencilPitch = Pitch ;
//look for a standard listed pitch value greater or equal to theoretical one
for ( i = 0 ; i < 16 ; i + + )
{
if ( pb_TilePitches [ i ] > = Pitch )
{
Pitch = pb_TilePitches [ i ] ;
break ;
}
}
Size = Pitch * VSize ;
//verify 64 bytes alignment for size of a frame buffer
if ( Size & ( 64 - 1 ) ) debugPrint ( " pb_init: DSSize is not well aligned. \n " ) ;
pb_DSSize = Size ;
//multiply size by number of physical frame buffers in order to obtain global size
DSSize = Size * FrameBufferCount ;
//Huge alignment enforcement (16 Kb aligned!) for the global size
DSSize = ( DSSize + 0x3FFF ) & 0xFFFFC000 ;
DSAddr = ( DWORD ) MmAllocateContiguousMemoryEx ( FBSize , 0 , 0x03FFB000 , 0x4000 , 0x404 ) ;
//NumberOfBytes,LowestAcceptableAddress,HighestAcceptableAddress,Alignment OPTIONAL,ProtectionType
pb_DepthStencilAddr = DSAddr ;
if ( ! DSAddr )
{
pb_kill ( ) ;
return - 11 ;
}
pb_DSAddr = DSAddr ;
pb_assign_tile ( 1 , //int tile_index,
pb_DepthStencilAddr & 0x03FFFFFF , //DWORD tile_addr,
DSSize , //DWORD tile_size,
Pitch , //DWORD tile_pitch,
0 , //DWORD tile_z_start_tag,
0 , //DWORD tile_z_offset,
0x84000001 //DWORD tile_flags (0x04000000 for 32 bits)
) ;
if ( pb_ExtraBuffersCount )
{
//Extra back buffers (tile #2)
//pitch is the gap between start of a pixel line and start of next pixel line
//(not necessarily the size of a pixel line, because of hardware optimization)
Pitch = ( ( ( vm . bpp * HSize ) > > 3 ) + 0x3F ) & 0xFFFFFFC0 ; //64 units aligned
//look for a standard listed pitch value greater or equal to theoretical one
for ( i = 0 ; i < 16 ; i + + )
{
if ( pb_TilePitches [ i ] > = Pitch )
{
Pitch = pb_TilePitches [ i ] ;
break ;
}
}
Size = Pitch * VSize ;
//verify 64 bytes alignment for size of a frame buffer
if ( Size & ( 64 - 1 ) ) debugPrint ( " pb_init: EXSize is not well aligned. \n " ) ;
//multiply size by number of physical frame buffers in order to obtain global size
EXSize = Size * pb_ExtraBuffersCount ;
//Huge alignment enforcement (16 Kb aligned!) for the global size
EXSize = ( EXSize + 0x3FFF ) & 0xFFFFC000 ;
EXAddr = ( DWORD ) MmAllocateContiguousMemoryEx ( EXSize , 0 , 0x03FFB000 , 0x4000 , 0x404 ) ;
//NumberOfBytes,LowestAcceptableAddress,HighestAcceptableAddress,Alignment OPTIONAL,ProtectionType
if ( ! EXAddr )
{
pb_kill ( ) ;
return - 11 ;
}
for ( i = 0 ; i < pb_ExtraBuffersCount ; i + + )
{
pb_EXAddr [ i ] = EXAddr ;
EXAddr + = Size ;
}
pb_assign_tile ( 2 , //int tile_index,
pb_EXAddr [ 0 ] & 0x03FFFFFF , //DWORD tile_addr,
EXSize , //DWORD tile_size,
Pitch , //DWORD tile_pitch,
0 , //DWORD tile_z_start_tag,
0 , //DWORD tile_z_offset,
0 //DWORD tile_flags
) ;
}
pb_FBVFlag = 0x0000 ; //Quincunx & Gaussian need special flags. We don't, for now.
pb_XScale = ( float ) HScale ;
pb_YScale = ( float ) VScale ;
if ( pb_YScale < pb_XScale ) pb_GlobalScale = pb_YScale ; else pb_GlobalScale = pb_XScale ;
i = ( DWORD ) ( 2.0f * ( pb_GlobalScale ) + 0.5f ) ;
switch ( i )
{
case 0 :
pb_Bias = - 8.0f ;
break ;
case 1 :
pb_Bias = 0.53125f ;
break ;
case 2 : //0.0f
case 3 : //0.585f
case 4 : //1.0f
case 5 : //1.322f
case 6 : //1.585f
case 7 : //1.907f
case 8 : //2.0f
pb_Bias = pb_BiasTable [ i - 2 ] ;
break ;
}
p = pb_begin ( ) ;
n = pb_FrameBuffersCount ; //(BackBufferCount+1)
pb_push3 ( p , NV20_TCL_PRIMITIVE_3D_MAIN_TILES_INDICES , 0 , 1 , n ) ; p + = 4 ;
pb_end ( p ) ;
//set area where GPU is allowed to draw pixels
pb_set_viewport ( 0 , 0 , vm . width * HScale , vm . height * VScale , 0.0f , 1.0f ) ;
//set vertex shader type
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SHADER_TYPE , SHADER_TYPE_INTERNAL ) ; p + = 2 ;
pb_end ( p ) ;
//no scissors (accept pixels in 8 rectangles covering all screen)
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_MODE , 0 ) ; p + = 2 ; //accept pixels inside scissor rectangles union (1=reject)
for ( i = 0 ; i < 8 ; i + + )
{
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ ( i ) , 0 | ( ( vm . width * HScale - 1 ) < < 16 ) ) ; p + = 2 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT ( i ) , 0 | ( ( vm . height * VScale - 1 ) < < 16 ) ) ; p + = 2 ;
}
pb_end ( p ) ;
//funcs: never(0x200), less(0x201), equal(0x202), less or equal(0x203)
//greater(0x204), not equal(0x205), greater or equal(0x206), always(0x207)
//various initial settings (simple states)
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_DEPTH_FUNC , 0x203 ) ; p + = 2 ; //Depth comparison function="less or equal"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_ALPHA_FUNC_FUNC , 0x207 ) ; p + = 2 ; //Alpha comparison function="always"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE , 0 ) ; p + = 2 ; //AlphaBlendEnable=FALSE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE , 0 ) ; p + = 2 ; //AlphaTestEnable=FALSE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_ALPHA_FUNC_REF , 0 ) ; p + = 2 ; //AlphaRef=0
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BLEND_FUNC_SRC , 1 ) ; p + = 2 ; //SrcBlend=(1,1,1,1)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BLEND_FUNC_DST , 0 ) ; p + = 2 ; //DstBlend=(0,0,0,0)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE , 1 ) ; p + = 2 ; //ZWriteEnable=TRUE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_DITHER_ENABLE , 0 ) ; p + = 2 ; //DitherEnable=FALSE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SHADE_MODEL , 0x1D01 ) ; p + = 2 ; //ShadeMode="gouraud"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_COLOR_MASK , 0x01010101 ) ; p + = 2 ; // ColorWriteEnable=abgr
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_OP_ZFAIL , 0x1E00 ) ; p + = 2 ; //StencilZFail="keep"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_OP_ZPASS , 0x1E00 ) ; p + = 2 ; //StencilPass="keep"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_FUNC_FUNC , 0x207 ) ; p + = 2 ; // Stencil comparison function="always"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_FUNC_REF , 0 ) ; p + = 2 ; //StencilRef=0
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_FUNC_MASK , 0xFFFFFFFF ) ; p + = 2 ; //StencilMask=0xFFFFFFFF
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_MASK , 0xFFFFFFFF ) ; p + = 2 ; //StencilWriteMask=0xFFFFFFFF
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BLEND_EQUATION , 0x8006 ) ; p + = 2 ; //Blend operator="add"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_BLEND_COLOR , 0 ) ; p + = 2 ; //BlendColor=0x000000
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SWATHWIDTH , 4 ) ; p + = 2 ; //SwathWidth=128
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR , 0 ) ; p + = 2 ; //PolygonOffZSlopeScale=0.0f (because ZBias=0.0f)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_OFFSET_UNITS , 0 ) ; p + = 2 ; //PolygonOffZOffset=0.0f (because ZBias=0.0f)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_OFFSET_POINT_ENABLE , 0 ) ; p + = 2 ; //PtOffEnable=FALSE (because ZBias=0.0f)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_OFFSET_LINE_ENABLE , 0 ) ; p + = 2 ; //WireFrameOffEnable=FALSE (because ZBias=0.0f)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FILL_ENABLE , 0 ) ; p + = 2 ; //SolidOffEnable=FALSE (because ZBias=0.0f)
pb_end ( p ) ;
//various initial settings (complex states)
p = pb_begin ( ) ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_VERTEX_BLEND_ENABLE , 0 ) ; p + = 2 ; //VertexBlend="disable"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FOG_COLOR , 0 ) ; p + = 2 ; //FogColor=0x000000
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT , 0x1B02 , 0x1B02 ) ; p + = 3 ; //FillMode="solid" BackFillMode="point"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_NORMALIZE_ENABLE , 0 ) ; p + = 2 ; //NormalizeNormals=FALSE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_STENCIL_OP_FAIL , 0x1E00 ) ; p + = 2 ; //StencilFail="keep"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FRONT_FACE , 0x900 ) ; p + = 2 ; //FrontFace="clockwise"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE , 1 ) ; p + = 2 ; //CullModeEnable=TRUE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CULL_FACE , 0x405 ) ; p + = 2 ; //CullMode="FrontFace opposite" (counterclockwise)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_COLOR_LOGIC_OP_ENABLE , 0 ) ; p + = 2 ; //Logic operator="none"
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_LINE_SMOOTH_ENABLE , 0 , 0 ) ; p + = 3 ; //EdgeAntiAlias=0
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_MULTISAMPLE , 0xFFFF0001 ) ; p + = 2 ; //MultiSampleAntiAliasing=TRUE & MultiSampleMask=0xFFFF
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_SHADOW_FUNC_FUNC , 0 ) ; p + = 2 ; //Shadow comparison function="never"
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_LINE_WIDTH , ( DWORD ) ( 1.0f * 8.0f * pb_GlobalScale + 0.5f ) ) ; p + = 2 ; //LineWidth=1.0f =>8 (0-511)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ; //prepare subprogram call (wait/makespace, will obtain null status)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , 1 ) ; p + = 2 ; //set parameter for subprogram (TRUE)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETNOISE ) ; p + = 2 ; //call subprogID PB_SETNOISE: Dxt1NoiseEnable=TRUE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_CULL_ENABLE , 3 ) ; p + = 2 ; //bit0:OcclusionCullEnable=TRUE & bit1:StencilCullEnable=TRUE
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE , 0 ) ; p + = 2 ; //prepare subprogram call (wait/makespace, will obtain null status)
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PGRAPH_DEBUG_5 , NV_PGRAPH_DEBUG_5_ZCULL_SPARE2_ENABLED ) ; p + = 3 ; //set parameters A & B: DoNotCullUncompressed=FALSE (|8 otherwise)
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(ParamA)=ParamB
if ( VIDEOREG ( NV_PBUS_ROM_VERSION ) & NV_PBUS_ROM_VERSION_MASK )
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PGRAPH_UNKNOWN_400B80 , ( 0x45EAD10F & ~ 0x18100000 ) ) ; //RopZCmpAlwaysRead=FALSE (bit27) & RopZRead=FALSE (bit20)
else
pb_push2 ( p , NV20_TCL_PRIMITIVE_3D_PARAMETER_A , NV_PGRAPH_UNKNOWN_400B80 , ( 0x45EAD10E & ~ 0x18100000 ) ) ;
p + = 3 ;
pb_push1 ( p , NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT , PB_SETOUTER ) ; p + = 2 ; //calls subprogID PB_SETOUTER: does VIDEOREG(ParamA)=ParamB
pb_end ( p ) ;
//various initial settings (texture stages states)
p = pb_begin ( ) ;
pb_push1 ( p , 0x1b68 , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMat00=0.0f (stage +1 because no pixel shader used yet)
pb_push1 ( p , 0x1b6c , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMat01=0.0f
pb_push1 ( p , 0x1b70 , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMat11=0.0f
pb_push1 ( p , 0x1b74 , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMat10=0.0f
pb_push1 ( p , 0x1b78 , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMatLightScale=0.0f
pb_push1 ( p , 0x1b7c , 0 ) ; p + = 2 ; //texture stage 1 BumpEnvMatLightOffset=0.0f
pb_push3 ( p , 0x03c0 , 0 , 0 , 0 ) ; p + = 4 ; //texture stages 0 TexCoordIndex="passthru"
pb_push1 ( p , 0x1b24 , 0 ) ; p + = 2 ; //texture stage 0 BorderColor=0x000000
pb_push1 ( p , 0x0ae0 , 0 ) ; p + = 2 ; //texture stage 0 ColorKeyColor=0x000000
pb_push1 ( p , 0x1ba8 , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMat00=0.0f (stage +1 because no pixel shader used yet)
pb_push1 ( p , 0x1bac , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMat01=0.0f
pb_push1 ( p , 0x1bb0 , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMat11=0.0f
pb_push1 ( p , 0x1bb4 , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMat10=0.0f
pb_push1 ( p , 0x1bb8 , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMatLightScale=0.0f
pb_push1 ( p , 0x1bbc , 0 ) ; p + = 2 ; //texture stage 2 BumpEnvMatLightOffset=0.0f
pb_push3 ( p , 0x03d0 , 0 , 0 , 0 ) ; p + = 4 ; //texture stages 1 TexCoordIndex="passthru"
pb_push1 ( p , 0x1b64 , 0 ) ; p + = 2 ; //texture stage 1 BorderColor=0x000000
pb_push1 ( p , 0x0ae4 , 0 ) ; p + = 2 ; //texture stage 1 ColorKeyColor=0x000000
pb_push1 ( p , 0x1be8 , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMat00=0.0f (stage +1 because no pixel shader used yet)
pb_push1 ( p , 0x1bec , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMat01=0.0f
pb_push1 ( p , 0x1bf0 , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMat11=0.0f
pb_push1 ( p , 0x1bf4 , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMat10=0.0f
pb_push1 ( p , 0x1bf8 , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMatLightScale=0.0f
pb_push1 ( p , 0x1bfc , 0 ) ; p + = 2 ; //texture stage 3 BumpEnvMatLightOffset=0.0f
pb_push3 ( p , 0x03e0 , 0 , 0 , 0 ) ; p + = 4 ; //texture stages 2 TexCoordIndex="passthru"
pb_push1 ( p , 0x1ba4 , 0 ) ; p + = 2 ; //texture stage 2 BorderColor=0x000000
pb_push1 ( p , 0x0ae8 , 0 ) ; p + = 2 ; //texture stage 2 ColorKeyColor=0x000000
pb_push3 ( p , 0x03f0 , 0 , 0 , 0 ) ; p + = 4 ; //texture stages 3 TexCoordIndex="passthru"
pb_push1 ( p , 0x1be4 , 0 ) ; p + = 2 ; //texture stage 3 BorderColor=0x000000
pb_push1 ( p , 0x0aec , 0 ) ; p + = 2 ; //texture stage 3 ColorKeyColor=0x000000
pb_end ( p ) ;
memset ( ( DWORD * ) pb_FBAddr [ 0 ] , 0 , pb_FBSize ) ;
memset ( ( DWORD * ) pb_DSAddr , 0 , pb_DSSize ) ;
pb_back_index = 1 ; //frame buffer #1 is the back buffer for now
pb_target_back_buffer ( ) ; //tells GPU what is the frame buffer target
pb_front_index = 0 ; //frame buffer #0 is the front buffer for now
pb_show_front_screen ( ) ; //show it
return 0 ;
}
//Enqueues shader micro-code into the push buffer stream
//(not recommended for pixel shader: slow and redundant).
//  p     : current write position in the push buffer
//  mcode : micro-code to enqueue. If the upper 16 bits of its first dword are
//          0x4321, it is a vertex shader setup: the lower 16 bits of that header
//          give the number of dwords that follow, and those dwords (header
//          excluded) are copied verbatim. Any other first dword selects the
//          pixel shader path.
//Returns the advanced write position, or NULL on error (NULL argument, or a
//vertex shader block larger than 136*5+96*7+8 dwords).
//NOTE(review): the pixel shader path pushes the values held in pb_gpu_registers[]
//and never reads mcode beyond its first dword -- presumably callers pass a pointer
//to that very array; confirm before passing a private copy.
DWORD *pb_push_mcode(DWORD *p, DWORD *mcode)
{
    DWORD size;

    //pb_pcode2mcode() returns NULL when a conversion fails: reject it here
    //instead of dereferencing it
    if ((p == NULL) || (mcode == NULL))
    {
        debugPrint("pb_push_mcode: NULL parameter\n");
        return NULL;
    }

    if (((*mcode) & 0xFFFF0000) != 0x43210000) //pixel shader registers values
    {
        //Pixel shader initialization (on xbox it's just registers initialization)
        //1-8 stages where (alpha and rgb processed in parallel)
        //2x4 inputs redirected to (a,b,c,d) can produce 2x3 outputs (a*b,c*d or a*b+c*d)
        //redirected to v0-v1, t0-t3, or r0-r1 (r0=final result at final stage)
        pb_push2(p, NV20_TCL_PRIMITIVE_3D_RC_COLOR0, pb_gpu_registers[48], pb_gpu_registers[49]); p += 3; //PSFinalCombinerC0 & C1
        pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_SHADER_CULL_MODE, pb_gpu_registers[50]); p += 2; //PSCompareMode (0 means fragment killed if r<0 or s<0 or t<0 or q<0, used in clipplane mode)
        pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_SHADER_OP, pb_gpu_registers[51]); p += 2; //PSTextureModes=1 (1<<(stage*5) is project 2D: argb=texture(r/q,s/q) usually q=1.0f)
        pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_SHADER_DOTMAPPING, pb_gpu_registers[52]); p += 2; //PSDotMapping (0 means [0,255]argb from texture=>[0.0,1.0](r,g,b))
        pb_push1(p, NV20_TCL_PRIMITIVE_3D_TX_SHADER_PREVIOUS, pb_gpu_registers[53]); p += 2; //PSInputTextureSource (usual value for 4 stages: 0x00210000, what previous stage each stage uses)
        pb_push1(p, NV20_TCL_PRIMITIVE_3D_RC_ENABLE, pb_gpu_registers[54]); p += 2; //PSCombinerCount (stages usage count=1, r0.a LSB controls mux, C0's & C1's may be different)

        //each group below = 1 command dword followed by the register values themselves
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 8); memcpy(p, &pb_gpu_registers[0], 8 * 4); p += 8; //8 PSAlphaInputs
        //Inputs: 8x 0xaabbccdd
        //0=0 1=c0 2=c1 3=fog.rgb 4=v0 5=v1 8=t0 0xb=t3 0xc=r0 0xd=r1 0x10=x.a default=|0.rgb|
        //0x20=1-|x| 0x40=2*max(0,x)-1("_bx2") 0x60=1-2*max(0,x) 0x80=max(0,x)-0.5f("_bias") 0xa0=0.5f-max(0,x) 0xc0=x 0xe0=-x
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_RC_OUT_ALPHA(0), 8); memcpy(p, &pb_gpu_registers[8], 8 * 4); p += 8; //8 PSAlphaOutputs
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_RC_IN_RGB(0), 8); memcpy(p, &pb_gpu_registers[16], 8 * 4); p += 8; //8 PSRGBInputs
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_RC_OUT_RGB(0), 8); memcpy(p, &pb_gpu_registers[24], 8 * 4); p += 8; //8 PSRGBOutputs
        //Outputs: 8x 0xFlags+<> <:a*b dest >:c*d dest +:a*b+c*d dest with 0xc=r0 0=discarded, i.e no destination
        //Flags: 2(ab)/1(cd)="* is replaced with dot product", 4="+ is replaced with (r0.a LSB or MSB not set)?(a*b):(c*d)"
        //Flags: 8=-0.5f (then) 0x10=*2.0f 0x20=*4.0f 0x40=*0.5f
        //Flags: 0x80(ab)/0x40(cd)=result.b propagates to result.a on rgb side (case of dp3 r0,?n,?n for example)
        pb_push(p++, NV20_TCL_PRIMITIVE_3D_RC_CONSTANT_COLOR0(0), 16); memcpy(p, &pb_gpu_registers[32], 16 * 4); p += 16; //8 C0's 8 C1's
        return p;
    }

    //enqueues a vertex shader setup:
    size = (*(mcode++)) & 0xFFFF; //dword count stored in the header, header itself is skipped
    if (size > 136 * 5 + 96 * 7 + 8)
    {
        debugPrint(" pb_push_mcode: Wrong vertex shader size \n ");
        return NULL;
    }
    memcpy(p, mcode, size * 4); p += size;
    return p;
}
//Translates a pseudo-code register description into the encoded xbox gpu
//pixel shader input register: a register code selected from pReg->reg and
//pReg->num, OR'ed with the code of the modifier given by pReg->mod.
//An unknown register kind is encoded as r0 (0xc). An unsupported or unknown
//modifier is reported through debugPrint and leaves the modifier bits untouched.
static int pb_preg2psreg(struct s_PseudoReg *pReg)
{
    //modifier codes, indexed by pseudo-code modifier 0..6
    static const int modifier_bits[7] =
    {
        0xc0, //0: x
        0xe0, //1: -x
        0x80, //2: x_bias (x-0.5f)
        0xa0, //3: -x_bias -(x-0.5f)
        0x40, //4: x_bx2 (|x|*2.0f-1.0f)
        0x60, //5: -x_bx2 -(|x|*2.0f-1.0f)
        0x20  //6: 1-|x| (0x00=|x|)
    };
    const int kind = pReg->reg;
    const int index = pReg->num;
    const int modifier = pReg->mod;
    int encoded;

    if (kind == 8)
    {
        //r0-r1 (side effect: r2=0(0) r3=fog.rgb r4=v0 r5=v1 r6=v1r0sum(0xe) r7=EFprod(0xf))
        encoded = 0xc + index;
    }
    else if (kind == 9)
    {
        //v0-v1 (side effect: v2=v1r0sum(0xe) v3=EFprod(0xf) v4=c0 v5=c1 v6=0 v7=0)
        encoded = 4 + index;
    }
    else if (kind == 0xa)
    {
        //c0-c1 (ps constants Cn are 0xaarrggbb dwords)
        //Pseudo code created by psa.exe allows C0-C7 to be defined, but NVidia
        //pixel shaders only refer to C0-C1 (which may differ at each stage),
        //so there is more than one way to map them.
        //Only 1 stage is supported here, so only c0-c1 are used (c2-c3 for a
        //2nd stage, later, eventually), and c4-c7 are mapped onto xbox gpu
        //specific registers available at any stage.
        if (index == 7)      encoded = 0xf;       //c7=EFprod (see final combiner comment in pb_pcode2mcode)
        else if (index == 6) encoded = 0xe;       //c6=v1r0sum
        else if (index == 5) encoded = 3;         //c5=fog.rgb
        else if (index == 4) encoded = 0;         //c4=zero
        else                 encoded = 1 + index; //plain constant
    }
    else if (kind == 0xb)
    {
        //t0-t3
        encoded = 8 + index;
    }
    else
    {
        //anything else is encoded as r0
        encoded = 0xc;
    }

    if ((modifier >= 0) && (modifier < 7))
    {
        encoded |= modifier_bits[modifier];
    }
    else if (modifier == 7)
    {
        //x_x2 (|x|*2) is not supported
        debugPrint(" pb_preg2psreg: ?n_x2 modifier is not supported \n ");
    }
    else
    {
        debugPrint(" pb_preg2psreg: Unrecognized modifier %d \n ", pReg->mod);
    }

    return encoded;
}
//Decodes up to 4 register tokens from the pseudo-code stream into *pRegs.
//  pcode : first token to decode (one dword per register)
//  pRegs : receives the decoded fields; pRegs->n is set to n
//  n     : number of tokens to decode, taken in the order dest, src0, src1, src2
//Registers beyond the first n are left untouched.
//Side effect: a destination of kind 8 (temporary register r) is flagged in
//pb_tmp_registers[] so that actually used temporaries are known.
static void pb_read_pregs(DWORD *pcode, struct s_PseudoRegs *pRegs, int n)
{
    struct s_PseudoReg *sources[3];
    struct s_PseudoReg *cur;
    DWORD token;
    DWORD field;
    int k;

    pRegs->n = n;

    if (n >= 1) //dest
    {
        token = *pcode;
        pcode++;
        cur = &pRegs->dest;
        //ps: 8=r 9=v 0xa=c 0xb=t
        //vs: 8=r 0xa=c 0xb=a 0xc=oP(oP0=oPos oP1=oFog oP2=oPts) 0xd=oD 0xe=oT
        cur->reg = (token >> 28) & 0xf;
        cur->num = token & 0xf;
        //write mask: bit0=x/r bit1=y/g bit2=z/b bit3=w/a
        //(the 4 bits are mirrored because xbox gpu wants the reverse order)
        field = (token >> 16) & 0xf;
        cur->msk = ((field & 8) >> 3) | ((field & 4) >> 1) | ((field & 2) << 1) | ((field & 1) << 3);
        if (cur->reg == 8)
        {
            pb_tmp_registers[cur->num] = 1; //markup for actually used temporary registers
        }
    }

    //src0 needs n>=2, src1 needs n>=3, src2 needs n>=4
    sources[0] = &pRegs->src0;
    sources[1] = &pRegs->src1;
    sources[2] = &pRegs->src2;
    for (k = 0; (k < 3) && (n >= k + 2); k++)
    {
        token = *pcode;
        pcode++;
        cur = sources[k];
        //ps: 8=r 9=v 0xa=c 0xb=t
        //vs: 8=r 9=v 0xa=c 0xb=a
        cur->reg = (token >> 28) & 0xf;
        cur->num = token & 0xf;
        //0=x 1=-x (ps: 2=x_bias 3=-x_bias 4=x_bx2 5=-x_bx2 6=1-x 7=x_x2(not supported))
        cur->mod = (token >> 24) & 0xf;
        //swizzle: .p0p1p2p3=>p3p2p1p0 with 00=x/r 01=y/g 10=z/b 11=w/a
        //(the four 2-bit selectors are mirrored because xbox gpu wants the reverse order)
        field = (token >> 16) & 0xff;
        cur->swz = ((field & 0xc0) >> 6) | ((field & 0x30) >> 2) | ((field & 0xc) << 2) | ((field & 3) << 6);
        //vs: if set, means cn to be replaced with c[a0.x+n]
        cur->idx = (token >> 13) & 1;
    }
}
//Sets the usual parts of one vertex shader micro-code instruction
//(the instruction independent parts: destination and up to 3 sources).
//  p     : the 4 dwords of the instruction; all 4 are overwritten
//  pRegs : decoded pseudo-code registers; pRegs->n tells how many are valid
//          (fewer than 2 => nop, 2 => dest+src0, 3 => +src1, otherwise +src2)
//Missing sources are encoded as v0.xyzw.
//Returns 0 on success, -1 for an unrecognized destination register,
//-2/-3/-4 for an unrecognized src0/src1/src2 register. On error the
//instruction dwords are left partially filled.
static int pb_set_mcode(DWORD *p, struct s_PseudoRegs *pRegs)
{
    //xbox gpu micro-code format:
    //renouveau constants:
    //| | | | | | | | | DWORD#0 (0)
    //| |scalar#|vector#|(0-95)const_src|inp_src| source0_high | DWORD#1
    //|source0_low| source1 | source2_high | DWORD#2
    //|src2low|vtmpmsk|temp_id|stmpmsk|destmsk|x| (const) dest |p|i| | DWORD#3
    //'x' bit allows to choose a constant as destination.
    //Shader must be declared with a special type previously
    //in order to get this privilege and runs much slower.
    //x=1 : destination is not a constant register
    //x=0 : destination is a constant register (4 bits dest field becomes 8 bits const dest field)
    //The way I describe things (using c,v,r characters):
    //| | | | | | | | | DWORD#0 (0)
    //| |sc_code|op_code|(0-191) c_numbr|v_numbr|m|source0_swizzle| DWORD#1 (96=>C0 on xbox)
    //|r_numbr|cvr|m|source1_swizzle|r_numbr|cvr|m|source2_swizzle|r_n DWORD#2
    //r? dest:
    //umbr|cvr|dst_msk|r_numbr|sdstmsk|0 0 0 0|1|1 1 1 1 1 1 1 1|0|i| | DWORD#3
    //o? dest: (o0=oPos o1-2=oT6-7(n/a) o3-4=oD0-1(ff) o5=oFog o6=oPts o7-8=oT4-5(bf) o9-12=oT0-3)
    //umbr|cvr|0 0 0 0|0 1 1 1|0 0 0 0|dst_msk|1|0 0 0 0|o_numbr|s|i| | DWORD#3
    //c? dest: (shaders that can write into constants run slower and have special type)
    //umbr|cvr|0 0 0 0|0 1 1 1|0 0 0 0|dst_msk|0|(0-191) c_numbr|s|i| | DWORD#3 (96=>C0 on xbox)
    //a0 dest: (only allowed in instruction mov a0.x,...)
    //| |cvr|0 0 0 0|0 1 1 1|0 0 0 0|0 0 0 0|1 1 1 1 1 1 1 1 1|0|i| | DWORD#3
    //i: 0=cn 1=c[a0.x+n] (if any constant is used as any of the sources)
    //s: set if scalar function result is expected in destination
    //no c: c_numbr=0
    //no v: v_numbr=0
    //m: 0=x 1=-x
    //cvr: (can't set more than 1 c and more than 1 v as src)
    //01=r
    //10=v
    //11=c
    //missing src: m=0(x) swizzle=00011011(.xyzw) r_numbr=0(0) cvr=10(v)
    DWORD src0, src1, src2;

    p[0] = NV20_VP_INST0_KNOWN; //always 0
    p[1] = 0;
    p[2] = 0;
    p[3] = 0;

    if (pRegs->n < 2) //it's a nop
    {   //src0, src1 & src2 are missing (set them to v0.xyzw)
        p[1] |= 0x1b;
        p[2] |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT) << NV20_VP_INST_SRC0L_SHIFT;
        p[2] |= ((0x1b << NV20_VP_SRC_REG_SWZ_ALL_SHIFT) | (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT)) << NV20_VP_INST_SRC1_SHIFT;
        p[2] |= ((0x1b << NV20_VP_SRC_REG_SWZ_ALL_SHIFT) >> NV20_VP_SRC2_HIGH_SHIFT) << NV20_VP_INST_SRC2H_SHIFT;
        p[3] |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT) << NV20_VP_INST_SRC2L_SHIFT;
        p[3] |= 0x00700ff8;
        return 0;
    }

    //destination
    switch (pRegs->dest.reg) //8=r 0xa=c 0xb=a 0xc=oP(oP0=oPos oP1=oFog oP2=oPts) 0xd=oD 0xe=oT
    {
        case 8: //r (dest=255 NV20_VP_INST_CONST_DEST_FLAG set)
            p[3] |= 0x00000ff8 | (pRegs->dest.msk << NV20_VP_INST_VTEMP_WRITEMASK_SHIFT) | (pRegs->dest.num << NV20_VP_INST_DEST_TEMP_ID_SHIFT);
            break;
        case 0xa: //c (shaders that can write into constants run slower, NV20_VP_INST_CONST_DEST_FLAG cleared)
            //NOTE(review): source constants are biased by +96 below and the format
            //comment says "96=>C0 on xbox", but no bias is applied here -- confirm intent.
            p[3] |= 0x00700000 | (pRegs->dest.msk << NV20_VP_INST_DEST_WRITEMASK_SHIFT) | (pRegs->dest.num << NV20_VP_INST_CONST_DEST_SHIFT);
            break;
        case 0xb: //dest a0 (mask is zero in micro-code but is considered as .x) (only valid for "mov a0.x,...") (r_dest=7 dest=255 NV20_VP_INST_CONST_DEST_FLAG set)
            p[3] |= 0x00700ff8;
            break;
        case 0xc: //o(oP0=oPos=o0 oP1=oFog=o5 oP2=oPts=o6) (r_dest=7 NV20_VP_INST_CONST_DEST_FLAG set)
            p[3] |= 0x00700800 | (pRegs->dest.msk << NV20_VP_INST_DEST_WRITEMASK_SHIFT) | ((pRegs->dest.num ? (pRegs->dest.num == 1 ? NV20_VP_INST_DEST_FOG : NV20_VP_INST_DEST_PTS) : NV20_VP_INST_DEST_POS) << NV20_VP_INST_DEST_SHIFT);
            break;
        case 0xd: //o(oD0-1=o3-4(front faces)) (r_dest=7 NV20_VP_INST_CONST_DEST_FLAG set)
            p[3] |= 0x00700800 | (pRegs->dest.msk << NV20_VP_INST_DEST_WRITEMASK_SHIFT) | ((pRegs->dest.num ? NV20_VP_INST_DEST_COL1 : NV20_VP_INST_DEST_COL0) << NV20_VP_INST_DEST_SHIFT);
            break;
        case 0xe: //o(oT0-3=o9-12 oT4-5=o7-8(bf) oT6-7=o1-2(n/a)) (r_dest=7 NV20_VP_INST_CONST_DEST_FLAG set)
            //(on xbox, oT4-5 act as oD0-1 for back faces, oT6-7 do not exist, and r12 is an alias for oPos)
            p[3] |= 0x00700800 | (pRegs->dest.msk << NV20_VP_INST_DEST_WRITEMASK_SHIFT) | (((pRegs->dest.num < 4) ? NV20_VP_INST_DEST_TC(pRegs->dest.num) : ((pRegs->dest.num < 6) ? pRegs->dest.num + 3 : pRegs->dest.num - 5)) << NV20_VP_INST_DEST_SHIFT);
            break;
        default:
            debugPrint(" Unrecognized destination register \n ");
            return -1;
    }

    //source 0 (split between DWORD#1 and DWORD#2)
    src0 = (pRegs->src0.mod * NV20_VP_SRC_REG_NEGATE) | (pRegs->src0.swz << NV20_VP_SRC_REG_SWZ_ALL_SHIFT);
    switch (pRegs->src0.reg) //8=r 9=v 0xa=c 0xb=a
    {
        case 8: //r
            src0 |= (NV20_VP_SRC_REG_TYPE_TEMP << NV20_VP_SRC_REG_TYPE_SHIFT) | (pRegs->src0.num << NV20_VP_SRC_REG_TEMP_ID_SHIFT);
            break;
        case 9: //v
            src0 |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= (pRegs->src0.num << NV20_VP_INST_INPUT_SRC_SHIFT);
            break;
        case 0xa: //c
            src0 |= (NV20_VP_SRC_REG_TYPE_CONST << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= ((pRegs->src0.num + 96) << NV20_VP_INST_CONST_SRC_SHIFT);
            break;
        default:
            debugPrint(" Unrecognized src0 register \n ");
            return -2;
    }
    p[1] |= ((src0 & NV20_VP_SRC0_HIGH_MASK) >> NV20_VP_SRC0_HIGH_SHIFT) << NV20_VP_INST_SRC0H_SHIFT;
    p[2] |= (src0 & NV20_VP_SRC0_LOW_MASK) << NV20_VP_INST_SRC0L_SHIFT;
    p[3] |= pRegs->src0.idx * NV20_VP_INST_INDEX_CONST;
    if (pRegs->n == 2)
    {   //src1 & src2 are missing (set them to v0.xyzw)
        p[2] |= ((0x1b << NV20_VP_SRC_REG_SWZ_ALL_SHIFT) | (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT)) << NV20_VP_INST_SRC1_SHIFT;
        p[2] |= ((0x1b << NV20_VP_SRC_REG_SWZ_ALL_SHIFT) >> NV20_VP_SRC2_HIGH_SHIFT) << NV20_VP_INST_SRC2H_SHIFT;
        p[3] |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT) << NV20_VP_INST_SRC2L_SHIFT;
        return 0;
    }

    //source 1 (entirely in DWORD#2)
    src1 = (pRegs->src1.mod * NV20_VP_SRC_REG_NEGATE) | (pRegs->src1.swz << NV20_VP_SRC_REG_SWZ_ALL_SHIFT);
    switch (pRegs->src1.reg) //8=r 9=v 0xa=c 0xb=a
    {
        case 8: //r
            src1 |= (NV20_VP_SRC_REG_TYPE_TEMP << NV20_VP_SRC_REG_TYPE_SHIFT) | (pRegs->src1.num << NV20_VP_SRC_REG_TEMP_ID_SHIFT);
            break;
        case 9: //v
            src1 |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= (pRegs->src1.num << NV20_VP_INST_INPUT_SRC_SHIFT);
            break;
        case 0xa: //c
            src1 |= (NV20_VP_SRC_REG_TYPE_CONST << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= ((pRegs->src1.num + 96) << NV20_VP_INST_CONST_SRC_SHIFT);
            break;
        default:
            debugPrint(" Unrecognized src1 register \n ");
            return -3;
    }
    p[2] |= src1 << NV20_VP_INST_SRC1_SHIFT;
    p[3] |= pRegs->src1.idx * NV20_VP_INST_INDEX_CONST;
    if (pRegs->n == 3)
    {   //src2 is missing (set it to v0.xyzw)
        p[2] |= ((0x1b << NV20_VP_SRC_REG_SWZ_ALL_SHIFT) >> NV20_VP_SRC2_HIGH_SHIFT) << NV20_VP_INST_SRC2H_SHIFT;
        p[3] |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT) << NV20_VP_INST_SRC2L_SHIFT;
        return 0;
    }

    //source 2 (split between DWORD#2 and DWORD#3)
    src2 = (pRegs->src2.mod * NV20_VP_SRC_REG_NEGATE) | (pRegs->src2.swz << NV20_VP_SRC_REG_SWZ_ALL_SHIFT);
    switch (pRegs->src2.reg) //8=r 9=v 0xa=c 0xb=a
    {
        case 8: //r
            src2 |= (NV20_VP_SRC_REG_TYPE_TEMP << NV20_VP_SRC_REG_TYPE_SHIFT) | (pRegs->src2.num << NV20_VP_SRC_REG_TEMP_ID_SHIFT);
            break;
        case 9: //v
            src2 |= (NV20_VP_SRC_REG_TYPE_INPUT << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= (pRegs->src2.num << NV20_VP_INST_INPUT_SRC_SHIFT);
            break;
        case 0xa: //c
            src2 |= (NV20_VP_SRC_REG_TYPE_CONST << NV20_VP_SRC_REG_TYPE_SHIFT);
            p[1] |= ((pRegs->src2.num + 96) << NV20_VP_INST_CONST_SRC_SHIFT);
            break;
        default:
            //report before returning, as the src0/src1 branches do
            debugPrint(" Unrecognized src2 register \n ");
            return -4;
    }
    p[2] |= ((src2 & NV20_VP_SRC2_HIGH_MASK) >> NV20_VP_SRC2_HIGH_SHIFT) << NV20_VP_INST_SRC2H_SHIFT;
    p[3] |= (src2 & NV20_VP_SRC2_LOW_MASK) << NV20_VP_INST_SRC2L_SHIFT;
    p[3] |= pRegs->src2.idx * NV20_VP_INST_INDEX_CONST;
    return 0;
}
//converts shaders pseudo-code into xbox gpu micro-code
//(not recommended for pixel shader: slow and incomplete)
DWORD * pb_pcode2mcode ( const DWORD * pseudocode )
{
DWORD * p ;
DWORD constant ;
DWORD size ;
DWORD * pcode ;
int i , n ;
struct s_PseudoRegs sRegs ;
pcode = ( DWORD * ) pseudocode ;
if ( pcode = = NULL )
{
debugPrint ( " pb_pcode2mcode: NULL parameter \n " ) ;
return NULL ;
}
//pb_tmp_registers will tell us unused registers.
//this array is updated by pb_read_pregs() when tmp registers are detected as destination
memset ( pb_tmp_registers , 0 , sizeof ( pb_tmp_registers ) ) ;
if ( * pcode = = 0xffff0101 ) //ps_1_1
{
pcode + + ;
//currently supported (not a lot, but manual ps registers setting is possible):
//- only 1 stage (1 or 2 instructions to set r0, with or without 1 'tex t0' instruction)
//- modifier -?n
//- modifier ?n_bias (-0.5f)
//- modifier ?n_bx2 (*2.0f)
//- modifier 1-|?n|
//- def cn, r, g, b, a
//- nop
//- tex t0
//- mov r0, ?n (r0=?n)
//- mul r0, ?n, ?n (r0=?n*?n)
//- dp3 r0, ?n, ?n (r0=?n.?n)
//- add r0, ?n, ?n (r0=?n+?n)
//- sub r0, ?n, ?n (r0=?n-n)
//- mad r0, ?n, ?n, ?n (r0=?n*?n+?n)
//- lrp r0, src0, src1, src2 (r0=src0*src1+(1-src0)*src2)
//- cnd r0, r0.a, src1, src2 (r0=(r0.a>0.5f)?src1:src2) (if r0.a MSB is used for mux)
//- coherent destination mask & swizzle (no swizzle or .rgba, .xyzw, .a, .x, .rgb, .xyz for separate rgb/alpha processing)
p = & pb_gpu_registers [ 0 ] ;
//It's recommended to learn initializing registers oneself
//in order to avoid resetting most of this -probably useless- default values
memset ( & pb_gpu_registers [ 0 ] , 0 , sizeof ( pb_gpu_registers ) ) ;
p [ 0 ] = 0xd4301010 ; //PSAlphaInput for stage 0: a.a=v0.a b.a=1.a-|0.a|
p [ 8 ] = 0x000000c0 ; //PSAlphaOutput for stage 0: r0.a=a*b
p [ 16 ] = 0xc4200000 ; //PSRGBInput for stage 0: a.rgb=v0.rgb b.rgb=1.rgb-|0.rgb|
p [ 24 ] = 0x000000c0 ; //PSRGBOutput for stage 0: r0.rgb=a*b
//p[32] //C0's constants
//p[40] //C1's constants
//p[48] //final combiner C0 constant
//p[49] //final combiner C1 constant
//p[50] //PSCompareMode (used only for texture mode clipplane)
//p[51] //PSTextureModes (1 is project 2D: argb=texture(r/q,s/q) usually q=1.0f)
//p[52] //PSDotMapping (0 means [0,255]argb from texture=>[0.0,1.0](r,g,b))
//p[53] //PSInputTextureSource (most logical value is 0x00210000 when texture stages 2 & 3 are used)
p [ 54 ] = 0x11101 ; //PSCombinerCount ("stages usage count" | "C0 & C1 may be different from stage to stage" | "r0.a MSB used for mux")
//These default settings do "mov r0,v0"
//'final combiner' is an additional invisible (free) stage doing this:
//final pixel.rgb = A * B + (1 - A) * C + D
//final pixel.alpha = G.b or G.a (.a modifier must be used if you want .a)
//Also all values are clamped to 0..1 (negative values become zero)
//Inner registers NV20_TCL_PRIMITIVE_3D_RC_FINAL0 and following one
//define inputs and modifiers for the 7 parameters A,B,C,D and E,F,G,? (?=0x80, unknown)
//Here are a few useful values depending what you want to do:
//fog on & specular on : 0x130e0300,0x00001c80 (means pixel.rgb=fog.a * (r0.rgb + v1.rgb) + (1 - fog.a) * fog.rgb & pixel.a=r0.a)
//fog on & specular off : 0x130c0300,0x00001c80 (means pixel.rgb=fog.a * r0.rgb + (1 - fog.a) * fog.rgb & pixel.a=r0.a)
//fog off & specular on : 0x0000000e,0x00001c80 (means pixel.rgb=r0.rgb + v1.rgb & pixel.a=r0.a)
//fog off & specular off : 0x0000000c,0x00001c80 (means D=r0.rgb & G=r0.a, so final pixel.rgb=r0.rgb & pixel.a=r0.a)
//These special read-only registers are also available at final combiner stage (maybe also at any stage?):
//zero = 0 (0x0 is the numeric code for this register, modifier is bits 7-4, mapped to C4)
//fog = fog (0x3, fog.rgb returns the fog color inner register value, mapped to pseudocode C5 -fog.a is fog transparency, coming from fog table, I guess-)
//v1r0sum = r0 + v1 (0xe, I've mapped it to pseudocode C6 in pcode2mcode, useful when specular v1 is to be used)
//EFprod = E * F (0xf, I've mapped it to pseudocode C7 in pcode2mcode, useful for pixel shader optimization, i.e reduce number of stages)
//Codes for normal registers:
//C0 => 0x1
//C1 => 0x2
//v0 => 0x4
//v1 => 0x5
//t0 => 0x8
//t1 => 0x9
//t2 => 0xa
//t3 => 0xb
//r0 => 0xc
//r1 => 0xd
//Modifiers (Or it to code above):
//default 0x00=|0.rgb| 0x10=x.a
//0x20=1-|x| 0x40=2*max(0,x)-1("_bx2") 0x60=1-2*max(0,x) 0x80=max(0,x)-0.5f("_bias") 0xa0=0.5f-max(0,x) 0xc0=x 0xf0=-x
while ( * pcode ! = 0x0000ffff )
{
switch ( * ( pcode + + ) )
{
case 0x00000000 : //nop
case 0x40000000 : //+nop...
break ;
case 0x00000001 : //mov r0, ?n (r0=?n)
case 0x40000001 : //+mov...
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 ) p [ 0 ] = 0x10301010 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) ; //PSAlphaInput for stage 0: a.a=?.a b.a=1-|0.a|
if ( ( sRegs . dest . msk & 0xe ) = = 0xe ) p [ 16 ] = 0x00200000 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=1.rgb-|0.rgb|
break ;
case 0x00000002 : //add r0, ?n, ?n (r0=?n+?n)
case 0x40000002 : //+add...
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 )
{
p [ 0 ] = 0x10301030 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSAlphaInput for stage 0: a.a=?.a b.a=1.a-|0.a| c.a=?.a d=1.a-|0.a|
p [ 8 ] = 0x00000c00 ; //PSAlphaOutput for stage 0: r0.a=a*b+c*d
}
if ( ( sRegs . dest . msk & 0xe ) = = 0xe )
{
p [ 16 ] = 0x00200020 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=1.rgb-|0.rgb| c.rgb=?.rgb d.rgb=1.rgb-|0.rgb|
p [ 24 ] = 0x00000c00 ; //PSRGBOutput for stage 0: r0.rgb=a*b+c*d
}
break ;
case 0x00000003 : //sub r0, ?n, ?n (r0=?n-?n)
case 0x40000003 : //+sub...
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . src1 . mod < 6 )
sRegs . src1 . mod ^ = 1 ; //inverts src1 sign
else
{
debugPrint ( " pb_pcode2mcode: sub not supported if src1 has 1-|x| modifier \n " ) ;
return NULL ;
}
if ( sRegs . dest . msk & 1 )
{
p [ 0 ] = 0x10301030 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSAlphaInput for stage 0: a.a=?.a b.a=1.a-|0.a| c.a=?.a d=1.a-|0.a|
p [ 8 ] = 0x00000c00 ; //PSAlphaOutput for stage 0: r0.a=a*b+c*d
}
if ( ( sRegs . dest . msk & 0xe ) = = 0xe )
{
p [ 16 ] = 0x00200020 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=1.rgb-|0.rgb| c.rgb=?.rgb d.rgb=1.rgb-|0.rgb|
p [ 24 ] = 0x00000c00 ; //PSRGBOutput for stage 0: r0.rgb=a*b+c*d
}
break ;
case 0x00000004 : //mad r0, ?n, ?n, ?n (r0=?n*?n+?n)
case 0x40000004 : //+mad...
pb_read_pregs ( pcode , & sRegs , 4 ) ; pcode + = 4 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 )
{
p [ 0 ] = 0x10101030 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) | ( pb_preg2psreg ( & sRegs . src2 ) < < 8 ) ; //PSAlphaInput for stage 0: a.a=?.a b.a=?.a c.a=?.a d.a=1-|0.a|
p [ 8 ] = 0x00000c00 ; //PSAlphaOutput for stage 0: r0.a=a*b+c*d
}
if ( ( sRegs . dest . msk & 0xe ) = = 0xe )
{
p [ 16 ] = 0x00000020 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) | ( pb_preg2psreg ( & sRegs . src2 ) < < 8 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=?.rgb c.rgb=?.rgb d.rgb=1-|0.rgb|
p [ 24 ] = 0x00000c00 ; //PSRGBOutput for stage 0: r0.rgb=a*b+c*d
}
break ;
case 0x00000005 : //mul r0, ?n, ?n (r0=?n*?n)
case 0x40000005 : //+mul...
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 ) p [ 0 ] = 0x10101010 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) ; //PSAlphaInput for stage 0: a.a=?.a b.a=?.a
if ( ( sRegs . dest . msk & 0xe ) = = 0xe ) p [ 16 ] = 0x00000000 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=?.rgb
break ;
case 0x00000008 : //dp3 r0, ?n, ?n (r0=?n.?n)
case 0x40000008 : //+dp3...
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( ( sRegs . dest . msk & 0xf ) = = 0xe ) //dp3 r0.xyz, ...
{
p [ 16 ] = 0x00000000 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=?.rgb
p [ 24 ] = 0x000020c0 ; //PSRGBOutput for stage 0: r0.rgb=a.b (dot product)
}
if ( ( sRegs . dest . msk & 0xf ) = = 0xf ) //dp3 r0, ...
{
p [ 0 ] = 0x10101010 ;
p [ 8 ] = 0x00000000 ; //PSAlphaOutput for stage 0: discarded (we will use the b->a propagate bit on rgb side)
p [ 16 ] = 0x00000000 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) ; //PSRGBInput for stage 0: a.rgb=?.rgb b.rgb=?.rgb
p [ 24 ] = 0x000820c0 ; //PSRGBOutput for stage 0: r0.rgb=a.b (dot product) (and r0.b propagates to r0.a)
}
break ;
case 0x00000012 : //lrp r0, src0, src1, src2 (r0=src0*src1+(1-src0)*src2)
case 0x40000012 : //+lrp...
pb_read_pregs ( pcode , & sRegs , 4 ) ; pcode + = 4 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . src0 . mod ) { debugPrint ( " pb_pcode2mcode(lrp): Unsupported source 0 modifier \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 )
{
p [ 0 ] = 0x10101030 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) | ( pb_preg2psreg ( & sRegs . src2 ) < < 8 ) | ( pb_preg2psreg ( & sRegs . src0 ) & 0xf ) ; //PSAlphaInput for stage 0: a.a=src0.a b.a=src1.a c.a=src2.a d.a=1-|src0.a|
p [ 8 ] = 0x00000c00 ; //PSAlphaOutput for stage 0: r0.a=a*b+c*d
}
if ( ( sRegs . dest . msk & 0xe ) = = 0xe )
{
p [ 16 ] = 0x00000020 | ( pb_preg2psreg ( & sRegs . src0 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 16 ) | ( pb_preg2psreg ( & sRegs . src2 ) < < 8 ) | ( pb_preg2psreg ( & sRegs . src0 ) & 0xf ) ; //PSRGBInput for stage 0: a.rgb=src0.rgb b.rgb=src1.rgb c.rgb=src2.rgb d.rgb=1-|src0.rgb|
p [ 24 ] = 0x00000c00 ; //PSRGBOutput for stage 0: r0.rgb=a*b+c*d
}
break ;
case 0x00000042 : //tex t0
case 0x40000042 : //+tex...
//We assume tn has been replaced with texture color
//because of a previous correct texture stage initialization
pb_read_pregs ( pcode , & sRegs , 1 ) ; pcode + = 1 ;
if ( sRegs . dest . num ) { debugPrint ( " pb_pcode2mcode: Only 'tex t0' is supported \n " ) ; return NULL ; }
p [ 51 ] = 0x00000001 ; //PSTextureModes (1<<(stage*5) is project 2D: argb=texture(r/q,s/q) usually q=1.0f)
break ;
case 0x00000050 : //cnd r0, r0.a, src1, src2 (r0=(r0.a>0.5f)?src1:src2) (if r0.a MSB used for mux)
case 0x40000050 : //+cnd...
pb_read_pregs ( pcode , & sRegs , 4 ) ; pcode + = 4 ;
if ( ( sRegs . dest . reg ! = 8 ) | | ( sRegs . dest . num ! = 0 ) ) { debugPrint ( " pb_pcode2mcode: Unsupported destination register \n " ) ; return NULL ; }
if ( sRegs . dest . msk & 1 )
{
p [ 0 ] = 0x10301030 | ( pb_preg2psreg ( & sRegs . src2 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSAlphaInput for stage 0: a.a=src2.a b.a=1-|0.a| c.a=src1.a d.a=1-|0.a|
p [ 8 ] = 0x00004c00 ; //PSAlphaOutput for stage 0: r0.rgb=(r0.a MSB not set)?(a*b):(c*d)=(r0.a<=0.5f)?src2.rgb:src1.rgb
}
if ( ( sRegs . dest . msk & 0xe ) = = 0xe )
{
p [ 16 ] = 0x00200020 | ( pb_preg2psreg ( & sRegs . src2 ) < < 24 ) | ( pb_preg2psreg ( & sRegs . src1 ) < < 8 ) ; //PSRGBInput for stage 0: a.rgb=src2.rgb b.rgb=1.rgb-|0.rgb| c.rgb=src1.rgb d.rgb=1.rgb-|0.rgb|
p [ 24 ] = 0x00004c00 ; //PSRGBOutput for stage 0: r0.rgb=(r0.a MSB not set)?(a*b):(c*d)=(r0.a<=0.5f)?src2.rgb:src1.rgb
}
break ;
case 0x00000051 : //def cn, r, g, b, a
pb_read_pregs ( pcode , & sRegs , 1 ) ; pcode + = 1 ;
//converts 4 floats (r,g,b,a) into 1 dword 0xaarrggbb ([0,1.0f]=>[0,0xff])
constant = 0 ;
constant | = ( ( DWORD ) ( 255.0f * ( * ( ( float * ) ( pcode + 3 ) ) ) ) ) < < 24 ;
constant | = ( ( DWORD ) ( 255.0f * ( * ( ( float * ) ( pcode + 0 ) ) ) ) ) < < 16 ;
constant | = ( ( DWORD ) ( 255.0f * ( * ( ( float * ) ( pcode + 1 ) ) ) ) ) < < 8 ;
constant | = ( ( DWORD ) ( 255.0f * ( * ( ( float * ) ( pcode + 2 ) ) ) ) ) < < 0 ;
//distribute c0=>c0 stage 0, c1=>c1 stage 0, c2=>c0 stage 1, etc...
p [ 32 + 8 * ( sRegs . dest . num & 1 ) + ( sRegs . dest . num > > 1 ) ] = constant ;
pcode + = 4 ;
break ;
default :
debugPrint ( " pb_pcode2mcode: Unrecognized ps token #%08x \n " , * ( pcode - 1 ) ) ;
return NULL ;
}
}
return & pb_gpu_registers [ 0 ] ;
}
if ( * pcode ! = 0xfffe0101 ) //vs_1_1
{
debugPrint ( " pb_pcode2mcode: Shader version not supported \n " ) ;
return NULL ;
}
//it's a vertex shader! (vs_1_1 should be entirely supported by code below -report any issue-)
pcode + + ;
pb_exp_constflag = 0 ; //in order to not set taylor series exp macro constants up more than once
pb_log_constflag = 0 ; //in order to not set taylor series log macro constants up more than once
n = 0 ; //instructions counter (can't exceed 136 on xbox)
p = & pb_gpu_programnc [ 1 ] ; //push buffer compatible sequence setting up program and constants
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID , 1 ) ; * ( p + + ) = 0 ; //set run address of shader
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_SHADER_TYPE , 2 ) ; * ( p + + ) = SHADER_TYPE_EXTERNAL ; * ( p + + ) = SHADER_SUBTYPE_REGULAR ; //set shader vertex type (external shader, regular: not allowed to write into constants -faster-)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_FROM_ID , 1 ) ; * ( p + + ) = 0 ; //set cursor in order to load data into program area
while ( * pcode ! = 0x0000ffff )
{
if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode: Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
switch ( * ( pcode + + ) )
{
//standard pseudo-code:
case 0x00000000 : //nop
case 0x40000000 : //+nop
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 0 ) ; pcode + = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_NOP < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000001 : //mov dest,src0
case 0x40000001 : //+mov
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
if ( sRegs . dest . reg = = 0xb )
* ( p + 1 ) | = NV20_VP_INST_OPCODE_ARL < < NV20_VP_INST_VEC_OPCODE_SHIFT ; //mov a0,...
else
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MOV < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000002 : //add dest,src0,src1
case 0x40000002 : //+add
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
//src2 is used instead of src1 for add
sRegs . n = 4 ;
sRegs . src2 = sRegs . src1 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_ADD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000003 : //sub dest,src0,src1
case 0x40000003 : //+sub
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
sRegs . src1 . mod ^ = 1 ; //inverts src1 sign
//src2 is used instead of src1 for add
sRegs . n = 4 ;
sRegs . src2 = sRegs . src1 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_ADD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000004 : //mad dest,src0,src1,src2
case 0x40000004 : //+mad
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 4 ) ; pcode + = 4 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000005 : //mul dest,src0,src1
case 0x40000005 : //+mul
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MUL < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000006 : //rcp dest,src0 (scalar 1/x function)
case 0x40000006 : //+rcp
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_RCP < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask bits into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x00000007 : //rsq dest,src0 (scalar 1/sqrt(x) function)
case 0x40000007 : //+rsq
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_RSQ < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask bits into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x00000008 : //dp3 dest,src0,src1
case 0x40000008 : //+dp3
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000009 : //dp4 dest,src0,src1
case 0x40000009 : //+dp4
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000a : //min dest,src0,src1
case 0x4000000a : //+min
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MIN < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000b : //max dest,src0,src1
case 0x4000000b : //+max
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAX < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000c : //slt dest,src0,src1 (set dest=1 if src0<src1)
case 0x4000000c : //+slt
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_SLT < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000d : //sge dest,src0,src1 (set dest=1 if src0>=src1)
case 0x4000000d : //+sge
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_SGE < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000e : //exp dest,src0 (macro expanding many full precision instructions: slow)
case 0x4000000e : //+exp
if ( pb_exp_constflag = = 0 ) //exp macro constants already set?
{
pb_exp_constflag = 1 ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID , 1 ) ; * ( p + + ) = 94 ; //set cursor in order to load data into C-2 and C-1 (xbox accepts C-96 up to C-1)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ; //Taylor series related coefficients
* ( ( float * ) ( p + + ) ) = 1.0f ; //C-2.x a
* ( ( float * ) ( p + + ) ) = - 6.93147182e-1 ; //C-2.y b
* ( ( float * ) ( p + + ) ) = 2.40226462e-1 ; //C-2.z c
* ( ( float * ) ( p + + ) ) = - 5.55036440e-2 ; //C-2.w d
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ;
* ( ( float * ) ( p + + ) ) = 9.61597636e-3 ; //C-1.x e
* ( ( float * ) ( p + + ) ) = - 1.32823968e-3 ; //C-1.y f
* ( ( float * ) ( p + + ) ) = 1.47491097e-4 ; //C-1.z g
* ( ( float * ) ( p + + ) ) = - 1.08635004e-5 ; //C-1.w h
}
//after a first step x=expp(src0)
//we will compute ri.w=ax^0+bx^1+cx^2+dx^3+...+hx^7
//i.e ri.w=x*(x*(x*(x*(x*(x*(x*h+g)+f)+e)+d)+c)+b)+a
//then exp(x)=x*(1/ri.w)
//expp ri, src0 (first partial precision calculation & preserve x in ri.x)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 2 ) ; //but don't increment pcode yet (so we can read again dest later)
//look for unused temp register i
for ( i = 0 ; i < 16 ; i + + ) if ( pb_tmp_registers [ i ] = = 0 ) break ;
if ( i = = 16 ) { debugPrint ( " pb_pcode2mcode: exp macro needs 1 temporary register (none left) \n " ) ; return NULL ; }
sRegs . dest . reg = 8 ; //replace dest with ri.x
sRegs . dest . num = i ;
sRegs . dest . msk = 8 ; //.x
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_EXP < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
//copy the vector temp write mask bits into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
p + = 4 ;
//mov ri.w, C-1.w
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . n = 2 ;
sRegs . dest . msk = 1 ; //.w
sRegs . src0 . reg = 0xa ; //c
sRegs . src0 . num = - 1 ;
sRegs . src0 . swz = 0xff ; //.wwww
sRegs . src0 . mod = 0 ;
sRegs . src0 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MOV < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-1.z (Horner step: next=previous*x+constant)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . n = 4 ;
sRegs . src0 . reg = 8 ; //r
sRegs . src0 . num = i ;
sRegs . src1 . reg = 8 ; //r
sRegs . src1 . num = i ;
sRegs . src1 . swz = 0 ; //.xxxx
sRegs . src1 . mod = 0 ;
sRegs . src1 . idx = 0 ;
sRegs . src2 . reg = 0xa ; //c
sRegs . src2 . num = - 1 ;
sRegs . src2 . swz = 0xaa ; //.zzzz
sRegs . src2 . mod = 0 ;
sRegs . src2 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-1.y
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0x55 ; //.yyyy
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-1.x
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0 ; //.xxxx
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-2.w
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . num = - 2 ;
sRegs . src2 . swz = 0xff ; //.wwww
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-2.z
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0xaa ; //.zzzz
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-2.y
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0x55 ; //.yyyy
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-2.x
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0 ; //.xxxx
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//rcp ri.w, ri.w (ri.w=1/ri.w)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_RCP < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
//copy the vector temp write mask bits into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
p + = 4 ;
//mul dest, ri.w, ri.x
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(exp): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ; //read dest again and preserve it
sRegs . n = 3 ;
sRegs . src0 . reg = 8 ; //r
sRegs . src0 . num = i ;
sRegs . src0 . swz = 0xff ; //.wwww
sRegs . src0 . mod = 0 ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 8 ; //r
sRegs . src1 . num = i ;
sRegs . src1 . swz = 0 ; //.xxxx
sRegs . src1 . mod = 0 ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(exp): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MUL < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000000f : //log dest,src0 (macro expanding many full precision instructions: slow)
case 0x4000000f : //+log
if ( pb_log_constflag = = 0 ) //log macro constants already set?
{
pb_log_constflag = 1 ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID , 1 ) ; * ( p + + ) = 93 ; //set cursor in order to load data into C-5, C-4 and C-3 (xbox accepts C-96 up to C-1)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ; //Taylor series related coefficients
* ( ( float * ) ( p + + ) ) = 1.0f ; //C-5.x
* ( ( float * ) ( p + + ) ) = 0.0f ;
* ( ( float * ) ( p + + ) ) = 0.0f ;
* ( ( float * ) ( p + + ) ) = 0.0f ;
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ;
* ( ( float * ) ( p + + ) ) = 1.44268966f ; //C-4.x a
* ( ( float * ) ( p + + ) ) = - 7.21165776e-1 ; //C-4.y b
* ( ( float * ) ( p + + ) ) = 4.78684813e-1 ; //C-4.z c
* ( ( float * ) ( p + + ) ) = - 3.47305417e-1 ; //C-4.w d
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ;
* ( ( float * ) ( p + + ) ) = 2.41873696e-1 ; //C-3.x e
* ( ( float * ) ( p + + ) ) = - 1.37531206e-1 ; //C-3.y f
* ( ( float * ) ( p + + ) ) = 5.20646796e-2 ; //C-3.z g
* ( ( float * ) ( p + + ) ) = - 9.31049418e-3 ; //C-3.w h
}
//after a first step y=logp(src0)
//we will compute ri.w=ax^0+bx^1+cx^2+dx^3+...+hx^7
//i.e ri.w=x*(x*(x*(x*(x*(x*(x*h+g)+f)+e)+d)+c)+b)+a
//then log(y)=x*ri.w+y (with x=y-1)
//logp ri.xy, src0 (first partial precision calculation & preserve y in ri.y)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 2 ) ; //but don't increment pcode yet (so we can read again dest later)
//look for unused temp register i
for ( i = 0 ; i < 16 ; i + + ) if ( pb_tmp_registers [ i ] = = 0 ) break ;
if ( i = = 16 ) { debugPrint ( " pb_pcode2mcode: log macro needs 1 temporary register (none left) \n " ) ; return NULL ; }
sRegs . dest . reg = 8 ; //replace dest with ri.x
sRegs . dest . num = i ;
sRegs . dest . msk = 0xc ; //.xy
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_LOG < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
//copy the vector temp write mask bits into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
p + = 4 ;
//sub ri.x, ri.x, C-5.x (x=y-1)
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . n = 3 ;
sRegs . dest . msk = 8 ; //.x
sRegs . src0 . reg = 8 ;
sRegs . src0 . num = i ;
sRegs . src0 . swz = 0 ; //.xxxx
sRegs . src0 . mod = 0 ;
sRegs . src0 . idx = 0 ;
//src2 is used instead of src1 for add
sRegs . n = 4 ;
sRegs . src2 . reg = 0xa ; //c
sRegs . src2 . num = - 5 ;
sRegs . src2 . swz = 0 ; //.xxxx
sRegs . src2 . mod = 1 ; //-
sRegs . src2 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_ADD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mov ri.w, C-3.w
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . n = 2 ;
sRegs . dest . msk = 1 ; //.w
sRegs . src0 . reg = 0xa ; //c
sRegs . src0 . num = - 3 ;
sRegs . src0 . swz = 0xff ; //.wwww
sRegs . src0 . mod = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MOV < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-3.z (next=x*(previous+constant))
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . n = 4 ;
sRegs . src0 . reg = 8 ; //r
sRegs . src0 . num = i ;
sRegs . src1 . reg = 8 ; //r
sRegs . src1 . num = i ;
sRegs . src1 . swz = 0 ; //.xxxx
sRegs . src1 . mod = 0 ;
sRegs . src2 . reg = 0xa ; //c
sRegs . src2 . num = - 3 ;
sRegs . src2 . swz = 0xaa ; //.zzzz
sRegs . src2 . mod = 0 ;
sRegs . src2 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-3.y
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0x55 ; //.yyyy
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-3.x
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0 ; //.xxxx
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-4.w
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . num = - 4 ;
sRegs . src2 . swz = 0xff ; //.wwww
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-4.z
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0xaa ; //.zzzz
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-4.y
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0x55 ; //.yyyy
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad ri.w, ri.w, ri.x, C-4.x
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . src2 . swz = 0 ; //.xxxx
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//mad dest, ri.w, ri.x, ri.y
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(log): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ; //read dest again and preserve it
sRegs . n = 4 ;
sRegs . src0 . reg = 8 ; //r
sRegs . src0 . num = i ;
sRegs . src0 . swz = 0xff ; //.wwww
sRegs . src0 . mod = 0 ;
sRegs . src0 . idx = 0 ;
//pb_read_pregs shouldn't have changed src1
sRegs . src2 . reg = 8 ; //r
sRegs . src2 . num = i ;
sRegs . src2 . swz = 0x55 ; //.yyyy
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode(log): Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_MAD < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000010 : //lit dest,src0 (scalar lighting calculation function)
case 0x40000010 : //+lit
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_LIT < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x00000011 : //dst dest,src0,src1 (calculates distance)
case 0x40000011 : //+dst
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DST < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000012 : //frc dest,src0 (calculates fractional part -let's consider it same as expp for now-)
case 0x40000012 : //+frc
case 0x00000013 : //frc dest,src0 (calculates fractional part -let's consider it same as expp for now-)
case 0x40000013 : //+frc
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_EXP < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x00000014 : //m4x4 dest, src0, ?i (matrix multiply)
case 0x40000014 : //+m4x4
//dp4 dest.x, src0, ?i
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . src0 . swz ! = 0x1b ) | |
( sRegs . src1 . swz ! = 0x1b ) | |
( sRegs . src0 . mod ) | |
( sRegs . src1 . mod ) ) { debugPrint ( " pb_pcode2mcode: Modifiers or swizles not allowed in matrices multiplication macros \n " ) ; return NULL ; }
sRegs . dest . msk = 8 ; //.x
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp4 dest.y, src0, ?i+1
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 4 ; //.y
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp4 dest.z, src0, ?i+2
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 2 ; //.z
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp4 dest.w, src0, ?i+3
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 1 ; //.w
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000015 : //m4x3 dest, src0, ?i (matrix multiply)
case 0x40000015 : //+m4x3
//dp4 dest.x, src0, ?i
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . src0 . swz ! = 0x1b ) | |
( sRegs . src1 . swz ! = 0x1b ) | |
( sRegs . src0 . mod ) | |
( sRegs . src1 . mod ) ) { debugPrint ( " pb_pcode2mcode: Modifiers or swizles not allowed in matrices multiplication macros \n " ) ; return NULL ; }
sRegs . dest . msk = 8 ; //.x
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp4 dest.y, src0, ?i+1
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 4 ; //.y
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp4 dest.z, src0, ?i+2
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m4x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 2 ; //.z
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP4 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000016 : //m3x4 dest, src0, ?i (matrix multiply)
case 0x40000016 : //+m3x4
//dp3 dest.x, src0, ?i
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . src0 . swz ! = 0x1b ) | |
( sRegs . src1 . swz ! = 0x1b ) | |
( sRegs . src0 . mod ) | |
( sRegs . src1 . mod ) ) { debugPrint ( " pb_pcode2mcode: Modifiers or swizles not allowed in matrices multiplication macros \n " ) ; return NULL ; }
sRegs . dest . msk = 8 ; //.x
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.y, src0, ?i+1
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 4 ; //.y
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.z, src0, ?i+2
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 2 ; //.z
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.w, src0, ?i+3
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x4): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 1 ; //.w
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000017 : //m3x3 dest, src0, ?i (matrix multiply)
case 0x40000017 : //+m3x3
//dp3 dest.x, src0, ?i
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . src0 . swz ! = 0x1b ) | |
( sRegs . src1 . swz ! = 0x1b ) | |
( sRegs . src0 . mod ) | |
( sRegs . src1 . mod ) ) { debugPrint ( " pb_pcode2mcode: Modifiers or swizles not allowed in matrices multiplication macros \n " ) ; return NULL ; }
sRegs . dest . msk = 8 ; //.x
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.y, src0, ?i+1
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 4 ; //.y
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.z, src0, ?i+2
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x3): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 2 ; //.z
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000018 : //m3x2 dest, src0, ?i (matrix multiply)
case 0x40000018 : //+m3x2
//dp3 dest.x, src0, ?i
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x2): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( ( sRegs . src0 . swz ! = 0x1b ) | |
( sRegs . src1 . swz ! = 0x1b ) | |
( sRegs . src0 . mod ) | |
( sRegs . src1 . mod ) ) { debugPrint ( " pb_pcode2mcode: Modifiers or swizles not allowed in matrices multiplication macros \n " ) ; return NULL ; }
sRegs . dest . msk = 8 ; //.x
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
//dp3 dest.y, src0, ?i+1
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ; if ( n = = 136 ) { debugPrint ( " pb_pcode2mcode(m3x2): Too many instructions: max=136 (including expanded macros) \n " ) ; return NULL ; }
sRegs . dest . msk = 4 ; //.y
if ( sRegs . src1 . reg = = 0xc ) sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 96 ; else sRegs . src1 . num = ( sRegs . src1 . num + 1 ) % 16 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DP3 < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x0000004e : //expp dest,src0 (scalar partial precision exponential function)
case 0x4000004e : //+expp
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_EXP < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x0000004f : //logp dest,src0 (scalar partial precision logarithm function)
case 0x4000004f : //+logp
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_LOG < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
case 0x00000051 : //def cn x, y, z, w or def cn r, g, b, a
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID , 1 ) ; * ( p + + ) = ( ( * ( pcode + + ) ) & 0xff ) + 96 ; //set cursor in order to load data into Cn
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X , 4 ) ; * ( p + + ) = * ( pcode + + ) ; * ( p + + ) = * ( pcode + + ) ; * ( p + + ) = * ( pcode + + ) ; * ( p + + ) = * ( pcode + + ) ;
break ;
//non-standard pseudo-code: nvidia-specific instructions (vsa.exe won't accept these assembler mnemonics)
//workaround: assemble with dp4 and rcp instead, then patch the pseudo-code, replacing opcode 9 (dp4) with 0x100 (dph) and opcode 6 (rcp) with 0x101 (rcc)
case 0x00000100 : //dph dest,src0,src1 (homogeneous dot product: same as dp4 but src0.w is seen as 1.0f)
case 0x40000100 : //+dph
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 3 ) ; pcode + = 3 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_DPH < < NV20_VP_INST_VEC_OPCODE_SHIFT ;
p + = 4 ;
break ;
case 0x00000101 : //rcc dest,src0 (clamped scalar 1/x function)
case 0x40000101 : //+rcc
pb_push ( p + + , NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0 , 4 ) ; n + + ;
pb_read_pregs ( pcode , & sRegs , 2 ) ; pcode + = 2 ;
//src2 is used instead of src0 in scalar functions
sRegs . n = 4 ;
sRegs . src2 = sRegs . src0 ;
sRegs . src0 . reg = 9 ; //v0.xyzw for unused src
sRegs . src0 . num = 0 ;
sRegs . src0 . mod = 0 ;
sRegs . src0 . swz = 0x1b ;
sRegs . src0 . idx = 0 ;
sRegs . src1 . reg = 9 ; //v0.xyzw for unused src
sRegs . src1 . num = 0 ;
sRegs . src1 . mod = 0 ;
sRegs . src1 . swz = 0x1b ;
sRegs . src1 . idx = 0 ;
if ( pb_set_mcode ( p , & sRegs ) ) { debugPrint ( " pb_pcode2mcode: Unrecognized token \n " ) ; return NULL ; }
* ( p + 1 ) | = NV20_VP_INST_OPCODE_RCC < < NV20_VP_INST_SCA_OPCODE_SHIFT ;
if ( sRegs . dest . reg ! = 8 ) //not r
* ( p + 3 ) | = NV20_VP_INST_DEST_SCA ; //warns GPU that destination will receive scalar function result
else
{
//copy the vector temp write mask into the scalar temp write mask field, then clear the vector temp write mask
* ( p + 3 ) | = ( ( * ( p + 3 ) ) & NV20_VP_INST_VTEMP_WRITEMASK_MASK ) > > ( NV20_VP_INST_VTEMP_WRITEMASK_SHIFT - NV20_VP_INST_STEMP_WRITEMASK_SHIFT ) ;
* ( p + 3 ) & = ~ NV20_VP_INST_VTEMP_WRITEMASK_MASK ;
}
p + = 4 ;
break ;
default :
debugPrint ( " pb_pcode2mcode: Unrecognized vs token #%08x \n " , * ( pcode - 1 ) ) ;
return NULL ;
}
}
* ( p - 1 ) | = NV20_VP_INST_LAST_INST ; //bit 0 of 4th dword means end of shader
pb_gpu_programnc [ 0 ] = p - & pb_gpu_programnc [ 1 ] ; //size
pb_gpu_programnc [ 0 ] | = 0x43210000 ; //personal vs marker
return & pb_gpu_programnc [ 0 ] ;
}