From 296e8a64ba70e6f91711696f350320f985adcf9e Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Tue, 30 Nov 2021 23:36:46 -0500 Subject: [PATCH 1/7] vjag_memory.h add more struct definitions Signed-off-by: Joseph Mattello --- src/vjag_memory.h | 188 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/src/vjag_memory.h b/src/vjag_memory.h index 44749a92..aa6154e8 100644 --- a/src/vjag_memory.h +++ b/src/vjag_memory.h @@ -13,6 +13,194 @@ extern "C" { #endif +#pragma pack(push, 1) + typedef union Bits64 { + uint64_t DATA; + struct Bytes8 { +#ifdef LITTLE_ENDIAN + uint8_t b0; + uint8_t b1; + uint8_t b2; + uint8_t b3; + uint8_t b4; + uint8_t b5; + uint8_t b6; + uint8_t b7; +#else + uint8_t b7; + uint8_t b6; + uint8_t b5; + uint8_t b4; + uint8_t b3; + uint8_t b2; + uint8_t b1; + uint8_t b0; +#endif + } bytes; + } Bits64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef union Bits32 { + uint32_t WORD; + struct Words { +#ifdef LITTLE_ENDIAN + uint16_t LWORD; + uint16_t UWORD; +#else + uint16_t UWORD; + uint16_t LWORD; +#endif + } words; + struct Bytes4 { +#ifdef LITTLE_ENDIAN + uint8_t LL; + uint8_t LU; + uint8_t UL; + uint8_t UU; // Upper upper [UU, UL, LU, LL] +#else + uint8_t UU; // Upper upper [UU, UL, LU, LL] + uint8_t UL; + uint8_t LU; + uint8_t LL; +#endif + } bytes; + struct TopThreeOne { +#ifdef LITTLE_ENDIAN + unsigned int : 1; + uint32_t value : 31; +#else + uint32_t value : 31; + unsigned int : 1; +#endif + } topThreeOne; + struct Bits { +#ifdef LITTLE_ENDIAN + unsigned int b0: 1; + unsigned int b1: 1; + unsigned int b2: 1; + unsigned int b3: 1; + unsigned int b4: 1; + unsigned int b5: 1; + unsigned int b6: 1; + unsigned int b7: 1; + unsigned int b8: 1; + unsigned int b9: 1; + unsigned int b10: 1; + unsigned int b11: 1; + unsigned int b12: 1; + unsigned int b13: 1; + unsigned int b14: 1; + unsigned int b15: 1; + unsigned int b16: 1; + unsigned int b17: 1; + unsigned int b18: 1; + unsigned int b19: 1; + unsigned 
int b20: 1; + unsigned int b21: 1; + unsigned int b22: 1; + unsigned int b23: 1; + unsigned int b24: 1; + unsigned int b25: 1; + unsigned int b26: 1; + unsigned int b27: 1; + unsigned int b28: 1; + unsigned int b29: 1; + unsigned int b30: 1; + unsigned int b31: 1; +#else + // reverse the order of the bit fields. + unsigned int b31: 1; + unsigned int b30: 1; + unsigned int b29: 1; + unsigned int b28: 1; + unsigned int b27: 1; + unsigned int b26: 1; + unsigned int b25: 1; + unsigned int b24: 1; + unsigned int b23: 1; + unsigned int b22: 1; + unsigned int b21: 1; + unsigned int b20: 1; + unsigned int b19: 1; + unsigned int b18: 1; + unsigned int b17: 1; + unsigned int b16: 1; + unsigned int b15: 1; + unsigned int b14: 1; + unsigned int b13: 1; + unsigned int b12: 1; + unsigned int b11: 1; + unsigned int b10: 1; + unsigned int b9: 1; + unsigned int b8: 1; + unsigned int b7: 1; + unsigned int b6: 1; + unsigned int b5: 1; + unsigned int b4: 1; + unsigned int b3: 1; + unsigned int b2: 1; + unsigned int b1: 1; + unsigned int b0: 1; +#endif + } bits; +} Bits32; +#pragma pack(pop) + +#pragma pack(push, 1) + typedef union OpCode { + uint16_t WORD; + struct Bytes2 { +#ifdef LITTLE_ENDIAN + uint8_t LBYTE; + uint8_t UBYTE; +#else + uint8_t UBYTE; + uint8_t LBYTE; +#endif + } Bytes; + struct Codes { +#ifdef LITTLE_ENDIAN + unsigned int second : 5; + unsigned int first : 5; + unsigned int index : 6; +#else + unsigned int index : 6; + unsigned int first : 5; + unsigned int second : 5; +#endif + } Codes; + } OpCode; +#pragma pack(pop) + + typedef OpCode U16Union; + +typedef union Offset { + uint32_t LONG; +#pragma pack(push, 1) + struct Members { +#ifdef LITTLE_ENDIAN + unsigned int offset : 31; + unsigned int bit : 1; +#else + unsigned int bit : 1; + unsigned int offset : 31; +#endif + } Members; +#pragma pack(pop) +} Offset; + +typedef union DSPLong { + uint32_t LONG; + struct Data { +#ifdef LITTLE_ENDIAN + uint16_t LWORD; + uint16_t UWORD; +#else + uint16_t UWORD; + uint16_t 
LWORD; +#endif + extern uint8_t jagMemSpace[]; extern uint8_t * jaguarMainRAM; From b71775e481f7bce3f1285e205b3d58fa15dd79bf Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Tue, 30 Nov 2021 23:33:43 -0500 Subject: [PATCH 2/7] vjag_memory.h add GPUControl struct Signed-off-by: Joseph Mattello --- src/vjag_memory.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/vjag_memory.h b/src/vjag_memory.h index aa6154e8..a75ecf75 100644 --- a/src/vjag_memory.h +++ b/src/vjag_memory.h @@ -146,6 +146,26 @@ typedef union Bits32 { } bits; } Bits32; #pragma pack(pop) + +#pragma pack(push, 1) +typedef union GPUControl { + uint32_t WORD; + struct Words words; + struct Bits bits; + struct __attribute__ ((__packed__)) { +#ifdef LITTLE_ENDIAN + unsigned int : 6; + unsigned int irqMask: 5; + unsigned int : 21; +#else + unsigned int : 21; + unsigned int irqMask: 5; + unsigned int : 6; +#endif +} gpuIRQ; +#pragma pack(pop) + +} GPUControl; #pragma pack(push, 1) typedef union OpCode { From ea9917d429bfa5907c15938abf60482de28a107f Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Thu, 14 Oct 2021 02:19:23 -0400 Subject: [PATCH 3/7] gpu.c gpucontrol as union The GPU_RUNNING macro was pretty slow on ARM for some reason. Bit-field structs are faster in my testing. Signed-off-by: Joseph Mattello --- src/gpu.c | 72 +++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/gpu.c b/src/gpu.c index 66959e52..05ec6fac 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -177,7 +177,8 @@ static uint32_t gpu_flags; static uint32_t gpu_matrix_control; static uint32_t gpu_pointer_to_matrix; static uint32_t gpu_data_organization; -static uint32_t gpu_control; +static GPUControl gpu_control; + static uint32_t gpu_div_control; // There is a distinct advantage to having these separated out--there's no need to clear // a bit before writing a result. 
I.e., if the result of an operation leaves a zero in @@ -192,7 +193,7 @@ static uint32_t gpu_instruction; static uint32_t gpu_opcode_first_parameter; static uint32_t gpu_opcode_second_parameter; -#define GPU_RUNNING (gpu_control & 0x01) +#define GPU_RUNNING (gpu_control.bits.b0) #define RM gpu_reg[gpu_opcode_first_parameter] #define RN gpu_reg[gpu_opcode_second_parameter] @@ -342,34 +343,33 @@ uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/) { offset &= 0x1F; switch (offset) - { - case 0x00: - gpu_flag_c = (gpu_flag_c ? 1 : 0); - gpu_flag_z = (gpu_flag_z ? 1 : 0); - gpu_flag_n = (gpu_flag_n ? 1 : 0); - - gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; - - return gpu_flags & 0xFFFFC1FF; - case 0x04: - return gpu_matrix_control; - case 0x08: - return gpu_pointer_to_matrix; - case 0x0C: - return gpu_data_organization; - case 0x10: - return gpu_pc; - case 0x14: - return gpu_control; - case 0x18: - return gpu_hidata; - case 0x1C: - return gpu_remain; - default: // unaligned long read - break; - } - - return 0; + { + case 0x00: + gpu_flag_c = (gpu_flag_c ? 1 : 0); + gpu_flag_z = (gpu_flag_z ? 1 : 0); + gpu_flag_n = (gpu_flag_n ? 
1 : 0); + + gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z; + + return gpu_flags & 0xFFFFC1FF; + case 0x04: + return gpu_matrix_control; + case 0x08: + return gpu_pointer_to_matrix; + case 0x0C: + return gpu_data_organization; + case 0x10: + return gpu_pc; + case 0x14: + return gpu_control.WORD; + case 0x18: + return gpu_hidata; + case 0x1C: + return gpu_remain; + default: // unaligned long read + break; + } + return 0; } return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who); @@ -473,7 +473,7 @@ void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/) gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1; gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2; GPUUpdateRegisterBanks(); - gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits + gpu_control.WORD &= ~((gpu_flags & CINT04FLAGS) >> 3); // Interrupt latch clear bits //Writing here is only an interrupt enable--this approach is just plain wrong! // GPUHandleIRQs(); //This, however, is A-OK! 
;-) @@ -523,7 +523,7 @@ void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/) data &= ~0x04; } - gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0)); + gpu_control.WORD = (gpu_control.WORD & 0xF7C0) | (data & (~0xF7C0)); // if gpu wasn't running but is now running, execute a few cycles #ifdef GPU_SINGLE_STEPPING @@ -579,7 +579,7 @@ void GPUHandleIRQs(void) return; // Get the interrupt latch & enable bits - bits = (gpu_control >> 6) & 0x1F; + bits = gpu_control.gpuIRQ.irqMask; //(gpu_control >> 6) & 0x1F; mask = (gpu_flags >> 4) & 0x1F; // Bail out if latched interrupts aren't enabled @@ -618,11 +618,11 @@ void GPUHandleIRQs(void) void GPUSetIRQLine(int irqline, int state) { uint32_t mask = 0x0040 << irqline; - gpu_control &= ~mask; // Clear the interrupt latch + gpu_control.WORD &= ~mask; // Clear the interrupt latch if (state) { - gpu_control |= mask; // Assert the interrupt latch + gpu_control.WORD |= mask; // Assert the interrupt latch GPUHandleIRQs(); // And handle the interrupt... } } @@ -644,7 +644,7 @@ void GPUReset(void) gpu_pointer_to_matrix = 0x00000000; gpu_data_organization = 0xFFFFFFFF; gpu_pc = 0x00F03000; - gpu_control = 0x00002800; // Correctly sets this as TOM Rev. 2 + gpu_control.WORD = 0x00002800; // Correctly sets this as TOM Rev. 
2 gpu_hidata = 0x00000000; gpu_remain = 0x00000000; // These two registers are RO/WO gpu_div_control = 0x00000000; From 58b01671429f7d333ec5fb88bcd22f99d40a7a35 Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Thu, 14 Oct 2021 02:19:51 -0400 Subject: [PATCH 4/7] gpu.c inline some things Signed-off-by: Joseph Mattello --- src/gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gpu.c b/src/gpu.c index 05ec6fac..f0dc274d 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -296,7 +296,7 @@ uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/) } // GPU word access (read) -uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/) +INLINE uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000)) { @@ -325,7 +325,7 @@ uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/) } // GPU dword access (read) -uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/) +INLINE uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if (offset >= 0xF02000 && offset <= 0xF020FF) { From f6c98e7cc6d350d6cab402cd3425f1cac63fcc94 Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Thu, 14 Oct 2021 02:20:45 -0400 Subject: [PATCH 5/7] gpu.c use opcode in... 
Signed-off-by: Joseph Mattello --- src/gpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gpu.c b/src/gpu.c index f0dc274d..930d1771 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -300,10 +300,11 @@ INLINE uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/) { if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000)) { - uint16_t data; - offset &= 0xFFF; - data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1]; - return data; + offset &= 0xFFF; + OpCode data; + data.Bytes.UBYTE = (uint16_t)gpu_ram_8[offset]; + data.Bytes.LBYTE = (uint16_t)gpu_ram_8[offset+1]; + return data.WORD; } else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20)) { From b197a1b0920359887d3678c0f8427df59067640c Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Thu, 14 Oct 2021 19:13:50 -0400 Subject: [PATCH 6/7] gpu.c gpu_opcode_div use structs Signed-off-by: Joseph Mattello --- src/gpu.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/gpu.c b/src/gpu.c index 930d1771..01e61f84 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -1573,30 +1573,35 @@ INLINE static void gpu_opcode_abs(void) INLINE static void gpu_opcode_div(void) // RN / RM { + unsigned i; // Real algorithm, courtesy of SCPCD: NYAN! - uint32_t q = RN; - uint32_t r = 0; + Bits32 q; + q.WORD = RN; + + Bits32 r; + r.WORD = 0; // If 16.16 division, stuff top 16 bits of RN into remainder and put the // bottom 16 of RN in top 16 of quotient - if (gpu_div_control & 0x01) - q <<= 16, r = RN >> 16; + if (gpu_div_control & 0x01) { + r.WORD = q.words.UWORD; + q.words.UWORD = q.words.LWORD; + q.words.LWORD = 0; + } for(i=0; i<32; i++) { - uint32_t sign = r & 0x80000000; - r = (r << 1) | ((q >> 31) & 0x01); - r += (sign ? RM : -RM); - q = (q << 1) | (((~r) >> 31) & 0x01); + uint32_t sign = r.bits.b31; + r.WORD = (r.WORD << 1) | q.bits.b31; + r.WORD += (sign ? 
RM : -RM); + q.WORD = (q.WORD << 1) | !r.bits.b31; // (((~r) >> 31) & 0x01); } - RN = q; - gpu_remain = r; - + RN = q.WORD; + gpu_remain = r.WORD; } - INLINE static void gpu_opcode_imultn(void) { uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM); From 2ae08c0548a70193c8f17c5898540fba9bc2c434 Mon Sep 17 00:00:00 2001 From: Joseph Mattello Date: Tue, 30 Nov 2021 23:43:31 -0500 Subject: [PATCH 7/7] gpu.c fix logical or warning Signed-off-by: Joseph Mattello --- src/gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu.c b/src/gpu.c index 01e61f84..782e969e 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -443,7 +443,7 @@ void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/) return; } - else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F)) + else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (offset == GPU_CONTROL_RAM_BASE + 0x1F)) return; // Have to be careful here--this can cause an infinite loop!