From 090ecb2562e368067fee83d174ecade9a3d827d7 Mon Sep 17 00:00:00 2001 From: Phillip Stephens Date: Tue, 3 Oct 2023 20:49:16 -0700 Subject: [PATCH] Split match and link OScache --- config/G2ME01/splits.txt | 1 + config/G2ME01/symbols.txt | 70 +++---- configure.py | 2 +- src/Dolphin/os/OSCache.c | 426 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 463 insertions(+), 36 deletions(-) create mode 100644 src/Dolphin/os/OSCache.c diff --git a/config/G2ME01/splits.txt b/config/G2ME01/splits.txt index 01f7506..7f43769 100644 --- a/config/G2ME01/splits.txt +++ b/config/G2ME01/splits.txt @@ -171,6 +171,7 @@ Dolphin/PPCArch.c: Dolphin/os/OSCache.c: .text start:0x8036EE9C end:0x8036F514 + .data start:0x803BE7B0 end:0x803BE9E0 Dolphin/os/OSContext.c: .text start:0x8036F514 end:0x8036FCF8 diff --git a/config/G2ME01/symbols.txt b/config/G2ME01/symbols.txt index e622c24..4f3dca0 100644 --- a/config/G2ME01/symbols.txt +++ b/config/G2ME01/symbols.txt @@ -16205,19 +16205,19 @@ __OSStopAudioSystem = .text:0x8036EDC4; // type:function size:0xD8 scope:global DCEnable = .text:0x8036EE9C; // type:function size:0x14 scope:global DCInvalidateRange = .text:0x8036EEB0; // type:function size:0x2C scope:global DCFlushRange = .text:0x8036EEDC; // type:function size:0x30 scope:global -fn_8036EF0C = .text:0x8036EF0C; // type:function size:0x30 +DCStoreRange = .text:0x8036EF0C; // type:function size:0x30 DCFlushRangeNoSync = .text:0x8036EF3C; // type:function size:0x2C scope:global -fn_8036EF68 = .text:0x8036EF68; // type:function size:0x2C -fn_8036EF94 = .text:0x8036EF94; // type:function size:0x2C +DCStoreRangeNoSync = .text:0x8036EF68; // type:function size:0x2C +DCZeroRange = .text:0x8036EF94; // type:function size:0x2C ICInvalidateRange = .text:0x8036EFC0; // type:function size:0x34 scope:global ICFlashInvalidate = .text:0x8036EFF4; // type:function size:0x10 scope:global ICEnable = .text:0x8036F004; // type:function size:0x14 scope:global -fn_8036F018 = .text:0x8036F018; // type:function size:0xCC -fn_8036F0E4 = .text:0x8036F0E4; // type:function size:0x38 +__LCEnable = .text:0x8036F018; // type:function size:0xCC +LCEnable = .text:0x8036F0E4; // type:function size:0x38 LCDisable = .text:0x8036F11C; // type:function size:0x28 scope:global -fn_8036F144 = .text:0x8036F144; // type:function size:0x24 align:4 -fn_8036F168 = .text:0x8036F168; // type:function size:0xAC -fn_8036F214 = .text:0x8036F214; // type:function size:0x14 +LCStoreBlocks = .text:0x8036F144; // type:function size:0x24 align:4 +LCStoreData = .text:0x8036F168; // type:function size:0xAC +LCQueueWait = .text:0x8036F214; // type:function size:0x14 L2GlobalInvalidate = .text:0x8036F228; // type:function size:0x98 scope:global DMAErrorHandler = .text:0x8036F2C0; // type:function size:0x160 scope:global __OSCacheInit = .text:0x8036F420; // type:function size:0xF4 scope:global @@ -16225,17 +16225,17 @@ __OSLoadFPUContext = .text:0x8036F514; // type:function size:0x124 scope:local __OSSaveFPUContext = .text:0x8036F638; // type:function size:0x128 scope:local OSSaveFPUContext = .text:0x8036F760; // type:function size:0x8 scope:global OSSetCurrentContext = .text:0x8036F768; // type:function size:0x5C scope:global -fn_8036F7C4 = .text:0x8036F7C4; // type:function size:0xC +OSGetCurrentContext = .text:0x8036F7C4; // type:function size:0xC OSSaveContext = .text:0x8036F7D0; // type:function size:0x80 scope:global OSLoadContext = .text:0x8036F850; // type:function size:0xD8 scope:global OSGetStackPointer = .text:0x8036F928; // type:function size:0x8 scope:global -fn_8036F930 = .text:0x8036F930; // type:function size:0x30 +OSSwitchFiber = .text:0x8036F930; // type:function size:0x30 OSClearContext = .text:0x8036F960; // type:function size:0x24 scope:global OSDumpContext = .text:0x8036F984; // type:function size:0x2A8 scope:global OSSwitchFPUContext = .text:0x8036FC2C; // type:function size:0x84 scope:local __OSContextInit = .text:0x8036FCB0; // type:function size:0x48 scope:global OSReport = .text:0x8036FCF8; // type:function size:0x80 scope:global -fn_8036FD78 = .text:0x8036FD78; // type:function size:0x12C +OSPanic = .text:0x8036FD78; // type:function size:0x12C OSSetErrorHandler = .text:0x8036FEA4; // type:function size:0x218 scope:global __OSUnhandledException = .text:0x803700BC; // type:function size:0x2E8 scope:global fn_803703A4 = .text:0x803703A4; // type:function size:0x188 @@ -16397,29 +16397,29 @@ fn_80379868 = .text:0x80379868; // type:function size:0x13C fn_803799A4 = .text:0x803799A4; // type:function size:0x14C align:4 SIProbe = .text:0x80379AF0; // type:function size:0x24 align:4 SISetSamplingRate = .text:0x80379B14; // type:function size:0xE4 scope:global -fn_80379BF8 = .text:0x80379BF8; // type:function size:0x24 -fn_80379C1C = .text:0x80379C1C; // type:function size:0x244 align:4 -fn_80379E60 = .text:0x80379E60; // type:function size:0x44 -fn_80379EA4 = .text:0x80379EA4; // type:function size:0x13C -fn_80379FE0 = .text:0x80379FE0; // type:function size:0x11C -fn_8037A0FC = .text:0x8037A0FC; // type:function size:0x3BC -fn_8037A4B8 = .text:0x8037A4B8; // type:function size:0x1E0 -fn_8037A698 = .text:0x8037A698; // type:function size:0xF0 -fn_8037A788 = .text:0x8037A788; // type:function size:0x68 -fn_8037A7F0 = .text:0x8037A7F0; // type:function size:0x1BC -fn_8037A9AC = .text:0x8037A9AC; // type:function size:0x54 -fn_8037AA00 = .text:0x8037AA00; // type:function size:0x24C -fn_8037AC4C = .text:0x8037AC4C; // type:function size:0x10C -fn_8037AD58 = .text:0x8037AD58; // type:function size:0x1A88 -fn_8037C7E0 = .text:0x8037C7E0; // type:function size:0x1A8C -fn_8037E26C = .text:0x8037E26C; // type:function size:0x1AAC -fn_8037FD18 = .text:0x8037FD18; // type:function size:0x67C -fn_80380394 = .text:0x80380394; // type:function size:0x6A8 -fn_80380A3C = .text:0x80380A3C; // type:function size:0x6A8 -fn_803810E4 = .text:0x803810E4; // type:function size:0xA0 align:4 -fn_80381184 = .text:0x80381184; // type:function size:0x464 -fn_803815E8 = .text:0x803815E8; // type:function size:0x90 -fn_80381678 = .text:0x80381678; // type:function size:0x3C +SIRefreshSamplingRate = .text:0x80379BF8; // type:function size:0x24 +THPVideoDecode = .text:0x80379C1C; // type:function size:0x244 align:4 +__THPSetupBuffers = .text:0x80379E60; // type:function size:0x44 +__THPReadFrameHeader = .text:0x80379EA4; // type:function size:0x13C +__THPReadScaneHeader = .text:0x80379FE0; // type:function size:0x11C +__THPReadQuantizationTable = .text:0x8037A0FC; // type:function size:0x3BC +__THPReadHuffmanTableSpecification = .text:0x8037A4B8; // type:function size:0x1E0 +__THPHuffGenerateSizeTable = .text:0x8037A698; // type:function size:0xF0 +__THPHuffGenerateCodeTable = .text:0x8037A788; // type:function size:0x68 +__THPHuffGenerateDecoderTables = .text:0x8037A7F0; // type:function size:0x1BC +__THPRestartDefinition = .text:0x8037A9AC; // type:function size:0x54 +__THPPrepBitStream = .text:0x8037AA00; // type:function size:0x24C +__THPDecompressYUV = .text:0x8037AC4C; // type:function size:0x10C +__THPDecompressiMCURow512x448 = .text:0x8037AD58; // type:function size:0x1A88 +__THPDecompressiMCURow640x480 = .text:0x8037C7E0; // type:function size:0x1A8C +__THPDecompressiMCURowNxN = .text:0x8037E26C; // type:function size:0x1AAC +__THPHuffDecodeDCTCompY = .text:0x8037FD18; // type:function size:0x67C +__THPHuffDecodeDCTCompU = .text:0x80380394; // type:function size:0x6A8 +__THPHuffDecodeDCTCompV = .text:0x80380A3C; // type:function size:0x6A8 +THPInit = .text:0x803810E4; // type:function size:0xA0 align:4 +THPAudioDecode = .text:0x80381184; // type:function size:0x464 +__THPAudioGetNewSample = .text:0x803815E8; // type:function size:0x90 +__THPAudioInitialize = .text:0x80381678; // type:function size:0x3C fn_803816B4 = .text:0x803816B4; // type:function size:0x44 AIInitDMA__FUlUl = .text:0x803816F8; // type:function size:0x88 scope:global AIStartDMA__Fv = .text:0x80381780; // type:function size:0x18 scope:global @@ -18734,7 +18734,7 @@ jumptable_803BE494 = .data:0x803BE494; // type:object size:0x90 scope:local @1 = .data:0x803BE528; // type:object size:0x44 scope:local data:string ResetFunctionInfo = .data:0x803BE720; // type:object size:0x10 scope:local DSPInitCode = .data:0x803BE730; // type:object size:0x80 scope:local -@69 = .data:0x803BE7B0; // type:object size:0x29 scope:local data:string +@69 = .data:0x803BE7B0; // type:object size:0x29 scope:local data:string_table lbl_803BE9E0 = .data:0x803BE9E0; // type:object size:0x1B4 @74 = .data:0x803BEB94; // type:object size:0x23 scope:local data:string @13 = .data:0x803BEBB8; // type:object size:0x16 scope:local data:string diff --git a/configure.py b/configure.py index 77534dd..2fd8688 100644 --- a/configure.py +++ b/configure.py @@ -308,7 +308,7 @@ def Rel(lib_name, objects): DolphinLib( "os", [ - Object(NonMatching, "Dolphin/os/OSCache.c"), + Object(Matching, "Dolphin/os/OSCache.c"), Object(NonMatching, "Dolphin/os/OSContext.c"), Object(NonMatching, "Dolphin/os/OSError.c"), ], diff --git a/src/Dolphin/os/OSCache.c b/src/Dolphin/os/OSCache.c new file mode 100644 index 0000000..3e5ca85 --- /dev/null +++ b/src/Dolphin/os/OSCache.c @@ -0,0 +1,426 @@ +#include "dolphin/PPCArch.h" +#include "dolphin/os.h" + +// Can't use this due to weird condition register issues +//#include "asm_types.h" +#define HID2 920 + +#include "dolphin/db.h" + +/* clang-format off */ +asm void DCEnable() { + nofralloc + sync + mfspr r3, HID0 + ori r3, r3, 0x4000 + mtspr HID0, r3 + blr +} + +asm void DCInvalidateRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbi r0, addr + addi addr, addr, 32 + bdnz @1 + blr +} + + +asm void DCFlushRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbf r0, addr + addi addr, addr, 32 + bdnz @1 + sc + blr +} + +asm void DCStoreRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbst r0, addr + addi addr, addr, 32 + bdnz @1 + sc + + blr +} + +asm void DCFlushRangeNoSync(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbf r0, addr + addi addr, addr, 32 + bdnz @1 + blr +} + + +asm void DCStoreRangeNoSync(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbst r0, addr + addi addr, addr, 32 + bdnz @1 + + blr +} + +asm void DCZeroRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbz r0, addr + addi addr, addr, 32 + bdnz @1 + + blr +} + + +asm void ICInvalidateRange(register void* addr, register u32 nBytes) { + nofralloc + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + icbi r0, addr + addi addr, addr, 32 + bdnz @1 + sync + isync + + blr +} + + +asm void ICFlashInvalidate() { + nofralloc + mfspr r3, HID0 + ori r3, r3, 0x800 + mtspr HID0, r3 + blr +} + +asm void ICEnable() { + nofralloc + isync + mfspr r3, HID0 + ori r3, r3, 0x8000 + mtspr HID0, r3 + blr +} + +#define LC_LINES 512 +#define CACHE_LINES 1024 + +asm void __LCEnable() { + nofralloc + mfmsr r5 + ori r5, r5, 0x1000 + mtmsr r5 + + lis r3, OS_CACHED_REGION_PREFIX + li r4, CACHE_LINES + mtctr r4 +_touchloop: + dcbt 0,r3 + dcbst 0,r3 + addi r3,r3,32 + bdnz _touchloop + mfspr r4, HID2 + oris r4, r4, 0x100F + mtspr HID2, r4 + + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + lis r3, LC_BASE_PREFIX + ori r3, r3, 0x0002 + mtspr DBAT3L, r3 + ori r3, r3, 0x01fe + mtspr DBAT3U, r3 + isync + lis r3, LC_BASE_PREFIX + li r6, LC_LINES + mtctr r6 + li r6, 0 + +_lockloop: + dcbz_l r6, r3 + addi r3, r3, 32 + bdnz+ _lockloop + + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + + blr +} + +void LCEnable() { + BOOL enabled; + + enabled = OSDisableInterrupts(); + __LCEnable(); + OSRestoreInterrupts(enabled); +} + + +asm void LCDisable() { + nofralloc + lis r3, LC_BASE_PREFIX + li r4, LC_LINES + mtctr r4 +@1 + dcbi r0, r3 + addi r3, r3, 32 + bdnz @1 + mfspr r4, HID2 + rlwinm r4, r4, 0, 4, 2 + mtspr HID2, r4 + blr +} + + +asm void LCLoadBlocks(register void* destTag, register void* srcAddr, register u32 numBlocks) { + nofralloc + rlwinm r6, numBlocks, 30, 27, 31 + rlwinm srcAddr, srcAddr, 0, 4, 31 + or r6, r6, srcAddr + mtspr DMA_U, r6 + rlwinm r6, numBlocks, 2, 28, 29 + or r6, r6, destTag + ori r6, r6, 0x12 + mtspr DMA_L, r6 + blr +} + +asm void LCStoreBlocks(register void* destAddr, register void* srcTag, register u32 numBlocks) { + nofralloc + rlwinm r6, numBlocks, 30, 27, 31 + rlwinm destAddr, destAddr, 0, 4, 31 + or r6, r6, destAddr + mtspr DMA_U, r6 + rlwinm r6, numBlocks, 2, 28, 29 + or r6, r6, srcTag + ori r6, r6, 0x2 + mtspr DMA_L, r6 + blr +} + +/* clang-format on */ + +u32 LCLoadData(register void* destAddr, register void* srcAddr, register u32 nBytes) { + u32 numBlocks = (nBytes + 31) / 32; + u32 numTransactions = (numBlocks + 128 - 1) / 128; + + while (numBlocks > 0) { + if (numBlocks < 128) { + LCLoadBlocks(destAddr, srcAddr, numBlocks); + numBlocks = 0; + } else { + LCLoadBlocks(destAddr, srcAddr, 0); + numBlocks -= 128; + destAddr = (void*)((u32)destAddr + 4096); + srcAddr = (void*)((u32)srcAddr + 4096); + } + } + + return numTransactions; +} +u32 LCStoreData(void* destAddr, void* srcAddr, u32 nBytes) { + u32 numBlocks = (nBytes + 31) / 32; + u32 numTransactions = (numBlocks + 128 - 1) / 128; + + while (numBlocks > 0) { + if (numBlocks < 128) { + LCStoreBlocks(destAddr, srcAddr, numBlocks); + numBlocks = 0; + } else { + LCStoreBlocks(destAddr, srcAddr, 0); + numBlocks -= 128; + destAddr = (void*)((u32)destAddr + 4096); + srcAddr = (void*)((u32)srcAddr + 4096); + } + } + + return numTransactions; +} + +/* clang-format off */ +asm u32 LCQueueLength() { + nofralloc + mfspr r4, HID2 + rlwinm r3, r4, 8, 28, 31 + blr +} + +asm void LCQueueWait(register u32 len) { + nofralloc +@1 + mfspr r4, HID2 + rlwinm r4, r4, 8, 28, 31 + cmpw r4, r3 + bgt @1 + blr +} + +/* clang-format on */ +static void L2Disable(void) { + __sync(); + PPCMtl2cr(PPCMfl2cr() & ~0x80000000); + __sync(); +} + +void L2GlobalInvalidate(void) { + L2Disable(); + PPCMtl2cr(PPCMfl2cr() | 0x00200000); + while (PPCMfl2cr() & 0x00000001u) + ; + PPCMtl2cr(PPCMfl2cr() & ~0x00200000); + while (PPCMfl2cr() & 0x00000001u) { + DBPrintf(">>> L2 INVALIDATE : SHOULD NEVER HAPPEN\n"); + } +} + +static void L2Init(void) { + u32 oldMSR; + oldMSR = PPCMfmsr(); + __sync(); + PPCMtmsr(MSR_IR | MSR_DR); + __sync(); + L2Disable(); + L2GlobalInvalidate(); + PPCMtmsr(oldMSR); +} + +void L2Enable(void) { PPCMtl2cr((PPCMfl2cr() | L2CR_L2E) & ~L2CR_L2I); } + +void DMAErrorHandler(OSError error, OSContext* context, ...) { + u32 hid2 = PPCMfhid2(); + + OSReport("Machine check received\n"); + OSReport("HID2 = 0x%x SRR1 = 0x%x\n", hid2, context->srr1); + if (!(hid2 & (HID2_DCHERR | HID2_DNCERR | HID2_DCMERR | HID2_DQOERR)) || + !(context->srr1 & SRR1_DMA_BIT)) { + OSReport("Machine check was not DMA/locked cache related\n"); + OSDumpContext(context); + PPCHalt(); + } + + OSReport("DMAErrorHandler(): An error occurred while processing DMA.\n"); + OSReport("The following errors have been detected and cleared :\n"); + + if (hid2 & HID2_DCHERR) { + OSReport("\t- Requested a locked cache tag that was already in the cache\n"); + } + + if (hid2 & HID2_DNCERR) { + OSReport("\t- DMA attempted to access normal cache\n"); + } + + if (hid2 & HID2_DCMERR) { + OSReport("\t- DMA missed in data cache\n"); + } + + if (hid2 & HID2_DQOERR) { + OSReport("\t- DMA queue overflowed\n"); + } + + // write hid2 back to clear the error bits + PPCMthid2(hid2); +} + +void __OSCacheInit() { + if (!(PPCMfhid0() & HID0_ICE)) { + ICEnable(); + DBPrintf("L1 i-caches initialized\n"); + } + if (!(PPCMfhid0() & HID0_DCE)) { + DCEnable(); + DBPrintf("L1 d-caches initialized\n"); + } + + if (!(PPCMfl2cr() & L2CR_L2E)) { + L2Init(); + L2Enable(); + DBPrintf("L2 cache initialized\n"); + } + + OSSetErrorHandler(OS_ERROR_MACHINE_CHECK, DMAErrorHandler); + DBPrintf("Locked cache machine check handler installed\n"); +}