Path: blob/master/src/core/cpu_recompiler_riscv64.cpp
4802 views
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>1// SPDX-License-Identifier: CC-BY-NC-ND-4.023#include "cpu_recompiler_riscv64.h"4#include "cpu_code_cache_private.h"5#include "cpu_core_private.h"6#include "cpu_pgxp.h"7#include "gte.h"8#include "settings.h"9#include "timing_event.h"1011#include "common/align.h"12#include "common/assert.h"13#include "common/log.h"14#include "common/memmap.h"15#include "common/string_util.h"1617#include <limits>1819#ifdef CPU_ARCH_RISCV642021LOG_CHANNEL(Recompiler);2223#ifdef ENABLE_HOST_DISASSEMBLY24extern "C" {25#include "riscv-disas.h"26}27#endif2829// For LW/SW/etc.30#define PTR(x) ((u32)(((u8*)(x)) - ((u8*)&g_state))), RSTATE3132static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr3334#define RRET biscuit::a035#define RARG1 biscuit::a036#define RARG2 biscuit::a137#define RARG3 biscuit::a238#define RSCRATCH biscuit::t639#define RSTATE biscuit::s1040#define RMEMBASE biscuit::s114142static bool rvIsCallerSavedRegister(u32 id);43static bool rvIsValidSExtITypeImm(u32 imm);44static std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);45static void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr);46static void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);47static void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);48static u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);49static u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);50static void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,51bool sign_extend_word = false);52static void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,53const biscuit::GPR& tempreg = RSCRATCH);54static void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> 
word55static void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word56static void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word57static void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word58static void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword59static void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword6061namespace CPU {6263using namespace biscuit;6465RISCV64Recompiler s_instance;66Recompiler* g_compiler = &s_instance;6768} // namespace CPU6970bool rvIsCallerSavedRegister(u32 id)71{72return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31));73}7475bool rvIsValidSExtITypeImm(u32 imm)76{77return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm);78}7980std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target)81{82const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur));83Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) &&84disp <= static_cast<s64>(std::numeric_limits<s32>::max()));8586const s64 hi = disp + 0x800;87const s64 lo = disp - (hi & 0xFFFFF000);88return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52));89}9091void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr)92{93const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);94rvAsm->AUIPC(reg, hi);95rvAsm->ADDI(reg, reg, lo);96}9798void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm)99{100// Borrowed from biscuit, but doesn't emit an ADDI if the lower 12 bits are zero.101const u32 lower = imm & 0xFFF;102const u32 upper = (imm & 0xFFFFF000) >> 12;103const s32 simm = static_cast<s32>(imm);104if 
(rvIsValidSExtITypeImm(simm))105{106rvAsm->ADDI(rd, biscuit::zero, static_cast<s32>(lower));107}108else109{110const bool needs_increment = (lower & 0x800) != 0;111const u32 upper_imm = needs_increment ? upper + 1 : upper;112rvAsm->LUI(rd, upper_imm);113rvAsm->ADDI(rd, rd, static_cast<int32_t>(lower));114}115}116117void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm)118{119// TODO: Make better..120rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32));121rvEmitMov(rvAsm, scratch, static_cast<u32>(imm));122rvAsm->SLLI64(rd, rd, 32);123rvAsm->SLLI64(scratch, scratch, 32);124rvAsm->SRLI64(scratch, scratch, 32);125rvAsm->ADD(rd, rd, scratch);126}127128u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg)129{130// TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes131const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr);132rvAsm->AUIPC(RSCRATCH, hi);133rvAsm->JALR(link_reg, lo, RSCRATCH);134return 8;135}136137u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)138{139return rvEmitJmp(rvAsm, ptr, biscuit::ra);140}141142void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word)143{144const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);145rvAsm->AUIPC(reg, hi);146if (sign_extend_word)147rvAsm->LW(reg, lo, reg);148else149rvAsm->LWU(reg, lo, reg);150}151152[[maybe_unused]] void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,153const biscuit::GPR& tempreg)154{155const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);156rvAsm->AUIPC(tempreg, hi);157rvAsm->SW(reg, lo, tempreg);158}159160void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)161{162rvAsm->SLLI(rd, rs, 24);163rvAsm->SRAIW(rd, rd, 24);164}165166void rvEmitUExtB(biscuit::Assembler* rvAsm, const 
biscuit::GPR& rd, const biscuit::GPR& rs)167{168rvAsm->ANDI(rd, rs, 0xFF);169}170171void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)172{173rvAsm->SLLI(rd, rs, 16);174rvAsm->SRAIW(rd, rd, 16);175}176177void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)178{179rvAsm->SLLI(rd, rs, 16);180rvAsm->SRLI(rd, rd, 16);181}182183void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)184{185rvAsm->ADDIW(rd, rs, 0);186}187188void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)189{190rvAsm->SLLI64(rd, rs, 32);191rvAsm->SRLI64(rd, rd, 32);192}193194void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)195{196#ifdef ENABLE_HOST_DISASSEMBLY197const u8* cur = static_cast<const u8*>(start);198const u8* end = cur + size;199char buf[256];200while (cur < end)201{202rv_inst inst;203size_t instlen;204inst_fetch(cur, &inst, &instlen);205disasm_inst(buf, std::size(buf), rv64, static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), inst);206DEBUG_LOG("\t0x{:016X}\t{}", static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), buf);207cur += instlen;208}209#else210ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");211#endif212}213214u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)215{216#ifdef ENABLE_HOST_DISASSEMBLY217const u8* cur = static_cast<const u8*>(start);218const u8* end = cur + size;219u32 icount = 0;220while (cur < end)221{222rv_inst inst;223size_t instlen;224inst_fetch(cur, &inst, &instlen);225cur += instlen;226icount++;227}228return icount;229#else230ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");231return 0;232#endif233}234235u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)236{237using namespace biscuit;238239Assembler actual_asm(static_cast<u8*>(code), code_size);240Assembler* rvAsm = &actual_asm;241242Label dispatch;243Label 
run_events_and_dispatch;244245g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(rvAsm->GetCursorPointer());246{247// TODO: reserve some space for saving caller-saved registers248249// Need the CPU state for basically everything :-)250rvMoveAddressToReg(rvAsm, RSTATE, &g_state);251252// Fastmem setup253if (IsUsingFastmem())254rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));255256// Fall through to event dispatcher257}258259// check events then for frame done260{261Label skip_event_check;262rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));263rvAsm->LW(RARG2, PTR(&g_state.downcount));264rvAsm->BLTU(RARG1, RARG2, &skip_event_check);265266rvAsm->Bind(&run_events_and_dispatch);267g_run_events_and_dispatch = rvAsm->GetCursorPointer();268rvEmitCall(rvAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents));269270rvAsm->Bind(&skip_event_check);271}272273// TODO: align?274g_dispatcher = rvAsm->GetCursorPointer();275{276rvAsm->Bind(&dispatch);277278// x9 <- s_fast_map[pc >> 16]279rvAsm->LW(RARG1, PTR(&g_state.pc));280rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());281rvAsm->SRLIW(RARG2, RARG1, 16);282rvAsm->SLLI(RARG2, RARG2, 3);283rvAsm->ADD(RARG2, RARG2, RARG3);284rvAsm->LD(RARG2, 0, RARG2);285rvAsm->SLLI64(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2286rvAsm->SRLI64(RARG1, RARG1, 50);287rvAsm->SLLI(RARG1, RARG1, 3);288289// blr(x9[pc * 2]) (fast_map[idx])290rvAsm->ADD(RARG1, RARG1, RARG2);291rvAsm->LD(RARG1, 0, RARG1);292rvAsm->JR(RARG1);293}294295g_compile_or_revalidate_block = rvAsm->GetCursorPointer();296{297rvAsm->LW(RARG1, PTR(&g_state.pc));298rvEmitCall(rvAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock));299rvAsm->J(&dispatch);300}301302g_discard_and_recompile_block = rvAsm->GetCursorPointer();303{304rvAsm->LW(RARG1, PTR(&g_state.pc));305rvEmitCall(rvAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock));306rvAsm->J(&dispatch);307}308309g_interpret_block = rvAsm->GetCursorPointer();310{311rvEmitCall(rvAsm, 
CodeCache::GetInterpretUncachedBlockFunction());312rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));313rvAsm->LW(RARG2, PTR(&g_state.downcount));314rvAsm->BGE(RARG1, RARG2, &run_events_and_dispatch);315rvAsm->J(&dispatch);316}317318// TODO: align?319320return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());321}322323void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)324{325constexpr u8 padding_value = 0x00;326std::memset(dst, padding_value, size);327}328329u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)330{331// TODO: get rid of assembler construction here332{333biscuit::Assembler assembler(static_cast<u8*>(code), BLOCK_LINK_SIZE);334rvEmitCall(&assembler, dst);335336DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE);337if (assembler.GetCodeBuffer().GetRemainingBytes() > 0)338assembler.NOP();339}340341if (flush_icache)342MemMap::FlushInstructionCache(code, BLOCK_LINK_SIZE);343344return BLOCK_LINK_SIZE;345}346347CPU::RISCV64Recompiler::RISCV64Recompiler() = default;348349CPU::RISCV64Recompiler::~RISCV64Recompiler() = default;350351const void* CPU::RISCV64Recompiler::GetCurrentCodePointer()352{353return rvAsm->GetCursorPointer();354}355356void CPU::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,357u32 far_code_space)358{359Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);360361// TODO: don't recreate this every time..362DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);363m_emitter = std::make_unique<Assembler>(code_buffer, code_buffer_space);364m_far_emitter = std::make_unique<Assembler>(far_code_buffer, far_code_space);365rvAsm = m_emitter.get();366367// Need to wipe it out so it's correct when toggling fastmem.368m_host_regs = {};369370const u32 membase_idx = CodeCache::IsUsingFastmem() ? 
RMEMBASE.Index() : NUM_HOST_REGS;371for (u32 i = 0; i < NUM_HOST_REGS; i++)372{373HostRegAlloc& hra = m_host_regs[i];374375if (i == RARG1.Index() || i == RARG2.Index() || i == RARG3.Index() || i == RSCRATCH.Index() ||376i == RSTATE.Index() || i == membase_idx || i < 5 /* zero, ra, sp, gp, tp */)377{378continue;379}380381hra.flags = HR_USABLE | (rvIsCallerSavedRegister(i) ? 0 : HR_CALLEE_SAVED);382}383}384385void CPU::RISCV64Recompiler::SwitchToFarCode(bool emit_jump,386void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR,387biscuit::Label*) /* = nullptr */,388const biscuit::GPR& rs1 /* = biscuit::zero */,389const biscuit::GPR& rs2 /* = biscuit::zero */)390{391DebugAssert(rvAsm == m_emitter.get());392if (emit_jump)393{394const void* target = m_far_emitter->GetCursorPointer();395if (inverted_cond)396{397Label skip;398(rvAsm->*inverted_cond)(rs1, rs2, &skip);399rvEmitJmp(rvAsm, target);400rvAsm->Bind(&skip);401}402else403{404rvEmitCall(rvAsm, target);405}406}407rvAsm = m_far_emitter.get();408}409410void CPU::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)411{412DebugAssert(rvAsm == m_far_emitter.get());413if (emit_jump)414rvEmitJmp(rvAsm, m_emitter->GetCursorPointer());415rvAsm = m_emitter.get();416}417418void CPU::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)419{420rvEmitMov(rvAsm, dst, val);421}422423void CPU::RISCV64Recompiler::EmitCall(const void* ptr)424{425rvEmitCall(rvAsm, ptr);426}427428void CPU::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,429void (biscuit::Assembler::*iop)(GPR, GPR, u32),430void (biscuit::Assembler::*rop)(GPR, GPR, GPR))431{432DebugAssert(rd != RSCRATCH && rs != RSCRATCH);433434if (rvIsValidSExtITypeImm(imm))435{436(rvAsm->*iop)(rd, rs, imm);437return;438}439440rvEmitMov(rvAsm, RSCRATCH, imm);441(rvAsm->*rop)(rd, rs, RSCRATCH);442}443444void CPU::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)445{446SafeImmSExtIType(rd, rs, 
imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),447&Assembler::ADD);448}449450void CPU::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)451{452SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),453&Assembler::ADDW);454}455456void CPU::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)457{458const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));459SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),460&Assembler::ADDW);461}462463void CPU::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)464{465SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);466}467468void CPU::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)469{470SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);471}472473void CPU::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)474{475SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);476}477478void CPU::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)479{480SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),481&Assembler::SLT);482}483484void CPU::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)485{486SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),487&Assembler::SLTU);488}489490void CPU::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)491{492rvEmitSExtB(rvAsm, rd, rs);493}494495void CPU::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)496{497rvEmitUExtB(rvAsm, rd, rs);498}499500void CPU::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, 
const biscuit::GPR& rs)501{502rvEmitSExtH(rvAsm, rd, rs);503}504505void CPU::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)506{507rvEmitUExtH(rvAsm, rd, rs);508}509510void CPU::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)511{512rvEmitDSExtW(rvAsm, rd, rs);513}514515void CPU::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)516{517rvEmitDUExtW(rvAsm, rd, rs);518}519520void CPU::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)521{522// store it first to reduce code size, because we can offset523// TODO: 64-bit displacement is needed :/524// rvMoveAddressToReg(rvAsm, RARG1, ram_ptr);525// rvMoveAddressToReg(rvAsm, RARG2, shadow_ptr);526rvEmitMov64(rvAsm, RARG1, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(ram_ptr)));527rvEmitMov64(rvAsm, RARG2, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(shadow_ptr)));528529u32 offset = 0;530Label block_changed;531532while (size >= 8)533{534rvAsm->LD(RARG3, offset, RARG1);535rvAsm->LD(RSCRATCH, offset, RARG2);536rvAsm->BNE(RARG3, RSCRATCH, &block_changed);537offset += 8;538size -= 8;539}540541while (size >= 4)542{543rvAsm->LW(RARG3, offset, RARG1);544rvAsm->LW(RSCRATCH, offset, RARG2);545rvAsm->BNE(RARG3, RSCRATCH, &block_changed);546offset += 4;547size -= 4;548}549550DebugAssert(size == 0);551552Label block_unchanged;553rvAsm->J(&block_unchanged);554rvAsm->Bind(&block_changed);555rvEmitJmp(rvAsm, CodeCache::g_discard_and_recompile_block);556rvAsm->Bind(&block_unchanged);557}558559void CPU::RISCV64Recompiler::GenerateICacheCheckAndUpdate()560{561if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))562{563if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))564{565rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());566rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));567rvEmitMov(rvAsm, RARG3, m_block->size);568rvAsm->MULW(RARG2, RARG2, 
RARG3);569rvAsm->ADD(RARG1, RARG1, RARG2);570rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));571}572else573{574rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));575SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));576rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));577}578}579else if (m_block->icache_line_count > 0)580{581const auto& ticks_reg = RARG1;582const auto& current_tag_reg = RARG2;583const auto& existing_tag_reg = RARG3;584585// start of block, nothing should be using this586const auto& maddr_reg = biscuit::t0;587DebugAssert(!IsHostRegAllocated(maddr_reg.Index()));588589VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;590rvAsm->LW(ticks_reg, PTR(&g_state.pending_ticks));591rvEmitMov(rvAsm, current_tag_reg, current_pc);592593for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)594{595const TickCount fill_ticks = GetICacheFillTicks(current_pc);596if (fill_ticks <= 0)597continue;598599const u32 line = GetICacheLine(current_pc);600const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));601602// Offsets must fit in signed 12 bits.603Label cache_hit;604if (offset >= 2048)605{606SafeADDI(maddr_reg, RSTATE, offset);607rvAsm->LW(existing_tag_reg, 0, maddr_reg);608rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);609rvAsm->SW(current_tag_reg, 0, maddr_reg);610}611else612{613rvAsm->LW(existing_tag_reg, offset, RSTATE);614rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);615rvAsm->SW(current_tag_reg, offset, RSTATE);616}617618SafeADDIW(ticks_reg, ticks_reg, static_cast<u32>(fill_ticks));619rvAsm->Bind(&cache_hit);620621if (i != (m_block->icache_line_count - 1))622SafeADDIW(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);623}624625rvAsm->SW(ticks_reg, PTR(&g_state.pending_ticks));626}627}628629void CPU::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,630s32 arg3reg /*= -1*/)631{632if (arg1reg >= 0 && arg1reg != 
static_cast<s32>(RARG1.Index()))633rvAsm->MV(RARG1, GPR(arg1reg));634if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.Index()))635rvAsm->MV(RARG2, GPR(arg2reg));636if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.Index()))637rvAsm->MV(RARG3, GPR(arg3reg));638EmitCall(func);639}640641void CPU::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)642{643if (newpc.has_value())644{645if (m_dirty_pc || m_compiler_pc != newpc)646{647EmitMov(RSCRATCH, newpc.value());648rvAsm->SW(RSCRATCH, PTR(&g_state.pc));649}650}651m_dirty_pc = false;652653// flush regs654Flush(FLUSH_END_BLOCK);655EndAndLinkBlock(newpc, do_event_test, false);656}657658void CPU::RISCV64Recompiler::EndBlockWithException(Exception excode)659{660// flush regs, but not pc, it's going to get overwritten661// flush cycles because of the GTE instruction stuff...662Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);663664// TODO: flush load delay665666EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false,667inst->cop.cop_n));668EmitMov(RARG2, m_current_instruction_pc);669if (excode != Exception::BP)670{671EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));672}673else674{675EmitMov(RARG3, inst->bits);676EmitCall(reinterpret_cast<const void*>(&CPU::RaiseBreakException));677}678m_dirty_pc = false;679680EndAndLinkBlock(std::nullopt, true, false);681}682683void CPU::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events)684{685// event test686// pc should've been flushed687DebugAssert(!m_dirty_pc && !m_block_ended);688m_block_ended = true;689690// TODO: try extracting this to a function691// TODO: move the cycle flush in here..692693// save cycles for event test694const TickCount cycles = std::exchange(m_cycles, 0);695696// pending_ticks += cycles697// if (pending_ticks >= downcount) { dispatch_event(); }698if 
(do_event_test || m_gte_done_cycle > cycles || cycles > 0)699rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));700if (do_event_test)701rvAsm->LW(RARG2, PTR(&g_state.downcount));702if (cycles > 0)703{704SafeADDIW(RARG1, RARG1, cycles);705rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));706}707if (m_gte_done_cycle > cycles)708{709SafeADDIW(RARG2, RARG1, m_gte_done_cycle - cycles);710rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));711}712713if (do_event_test)714{715// TODO: see if we can do a far jump somehow with this..716Label cont;717rvAsm->BLT(RARG1, RARG2, &cont);718rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);719rvAsm->Bind(&cont);720}721722// jump to dispatcher or next block723if (force_run_events)724{725rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);726}727else if (!newpc.has_value())728{729rvEmitJmp(rvAsm, CodeCache::g_dispatcher);730}731else732{733const void* target =734(newpc.value() == m_block->pc) ?735CodeCache::CreateSelfBlockLink(m_block, rvAsm->GetCursorPointer(), rvAsm->GetBufferPointer(0)) :736CodeCache::CreateBlockLink(m_block, rvAsm->GetCursorPointer(), newpc.value());737rvEmitJmp(rvAsm, target);738}739}740741const void* CPU::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)742{743u8* const code = m_emitter->GetBufferPointer(0);744*code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());745*far_code_size = static_cast<u32>(m_far_emitter->GetCodeBuffer().GetSizeInBytes());746rvAsm = nullptr;747m_far_emitter.reset();748m_emitter.reset();749return code;750}751752const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const753{754static constexpr std::array<const char*, 32> reg64_names = {755{"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",756"a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};757return (reg < reg64_names.size()) ? 
reg64_names[reg] : "UNKNOWN";758}759760void CPU::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)761{762EmitMov(GPR(reg), val);763}764765void CPU::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)766{767rvAsm->LW(GPR(reg), PTR(ptr));768}769770void CPU::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)771{772rvAsm->SW(GPR(reg), PTR(ptr));773}774775void CPU::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)776{777if (val == 0)778{779rvAsm->SW(zero, PTR(ptr));780return;781}782783EmitMov(RSCRATCH, val);784rvAsm->SW(RSCRATCH, PTR(ptr));785}786787void CPU::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)788{789if (src != dst)790rvAsm->MV(GPR(dst), GPR(src));791}792793void CPU::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const794{795DebugAssert(cf.valid_host_s || cf.const_s);796}797798void CPU::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const799{800DebugAssert(cf.valid_host_t || cf.const_t);801}802803biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)804{805if (cf.valid_host_s)806{807return GPR(cf.host_s);808}809else if (cf.const_s)810{811if (HasConstantRegValue(cf.MipsS(), 0))812return zero;813814EmitMov(temp_reg, GetConstantRegU32(cf.MipsS()));815return temp_reg;816}817else818{819WARNING_LOG("Hit memory path in CFGetSafeRegS() for {}", GetRegName(cf.MipsS()));820rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_s]));821return temp_reg;822}823}824825biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)826{827if (cf.valid_host_t)828{829return GPR(cf.host_t);830}831else if (cf.const_t)832{833if (HasConstantRegValue(cf.MipsT(), 0))834return zero;835836EmitMov(temp_reg, GetConstantRegU32(cf.MipsT()));837return temp_reg;838}839else840{841WARNING_LOG("Hit memory path in CFGetSafeRegT() for {}", GetRegName(cf.MipsT()));842rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_t]));843return 
temp_reg;844}845}846847biscuit::GPR CPU::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const848{849DebugAssert(cf.valid_host_d);850return GPR(cf.host_d);851}852853biscuit::GPR CPU::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const854{855DebugAssert(cf.valid_host_s);856return GPR(cf.host_s);857}858859biscuit::GPR CPU::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const860{861DebugAssert(cf.valid_host_t);862return GPR(cf.host_t);863}864865biscuit::GPR CPU::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const866{867DebugAssert(cf.valid_host_lo);868return GPR(cf.host_lo);869}870871biscuit::GPR CPU::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const872{873DebugAssert(cf.valid_host_hi);874return GPR(cf.host_hi);875}876877void CPU::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)878{879if (cf.valid_host_s)880{881if (cf.host_s != dst.Index())882rvAsm->MV(dst, GPR(cf.host_s));883}884else if (cf.const_s)885{886EmitMov(dst, GetConstantRegU32(cf.MipsS()));887}888else889{890WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS()));891rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));892}893}894895void CPU::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)896{897if (cf.valid_host_t)898{899if (cf.host_t != dst.Index())900rvAsm->MV(dst, GPR(cf.host_t));901}902else if (cf.const_t)903{904EmitMov(dst, GetConstantRegU32(cf.MipsT()));905}906else907{908WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT()));909rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_t]));910}911}912913void CPU::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg, bool ignore_load_delays)914{915DebugAssert(reg < Reg::count);916if (ignore_load_delays && m_load_delay_register == reg)917{918if (m_load_delay_value_register == NUM_HOST_REGS)919rvAsm->LW(dst, PTR(&g_state.load_delay_value));920else921rvAsm->MV(dst, GPR(m_load_delay_value_register));922}923else if (const std::optional<u32> hreg = CheckHostReg(0, 
Recompiler::HR_TYPE_CPU_REG, reg))924{925rvAsm->MV(dst, GPR(hreg.value()));926}927else if (HasConstantReg(reg))928{929EmitMov(dst, GetConstantRegU32(reg));930}931else932{933rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));934}935}936937void CPU::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */,938Reg arg3reg /* = Reg::count */)939{940DebugAssert(g_settings.gpu_pgxp_enable);941942Flush(FLUSH_FOR_C_CALL);943944if (arg2reg != Reg::count)945MoveMIPSRegToReg(RARG2, arg2reg);946if (arg3reg != Reg::count)947MoveMIPSRegToReg(RARG3, arg3reg);948949EmitMov(RARG1, arg1val);950EmitCall(func);951}952953void CPU::RISCV64Recompiler::Flush(u32 flags)954{955Recompiler::Flush(flags);956957if (flags & FLUSH_PC && m_dirty_pc)958{959StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc);960m_dirty_pc = false;961}962963if (flags & FLUSH_INSTRUCTION_BITS)964{965// This sucks, but it's only used for fallbacks.966Panic("Not implemented");967}968969if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)970{971// This sucks :(972// TODO: make it a function?973rvAsm->LBU(RARG1, PTR(&g_state.load_delay_reg));974rvAsm->LW(RARG2, PTR(&g_state.load_delay_value));975rvAsm->SLLI(RARG1, RARG1, 2); // *4976rvAsm->ADD(RARG1, RARG1, RSTATE);977rvAsm->SW(RARG2, OFFSETOF(CPU::State, regs.r[0]), RARG1);978rvAsm->LI(RSCRATCH, static_cast<u8>(Reg::count));979rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));980m_load_delay_dirty = false;981}982983if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count)984{985if (m_load_delay_value_register != NUM_HOST_REGS)986FreeHostReg(m_load_delay_value_register);987988EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register));989rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));990m_load_delay_register = Reg::count;991m_load_delay_dirty = true;992}993994if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle)995{996// May as well flush cycles while we're here.997// GTE 
spanning blocks is very rare, we _could_ disable this for speed.998rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));999rvAsm->LW(RARG2, PTR(&g_state.gte_completion_tick));1000if (m_cycles > 0)1001{1002SafeADDIW(RARG1, RARG1, m_cycles);1003m_cycles = 0;1004}1005Label no_stall;1006rvAsm->BGE(RARG1, RARG2, &no_stall);1007rvAsm->MV(RARG1, RARG2);1008rvAsm->Bind(&no_stall);1009rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));1010m_dirty_gte_done_cycle = false;1011}10121013if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles)1014{1015rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));10161017// update cycles at the same time1018if (flags & FLUSH_CYCLES && m_cycles > 0)1019{1020SafeADDIW(RARG1, RARG1, m_cycles);1021rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));1022m_gte_done_cycle -= m_cycles;1023m_cycles = 0;1024}10251026SafeADDIW(RARG1, RARG1, m_gte_done_cycle);1027rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));1028m_gte_done_cycle = 0;1029m_dirty_gte_done_cycle = true;1030}10311032if (flags & FLUSH_CYCLES && m_cycles > 0)1033{1034rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));1035SafeADDIW(RARG1, RARG1, m_cycles);1036rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));1037m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0);1038m_cycles = 0;1039}1040}10411042void CPU::RISCV64Recompiler::Compile_Fallback()1043{1044WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,1045inst->bits);10461047Flush(FLUSH_FOR_INTERPRETER);10481049#if 01050cg->call(&CPU::RecompilerThunks::InterpretInstruction);10511052// TODO: make me less garbage1053// TODO: this is wrong, it flushes the load delay on the same cycle when we return.1054// but nothing should be going through here..1055Label no_load_delay;1056cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);1057cg->cmp(RWARG1, static_cast<u8>(Reg::count));1058cg->je(no_load_delay, CodeGenerator::T_SHORT);1059cg->mov(RWARG2, 
          cg->dword[PTR(&g_state.next_load_delay_value)]);
  cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
  cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
  cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
  cg->L(no_load_delay);

  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
#else
  Panic("Fixme");
#endif
}

// Emits an alignment check on an indirect branch target; only active when memory
// exceptions are enabled. Misaligned targets raise AdEL on the far-code path.
void CPU::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
{
  if (!g_settings.cpu_recompiler_memory_exceptions)
    return;

  DebugAssert(pcreg != RSCRATCH);
  rvAsm->ANDI(RSCRATCH, pcreg, 0x3);
  SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);

  BackupHostState();
  EndBlockWithException(Exception::AdEL);

  RestoreHostState();
  SwitchToNearCode(false);
}

// jr: jump to the address held in rs; the delay slot executes before the block ends.
void CPU::RISCV64Recompiler::Compile_jr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  CheckBranchTarget(pcreg);

  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

// jalr: as jr, but also writes the return address into rd (when rd != zero).
void CPU::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetBranchReturnAddress(cf));

  CheckBranchTarget(pcreg);
  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

// Conditional branches (beq/bne/bgtz/bgez/bltz/blez). Emits a host branch to the
// taken path; both taken/not-taken paths compile the delay slot (unless it was
// swapped ahead of the branch) and end the block with their respective targets.
void CPU::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
{
  AssertRegOrConstS(cf);

  const u32 taken_pc = GetConditionalBranchTarget(cf);

  Flush(FLUSH_FOR_BRANCH);

  DebugAssert(cf.valid_host_s);

  // MipsT() here should equal zero for zero branches.
  DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);

  Label taken;
  const GPR rs = CFGetRegS(cf);
  switch (cond)
  {
    case BranchCondition::Equal:
    case BranchCondition::NotEqual:
    {
      AssertRegOrConstT(cf);
      if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0))
      {
        // Comparison against constant zero maps directly to BEQZ/BNEZ.
        (cond == BranchCondition::Equal) ? rvAsm->BEQZ(rs, &taken) : rvAsm->BNEZ(rs, &taken);
      }
      else
      {
        const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
        if (!cf.valid_host_t)
          MoveTToReg(RARG1, cf);
        if (cond == Recompiler::BranchCondition::Equal)
          rvAsm->BEQ(rs, rt, &taken);
        else
          rvAsm->BNE(rs, rt, &taken);
      }
    }
    break;

    case BranchCondition::GreaterThanZero:
    {
      rvAsm->BGTZ(rs, &taken);
    }
    break;

    case BranchCondition::GreaterEqualZero:
    {
      rvAsm->BGEZ(rs, &taken);
    }
    break;

    case BranchCondition::LessThanZero:
    {
      rvAsm->BLTZ(rs, &taken);
    }
    break;

    case BranchCondition::LessEqualZero:
    {
      rvAsm->BLEZ(rs, &taken);
    }
    break;
  }

  // Not-taken path: fall through to the next instruction.
  BackupHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(m_compiler_pc, true);

  // Taken path: restore the register state from before the not-taken compile.
  rvAsm->Bind(&taken);

  RestoreHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(taken_pc, true);
}

// Shared body for addi/addiu. With overflow checking, the 64-bit sum (SafeADDI) is
// compared against the sign-extended 32-bit sum (SafeADDIW): they differ exactly
// when a signed 32-bit overflow occurred.
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
{
  const GPR rs = CFGetRegS(cf);
  const GPR rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_sext32(); imm != 0)
  {
    if (!overflow)
    {
      SafeADDIW(rt, rs, imm);
    }
    else
    {
      SafeADDI(RARG1, rs, imm);
      SafeADDIW(rt, rs, imm);
      TestOverflow(RARG1, rt, rt);
    }
  }
  else if (rt.Index() != rs.Index())
  {
    // imm == 0 -> plain register move.
    rvAsm->MV(rt, rs);
  }
}

void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf)
{
  Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
{
  Compile_addi(cf, false);
}

void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf)
{
  Compile_slti(cf, true);
}

void CPU::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
{
  Compile_slti(cf, false);
}

void
CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign)
{
  // slti/sltiu: set-on-less-than immediate; SafeSLT* falls back to a scratch
  // register when the immediate does not fit the I-type encoding.
  if (sign)
    SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
  else
    SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
}

// andi: rt = rs & zext(imm); imm == 0 short-circuits to rt = 0.
void CPU::RISCV64Recompiler::Compile_andi(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeANDI(rt, CFGetRegS(cf), imm);
  else
    EmitMov(rt, 0);
}

// ori: rt = rs | zext(imm); imm == 0 is just a move.
void CPU::RISCV64Recompiler::Compile_ori(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  const GPR rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeORI(rt, rs, imm);
  else if (rt.Index() != rs.Index())
    rvAsm->MV(rt, rs);
}

// xori: rt = rs ^ zext(imm); imm == 0 is just a move.
void CPU::RISCV64Recompiler::Compile_xori(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  const GPR rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeXORI(rt, rs, imm);
  else if (rt.Index() != rs.Index())
    rvAsm->MV(rt, rs);
}

// Shared body for constant-shamt shifts (sll/srl/sra). Note only op_const is used
// here; the register-register op parameter is unused for the constant form.
void CPU::RISCV64Recompiler::Compile_shift(CompileFlags cf,
                                           void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
                                           void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
{
  const GPR rd = CFGetRegD(cf);
  const GPR rt = CFGetRegT(cf);
  if (inst->r.shamt > 0)
    (rvAsm->*op_const)(rd, rt, inst->r.shamt);
  else if (rd.Index() != rt.Index())
    rvAsm->MV(rd, rt); // shamt == 0 -> move

  // W-form shifts keep the value sign-extended 32-bit, matching MIPS semantics.
}

void CPU::RISCV64Recompiler::Compile_sll(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
}

void CPU::RISCV64Recompiler::Compile_srl(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
}

void CPU::RISCV64Recompiler::Compile_sra(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
}

// Shared body for variable shifts (sllv/srlv/srav). A constant rs uses the
// immediate form with the MIPS 5-bit amount mask; RISC-V W-form register shifts
// also use only the low 5 bits, so no explicit mask is needed in the register case.
void CPU::RISCV64Recompiler::Compile_variable_shift(
  CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR,
CFGetRegS(cf) : RARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const GPR rlo = CFGetRegLO(cf);
  const GPR rhi = CFGetRegHI(cf);

  // divu: inputs are held sign-extended, and DIVUW/REMUW operate on the low 32
  // bits only, so the extensions are irrelevant here. Division by zero follows
  // the RISC-V spec (quotient all-ones, remainder = dividend), which matches the
  // MIPS unsigned divide-by-zero result.
  // Semantics match? :-)
  rvAsm->DIVUW(rlo, rs, rt);
  rvAsm->REMUW(rhi, rs, rt);
}

// Branches to far code (raising Exception::Ov) when the 64-bit result differs from
// the sign-extended 32-bit result; reg_to_discard is dropped from the register
// cache on the exception path since its value is bogus.
void CPU::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
                                          const biscuit::GPR& reg_to_discard)
{
  SwitchToFarCode(true, &Assembler::BEQ, long_res, res);

  BackupHostState();

  // toss the result
  ClearHostReg(reg_to_discard.Index());

  EndBlockWithException(Exception::Ov);

  RestoreHostState();

  SwitchToNearCode(false);
}

// Generic three-operand ALU emitter: op is the 32-bit form, op_const the immediate
// helper, op_long the 64-bit form (used with TestOverflow for trapping add/sub).
// Handles the constant-operand cases; commutative ops can fold a constant on
// either side.
void CPU::RISCV64Recompiler::Compile_dst_op(
  CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
  void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
  void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const GPR rd = CFGetRegD(cf);

  if (overflow)
  {
    const GPR rs = CFGetSafeRegS(cf, RARG1);
    const GPR rt = CFGetSafeRegT(cf, RARG2);
    (rvAsm->*op)(RARG3, rs, rt);
    (rvAsm->*op_long)(rd, rs, rt);
    TestOverflow(RARG3, rd, rd);
    return;
  }

  if (cf.valid_host_s && cf.valid_host_t)
  {
    (rvAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
  }
  else if (commutative && (cf.const_s || cf.const_t))
  {
    const GPR src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (this->*op_const)(rd, src, cv);
    }
    else
    {
      if (rd.Index() != src.Index())
        rvAsm->MV(rd, src);
      overflow = false; // NOTE(review): dead store - the overflow path returned above.
    }
  }
  else if (cf.const_s)
  {
    // Non-commutative with constant lhs: materialize it (zero can use the zero reg).
    if (HasConstantRegValue(cf.MipsS(), 0))
    {
      (rvAsm->*op)(rd, zero, CFGetRegT(cf));
    }
    else
    {
      EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS()));
      (rvAsm->*op)(rd, RSCRATCH, CFGetRegT(cf));
    }
  }
  else if (cf.const_t)
  {
    const GPR rs = CFGetRegS(cf);
    // const_s is false in this branch, so the selector below always picks MipsT().
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (this->*op_const)(rd, rs, cv);
    }
    else
    {
      if (rd.Index() != rs.Index())
        rvAsm->MV(rd, rs);
      overflow = false; // NOTE(review): dead store - the overflow path returned above.
    }
  }
}

// add: trapping form when memory exceptions are enabled.
void CPU::RISCV64Recompiler::Compile_add(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,
                 g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::RISCV64Recompiler::Compile_addu(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);
}

void CPU::RISCV64Recompiler::Compile_sub(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,
                 g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::RISCV64Recompiler::Compile_subu(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);
}

void CPU::RISCV64Recompiler::Compile_and(CompileFlags cf)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  // special cases - and with self -> self, and with 0 -> 0
  const GPR regd = CFGetRegD(cf);
  if (cf.MipsS() == cf.MipsT())
  {
    rvAsm->MV(regd, CFGetRegS(cf));
    return;
  }
  else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
  {
    EmitMov(regd, 0);
    return;
  }

  Compile_dst_op(cf,
(sign)
      rvAsm->SLT(rd, rs, rt);
    else
      rvAsm->SLTU(rd, rs, rt);
  }
}

// Produces the register holding the effective address (base + sext imm16) for a
// load/store. Returns rs directly when imm == 0 and no specific destination was
// requested; otherwise computes into 'reg' (or RARG1).
biscuit::GPR CPU::RISCV64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,
                                                                const std::optional<VirtualMemoryAddress>& address,
                                                                const std::optional<const biscuit::GPR>& reg)
{
  const u32 imm = inst->i.imm_sext32();
  if (cf.valid_host_s && imm == 0 && !reg.has_value())
    return CFGetRegS(cf);

  const GPR dst = reg.has_value() ? reg.value() : RARG1;
  if (address.has_value())
  {
    // Address is known at compile time.
    EmitMov(dst, address.value());
  }
  else if (imm == 0)
  {
    if (cf.valid_host_s)
    {
      if (const GPR src = CFGetRegS(cf); src.Index() != dst.Index())
        rvAsm->MV(dst, CFGetRegS(cf));
    }
    else
    {
      // rs not in a host register: load it from CPU state.
      rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
    }
  }
  else
  {
    if (cf.valid_host_s)
    {
      SafeADDIW(dst, CFGetRegS(cf), inst->i.imm_sext32());
    }
    else
    {
      rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
      SafeADDIW(dst, dst, inst->i.imm_sext32());
    }
  }

  return dst;
}

// Emits a memory load. Fastmem: direct (or LUT-translated) access through
// RMEMBASE with backpatch info recorded for fault handling. Slowmem: thunk call,
// with an optional checked path that raises the exception encoded in a negative
// return value.
template<typename RegAllocFn>
biscuit::GPR CPU::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
                                                  bool use_fastmem, const RegAllocFn& dst_reg_alloc)
{
  if (use_fastmem)
  {
    m_cycles += Bus::RAM_READ_TICKS;

    // TODO: Make this better.
// If we're loading the address from state, we can use LWU instead, and skip this.
    // TODO: LUT fastmem
    const GPR dst = dst_reg_alloc();
    // Zero-extend the 32-bit guest address (it is held sign-extended in a 64-bit reg).
    rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
    rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);

    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
    {
      // Translate through the per-page LUT (8 bytes per entry) before adding.
      DebugAssert(addr_reg.Index() != RARG3.Index());
      rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
      rvAsm->SLLI64(RARG3, RARG3, 8);
      rvAsm->ADD(RARG3, RARG3, RMEMBASE);
      rvAsm->LD(RARG3, 0, RARG3);
      rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
    }
    else
    {
      rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
    }

    u8* start = m_emitter->GetCursorPointer();
    switch (size)
    {
      case MemoryAccessSize::Byte:
        sign ? rvAsm->LB(dst, 0, RSCRATCH) : rvAsm->LBU(dst, 0, RSCRATCH);
        break;

      case MemoryAccessSize::HalfWord:
        sign ? rvAsm->LH(dst, 0, RSCRATCH) : rvAsm->LHU(dst, 0, RSCRATCH);
        break;

      case MemoryAccessSize::Word:
        rvAsm->LW(dst, 0, RSCRATCH);
        break;
    }

    // We need a nop, because the slowmem jump might be more than 1MB away.
    rvAsm->NOP();

    AddLoadStoreInfo(start, 8, addr_reg.Index(), dst.Index(), size, sign, true);
    return dst;
  }

  // Slowmem path: address goes in RARG1 for the thunk call.
  if (addr_reg.Index() != RARG1.Index())
    rvAsm->MV(RARG1, addr_reg);

  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte));
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord));
    }
    break;
  }

  // TODO: turn this into an asm function instead
  if (checked)
  {
    // Checked thunks signal failure with a negative value (sign bit set).
    rvAsm->SRLI64(RSCRATCH, RRET, 63);
    SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
    BackupHostState();

    // Need to stash this in a temp because of the flush.
    const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
    rvAsm->NEG(temp, RRET);
    rvAsm->SLLIW(temp, temp, 2);

    Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);

    // cause_bits = (-result << 2) | BD | cop_n
    SafeORI(RARG1, temp,
            Cop0Registers::CAUSE::MakeValueForException(
              static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
    EmitMov(RARG2, m_current_instruction_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    FreeHostReg(temp.Index());
    EndBlock(std::nullopt, true);

    RestoreHostState();
    SwitchToNearCode(false);
  }

  // Extend the thunk result into the destination register per access size/signedness.
  const GPR dst_reg = dst_reg_alloc();
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      sign ? EmitSExtB(dst_reg, RRET) : EmitUExtB(dst_reg, RRET);
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      sign ? EmitSExtH(dst_reg, RRET) : EmitUExtH(dst_reg, RRET);
    }
    break;
    case MemoryAccessSize::Word:
    {
      // Need to undo the zero-extend.
      if (checked)
        rvEmitDSExtW(rvAsm, dst_reg, RRET);
      else if (dst_reg.Index() != RRET.Index())
        rvAsm->MV(dst_reg, RRET);
    }
    break;
  }

  return dst_reg;
}

// Emits a memory store; mirrors GenerateLoad (fastmem with backpatch info, or a
// slowmem thunk call with an optional checked exception path).
void CPU::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
                                           MemoryAccessSize size, bool use_fastmem)
{
  if (use_fastmem)
  {
    DebugAssert(value_reg != RSCRATCH);
    // Zero-extend the 32-bit guest address.
    rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
    rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);

    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
    {
      DebugAssert(addr_reg.Index() != RARG3.Index());
      rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
      rvAsm->SLLI64(RARG3, RARG3, 8);
      rvAsm->ADD(RARG3, RARG3, RMEMBASE);
      rvAsm->LD(RARG3, 0, RARG3);
      rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
    }
    else
    {
      rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
    }

    u8* start = m_emitter->GetCursorPointer();
    switch (size)
    {
      case MemoryAccessSize::Byte:
        rvAsm->SB(value_reg, 0, RSCRATCH);
        break;

      case MemoryAccessSize::HalfWord:
        rvAsm->SH(value_reg, 0, RSCRATCH);
        break;

      case MemoryAccessSize::Word:
        rvAsm->SW(value_reg, 0, RSCRATCH);
        break;
    }

    // We need a nop, because the slowmem jump might be more than 1MB away.
    rvAsm->NOP();

    AddLoadStoreInfo(start, 8, addr_reg.Index(), value_reg.Index(), size, false, false);
    return;
  }

  // Slowmem path: address in RARG1, value in RARG2.
  if (addr_reg.Index() != RARG1.Index())
    rvAsm->MV(RARG1, addr_reg);
  if (value_reg.Index() != RARG2.Index())
    rvAsm->MV(RARG2, value_reg);

  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      EmitCall(checked ?
reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) :
                         reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
    }
    break;
  }

  // TODO: turn this into an asm function instead
  if (checked)
  {
    // Checked write thunks return nonzero on failure (the exception cause payload).
    SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
    BackupHostState();

    // Need to stash this in a temp because of the flush.
    const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
    rvAsm->SLLIW(temp, RRET, 2);

    Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);

    // cause_bits = (result << 2) | BD | cop_n
    SafeORI(RARG1, temp,
            Cop0Registers::CAUSE::MakeValueForException(
              static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
    EmitMov(RARG2, m_current_instruction_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    FreeHostReg(temp.Index());
    EndBlock(std::nullopt, true);

    RestoreHostState();
    SwitchToNearCode(false);
  }
}

// lb/lbu/lh/lhu/lw. With PGXP, the address is kept in a callee-saved temp so it
// survives the load and can be passed to the PGXP notification call.
void CPU::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                         const std::optional<VirtualMemoryAddress>& address)
{
  const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
                                        std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                        std::optional<GPR>();
  FlushForLoadStore(address, false, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
    // Loads to $zero are discarded; leave the result in the return register.
    if (cf.MipsT() == Reg::zero)
      return RRET;

    return GPR(AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                               EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT()));
  });

  if (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero)
  {
    Flush(FLUSH_FOR_C_CALL);

    EmitMov(RARG1, inst->bits);
    rvAsm->MV(RARG2, addr);
    rvAsm->MV(RARG3, data);
    EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]);
    FreeHostReg(addr_reg.value().Index());
  }
}

// lwl/lwr: unaligned word load. Performs an aligned word load, then merges the
// relevant bytes into the existing rt value according to the address's low bits.
void CPU::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                         const std::optional<VirtualMemoryAddress>& address)
{
  DebugAssert(size == MemoryAccessSize::Word && !sign);

  const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
  FlushForLoadStore(address, false, use_fastmem);

  // TODO: if address is constant, this can be simplified..

  // If we're coming from another block, just flush the load delay and hope for the best..
  if (m_load_delay_dirty)
    UpdateLoadDelay();

  // We'd need to be careful here if we weren't overwriting it..
  ComputeLoadStoreAddressArg(cf, address, addr);

  // Do PGXP first, it does its own load.
  if (g_settings.gpu_pgxp_enable && inst->r.rt != Reg::zero)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, inst->bits);
    rvAsm->MV(RARG2, addr);
    MoveMIPSRegToReg(RARG3, inst->r.rt, true);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWx));
  }

  // Aligned load of the containing word.
  rvAsm->ANDI(RARG1, addr, ~0x3u);
  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

  if (inst->r.rt == Reg::zero)
  {
    FreeHostReg(addr.Index());
    return;
  }

  // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is
NOTE: can't trust T in cf because of the flush1979const Reg rt = inst->r.rt;1980GPR value;1981if (m_load_delay_register == rt)1982{1983const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?1984AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) :1985m_load_delay_value_register;1986RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt);1987value = GPR(existing_ld_rt);1988}1989else1990{1991if constexpr (EMULATE_LOAD_DELAYS)1992{1993value = GPR(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt));1994if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())1995rvAsm->MV(value, GPR(rtreg.value()));1996else if (HasConstantReg(rt))1997EmitMov(value, GetConstantRegU32(rt));1998else1999rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));2000}2001else2002{2003value = GPR(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt));2004}2005}20062007DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());2008rvAsm->ANDI(RARG2, addr, 3);2009rvAsm->SLLIW(RARG2, RARG2, 3); // *82010EmitMov(RARG3, 24);2011rvAsm->SUBW(RARG3, RARG3, RARG2);20122013if (inst->op == InstructionOp::lwl)2014{2015// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;2016// new_value = (value & mask) | (RWRET << (24 - shift));2017EmitMov(RSCRATCH, 0xFFFFFFu);2018rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);2019rvAsm->AND(value, value, RSCRATCH);2020rvAsm->SLLW(RRET, RRET, RARG3);2021rvAsm->OR(value, value, RRET);2022}2023else2024{2025// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);2026// new_value = (value & mask) | (RWRET >> shift);2027rvAsm->SRLW(RRET, RRET, RARG2);2028EmitMov(RSCRATCH, 0xFFFFFF00u);2029rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);2030rvAsm->AND(value, value, RSCRATCH);2031rvAsm->OR(value, value, RRET);2032}20332034FreeHostReg(addr.Index());2035}20362037void CPU::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool 
sign, bool use_fastmem,
                                          const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  // With PGXP the address must survive the load for the notification call.
  const std::optional<GPR> addr_reg =
    g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
  FlushForLoadStore(address, false, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
    // CallHandler+PGXP needs the value in a callee-saved reg across the handler call.
    return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
             GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
             RRET;
  });

  switch (action)
  {
    case GTERegisterAccessAction::Ignore:
    {
      break;
    }

    case GTERegisterAccessAction::Direct:
    {
      rvAsm->SW(value, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::SignExtend16:
    {
      EmitSExtH(RARG3, value);
      rvAsm->SW(RARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::ZeroExtend16:
    {
      EmitUExtH(RARG3, value);
      rvAsm->SW(RARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::CallHandler:
    {
      Flush(FLUSH_FOR_C_CALL);
      rvAsm->MV(RARG2, value);
      EmitMov(RARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
      break;
    }

    case GTERegisterAccessAction::PushFIFO:
    {
      // SXY0 <- SXY1
      // SXY1 <- SXY2
      // SXY2 <- SXYP
      DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
      rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
      rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
      rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
      rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
      rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));
      break;
    }

    default:
    {
      Panic("Unknown action");
      return;
    }
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, value);
    if (value.Index() != RRET.Index())
      FreeHostReg(value.Index());
    rvAsm->MV(RARG2, addr);
    FreeHostReg(addr_reg.value().Index());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
  }
}

// sb/sh/sw. With PGXP, the address is kept in a callee-saved temp for the
// notification call after the store.
void CPU::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                         const std::optional<VirtualMemoryAddress>& address)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const std::optional<GPR> addr_reg =
    g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
  FlushForLoadStore(address, true, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(RARG2, cf);

  GenerateStore(addr, data, size, use_fastmem);

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    MoveMIPSRegToReg(RARG3, cf.MipsT());
    rvAsm->MV(RARG2, addr);
    EmitMov(RARG1, inst->bits);
    EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]);
    FreeHostReg(addr_reg.value().Index());
  }
}

// swl/swr: unaligned word store. Loads the containing aligned word, merges in the
// relevant bytes of rt, then stores the aligned word back.
void CPU::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                         const std::optional<VirtualMemoryAddress>& address)
{
  DebugAssert(size == MemoryAccessSize::Word && !sign);

  // TODO: this can take over rt's value if it's no longer needed
  // NOTE: can't trust T in cf because of the alloc
  const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));

  FlushForLoadStore(address, true, use_fastmem);

  // TODO: if address is constant, this can be simplified..
  // We'd need to be careful here if we weren't overwriting it..
  ComputeLoadStoreAddressArg(cf, address, addr);

  if
(g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, inst->bits);
    rvAsm->MV(RARG2, addr);
    MoveMIPSRegToReg(RARG3, inst->r.rt);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWx));
  }

  // Read the current contents of the aligned word into RRET.
  rvAsm->ANDI(RARG1, addr, ~0x3u);
  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

  // RSCRATCH = shift = (addr & 3) * 8; addr is realigned for the final store.
  rvAsm->ANDI(RSCRATCH, addr, 3);
  rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
  rvAsm->ANDI(addr, addr, ~0x3u);

  // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
  if (!g_settings.gpu_pgxp_enable)
    MoveMIPSRegToReg(RARG2, inst->r.rt);

  if (inst->op == InstructionOp::swl)
  {
    // const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
    // new_value = (RWRET & mem_mask) | (value >> (24 - shift));
    EmitMov(RARG3, 0xFFFFFF00u);
    rvAsm->SLLW(RARG3, RARG3, RSCRATCH);
    rvAsm->AND(RRET, RRET, RARG3);

    EmitMov(RARG3, 24);
    rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
    rvAsm->SRLW(RARG2, RARG2, RARG3);
    rvAsm->OR(RARG2, RARG2, RRET);
  }
  else
  {
    // const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);
    // new_value = (RWRET & mem_mask) | (value << shift);
    rvAsm->SLLW(RARG2, RARG2, RSCRATCH);

    EmitMov(RARG3, 24);
    rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
    EmitMov(RSCRATCH, 0x00FFFFFFu);
    rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG3);
    rvAsm->AND(RRET, RRET, RSCRATCH);
    rvAsm->OR(RARG2, RARG2, RRET);
  }

  GenerateStore(addr, RARG2, MemoryAccessSize::Word, use_fastmem);
  FreeHostReg(addr.Index());
}

// swc2: store a GTE register to memory, dispatching on the register's access action.
void CPU::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                          const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  // Address/data need callee-saved regs when they must survive handler/PGXP calls.
  const GPR addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler)
?
                     GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                     RARG1;
  const GPR data = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
  FlushForLoadStore(address, true, use_fastmem);
  ComputeLoadStoreAddressArg(cf, address, addr);

  switch (action)
  {
    case GTERegisterAccessAction::Direct:
    {
      // Read the GTE register straight from CPU state.
      rvAsm->LW(data, PTR(ptr));
    }
    break;

    case GTERegisterAccessAction::CallHandler:
    {
      // should already be flushed.. except in fastmem case
      Flush(FLUSH_FOR_C_CALL);
      EmitMov(RARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
      rvAsm->MV(data, RRET);
    }
    break;

    default:
    {
      Panic("Unknown action");
    }
    break;
  }

  GenerateStore(addr, data, size, use_fastmem);

  if (!g_settings.gpu_pgxp_enable)
  {
    if (addr.Index() != RARG1.Index())
      FreeHostReg(addr.Index());
  }
  else
  {
    // TODO: This can be simplified because we don't need to validate in PGXP..
    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, data);
    FreeHostReg(data.Index());
    rvAsm->MV(RARG2, addr);
    FreeHostReg(addr.Index());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
  }
}

// mtc0: masked write to a COP0 register, with side-effect handling for SR (cache
// isolation / interrupts), CAUSE (interrupts) and DCIC/BPCM (debug dispatcher).
void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
{
  // TODO: we need better constant setting here..
which will need backprop2275AssertRegOrConstT(cf);22762277const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());2278const u32* ptr = GetCop0RegPtr(reg);2279const u32 mask = GetCop0RegWriteMask(reg);2280if (!ptr)2281{2282Compile_Fallback();2283return;2284}22852286if (mask == 0)2287{2288// if it's a read-only register, ignore2289DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));2290return;2291}22922293// for some registers, we need to test certain bits2294const bool needs_bit_test = (reg == Cop0Reg::SR);2295const GPR new_value = RARG1;2296const GPR old_value = RARG2;2297const GPR changed_bits = RARG3;2298const GPR mask_reg = RSCRATCH;22992300// Load old value2301rvAsm->LW(old_value, PTR(ptr));23022303// No way we fit this in an immediate..2304EmitMov(mask_reg, mask);23052306// update value2307// TODO: This is creating pointless MV instructions.. why?2308if (cf.valid_host_t)2309rvAsm->AND(new_value, CFGetRegT(cf), mask_reg);2310else2311EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);23122313if (needs_bit_test)2314rvAsm->XOR(changed_bits, old_value, new_value);2315rvAsm->NOT(mask_reg, mask_reg);2316rvAsm->AND(old_value, old_value, mask_reg);2317rvAsm->OR(new_value, old_value, new_value);2318rvAsm->SW(new_value, PTR(ptr));23192320if (reg == Cop0Reg::SR)2321{2322// TODO: replace with register backup2323// We could just inline the whole thing..2324Flush(FLUSH_FOR_C_CALL);23252326Label caches_unchanged;2327rvAsm->SRLIW(RSCRATCH, changed_bits, 16);2328rvAsm->ANDI(RSCRATCH, RSCRATCH, 1);2329rvAsm->BEQ(RSCRATCH, zero, &caches_unchanged);2330EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));2331rvAsm->LW(new_value, PTR(ptr));2332if (CodeCache::IsUsingFastmem())2333rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));2334rvAsm->Bind(&caches_unchanged);23352336TestInterrupts(RARG1);2337}2338else if (reg == Cop0Reg::CAUSE)2339{2340rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));2341TestInterrupts(RARG1);2342}2343else if (reg == 
Cop0Reg::DCIC || reg == Cop0Reg::BPCM)2344{2345// need to check whether we're switching to debug mode2346Flush(FLUSH_FOR_C_CALL);2347EmitCall(reinterpret_cast<const void*>(&CPU::UpdateDebugDispatcherFlag));2348SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);2349BackupHostState();2350Flush(FLUSH_FOR_EARLY_BLOCK_EXIT);2351EmitCall(reinterpret_cast<const void*>(&CPU::ExitExecution)); // does not return2352RestoreHostState();2353SwitchToNearCode(false);2354}2355}23562357void CPU::RISCV64Recompiler::Compile_rfe(CompileFlags cf)2358{2359// shift mode bits right two, preserving upper bits2360rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));2361rvAsm->SRLIW(RSCRATCH, RARG1, 2);2362rvAsm->ANDI(RSCRATCH, RSCRATCH, 0xf);2363rvAsm->ANDI(RARG1, RARG1, ~0xfu);2364rvAsm->OR(RARG1, RARG1, RSCRATCH);2365rvAsm->SW(RARG1, PTR(&g_state.cop0_regs.sr.bits));23662367TestInterrupts(RARG1);2368}23692370void CPU::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)2371{2372DebugAssert(sr != RSCRATCH);23732374// if Iec == 0 then goto no_interrupt2375Label no_interrupt;2376rvAsm->ANDI(RSCRATCH, sr, 1);2377rvAsm->BEQZ(RSCRATCH, &no_interrupt);23782379// sr & cause2380rvAsm->LW(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));2381rvAsm->AND(sr, sr, RSCRATCH);23822383// ((sr & cause) & 0xff00) == 0 goto no_interrupt2384rvAsm->SRLIW(sr, sr, 8);2385rvAsm->ANDI(sr, sr, 0xFF);2386SwitchToFarCode(true, &Assembler::BEQ, sr, zero);23872388BackupHostState();23892390// Update load delay, this normally happens at the end of an instruction, but we're finishing it early.2391UpdateLoadDelay();23922393Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);23942395// Can't use EndBlockWithException() here, because it'll use the wrong PC.2396// Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.2397if (!iinfo->is_last_instruction)2398{2399EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, 
false,2400(inst + 1)->cop.cop_n));2401EmitMov(RARG2, m_compiler_pc);2402EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));2403m_dirty_pc = false;2404EndAndLinkBlock(std::nullopt, true, false);2405}2406else2407{2408if (m_dirty_pc)2409EmitMov(RARG1, m_compiler_pc);2410rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));2411if (m_dirty_pc)2412rvAsm->SW(RARG1, PTR(&g_state.pc));2413m_dirty_pc = false;2414EndAndLinkBlock(std::nullopt, false, true);2415}24162417RestoreHostState();2418SwitchToNearCode(false);24192420rvAsm->Bind(&no_interrupt);2421}24222423void CPU::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)2424{2425const u32 index = inst->cop.Cop2Index();2426const Reg rt = inst->r.rt;24272428const auto [ptr, action] = GetGTERegisterPointer(index, false);2429if (action == GTERegisterAccessAction::Ignore)2430return;24312432u32 hreg;2433if (action == GTERegisterAccessAction::Direct)2434{2435hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),2436EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);2437rvAsm->LW(GPR(hreg), PTR(ptr));2438}2439else if (action == GTERegisterAccessAction::CallHandler)2440{2441Flush(FLUSH_FOR_C_CALL);2442EmitMov(RARG1, index);2443EmitCall(reinterpret_cast<const void*>(>E::ReadRegister));24442445hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),2446EMULATE_LOAD_DELAYS ? 
HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);2447rvAsm->MV(GPR(hreg), RRET);2448}2449else2450{2451Panic("Unknown action");2452}24532454if (g_settings.gpu_pgxp_enable)2455{2456Flush(FLUSH_FOR_C_CALL);2457EmitMov(RARG1, inst->bits);2458rvAsm->MV(RARG2, GPR(hreg));2459EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));2460}2461}24622463void CPU::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)2464{2465const u32 index = inst->cop.Cop2Index();2466const auto [ptr, action] = GetGTERegisterPointer(index, true);2467if (action == GTERegisterAccessAction::Ignore)2468return;24692470if (action == GTERegisterAccessAction::Direct)2471{2472if (cf.const_t)2473StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);2474else2475rvAsm->SW(CFGetRegT(cf), PTR(ptr));2476}2477else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)2478{2479const bool sign = (action == GTERegisterAccessAction::SignExtend16);2480if (cf.valid_host_t)2481{2482sign ? EmitSExtH(RARG1, CFGetRegT(cf)) : EmitUExtH(RARG1, CFGetRegT(cf));2483rvAsm->SW(RARG1, PTR(ptr));2484}2485else if (cf.const_t)2486{2487const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));2488StoreConstantToCPUPointer(sign ? 
::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);2489}2490else2491{2492Panic("Unsupported setup");2493}2494}2495else if (action == GTERegisterAccessAction::CallHandler)2496{2497Flush(FLUSH_FOR_C_CALL);2498EmitMov(RARG1, index);2499MoveTToReg(RARG2, cf);2500EmitCall(reinterpret_cast<const void*>(>E::WriteRegister));2501}2502else if (action == GTERegisterAccessAction::PushFIFO)2503{2504// SXY0 <- SXY12505// SXY1 <- SXY22506// SXY2 <- SXYP2507DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());2508rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));2509rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));2510rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));2511rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));2512if (cf.valid_host_t)2513rvAsm->SW(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));2514else if (cf.const_t)2515StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);2516else2517Panic("Unsupported setup");2518}2519else2520{2521Panic("Unknown action");2522}2523}25242525void CPU::RISCV64Recompiler::Compile_cop2(CompileFlags cf)2526{2527TickCount func_ticks;2528GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);25292530Flush(FLUSH_FOR_C_CALL);2531EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);2532EmitCall(reinterpret_cast<const void*>(func));25332534AddGTETicks(func_ticks);2535}25362537u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,2538TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,2539u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,2540bool is_load)2541{2542Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);2543Assembler* rvAsm = &arm_asm;25442545static constexpr u32 GPR_SIZE = 8;25462547// save regs2548u32 num_gprs = 0;25492550for (u32 i = 0; i < NUM_HOST_REGS; i++)2551{2552if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || 
data_register != i))2553num_gprs++;2554}25552556const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);25572558if (stack_size > 0)2559{2560rvAsm->ADDI(sp, sp, -static_cast<s32>(stack_size));25612562u32 stack_offset = 0;2563for (u32 i = 0; i < NUM_HOST_REGS; i++)2564{2565if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))2566{2567rvAsm->SD(GPR(i), stack_offset, sp);2568stack_offset += GPR_SIZE;2569}2570}2571}25722573if (cycles_to_add != 0)2574{2575// NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles2576Assert(rvIsValidSExtITypeImm(cycles_to_add));2577rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));2578rvAsm->ADDIW(RSCRATCH, RSCRATCH, cycles_to_add);2579rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));2580}25812582if (address_register != RARG1.Index())2583rvAsm->MV(RARG1, GPR(address_register));25842585if (!is_load)2586{2587if (data_register != RARG2.Index())2588rvAsm->MV(RARG2, GPR(data_register));2589}25902591switch (size)2592{2593case MemoryAccessSize::Byte:2594{2595rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) :2596reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));2597}2598break;2599case MemoryAccessSize::HalfWord:2600{2601rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) :2602reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));2603}2604break;2605case MemoryAccessSize::Word:2606{2607rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) :2608reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));2609}2610break;2611}26122613if (is_load)2614{2615const GPR dst = GPR(data_register);2616switch (size)2617{2618case MemoryAccessSize::Byte:2619{2620is_signed ? 
rvEmitSExtB(rvAsm, dst, RRET) : rvEmitUExtB(rvAsm, dst, RRET);2621}2622break;2623case MemoryAccessSize::HalfWord:2624{2625is_signed ? rvEmitSExtH(rvAsm, dst, RRET) : rvEmitUExtH(rvAsm, dst, RRET);2626}2627break;2628case MemoryAccessSize::Word:2629{2630if (dst.Index() != RRET.Index())2631rvAsm->MV(dst, RRET);2632}2633break;2634}2635}26362637if (cycles_to_remove != 0)2638{2639Assert(rvIsValidSExtITypeImm(-cycles_to_remove));2640rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));2641rvAsm->ADDIW(RSCRATCH, RSCRATCH, -cycles_to_remove);2642rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));2643}26442645// restore regs2646if (stack_size > 0)2647{2648u32 stack_offset = 0;2649for (u32 i = 0; i < NUM_HOST_REGS; i++)2650{2651if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))2652{2653rvAsm->LD(GPR(i), stack_offset, sp);2654stack_offset += GPR_SIZE;2655}2656}26572658rvAsm->ADDI(sp, sp, stack_size);2659}26602661rvEmitJmp(rvAsm, static_cast<const u8*>(code_address) + code_size);26622663return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());2664}26652666#endif // CPU_ARCH_RISCV64266726682669