GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_recompiler_riscv64.cpp
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "cpu_recompiler_riscv64.h"
5
#include "cpu_code_cache_private.h"
6
#include "cpu_core_private.h"
7
#include "cpu_pgxp.h"
8
#include "gte.h"
9
#include "settings.h"
10
#include "timing_event.h"
11
12
#include "common/align.h"
13
#include "common/assert.h"
14
#include "common/log.h"
15
#include "common/memmap.h"
16
#include "common/string_util.h"
17
18
#include <limits>
19
20
#ifdef CPU_ARCH_RISCV64
21
22
LOG_CHANNEL(Recompiler);
23
24
#ifdef ENABLE_HOST_DISASSEMBLY
25
extern "C" {
26
#include "riscv-disas.h"
27
}
28
#endif
29
30
// For LW/SW/etc.
31
#define PTR(x) ((u32)(((u8*)(x)) - ((u8*)&g_state))), RSTATE
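// PTR(&g_state.field) expands to "<byte offset of field within g_state>, RSTATE", matching the
// (imm, base) operand order of biscuit's load/store emitters, e.g.
//   rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
// loads pending_ticks relative to the state pointer kept in s10. This relies on the offset
// fitting the instruction's signed 12-bit displacement; larger offsets are handled explicitly
// (see GenerateICacheCheckAndUpdate).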
32
33
static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr
34
35
#define RRET biscuit::a0
36
#define RARG1 biscuit::a0
37
#define RARG2 biscuit::a1
38
#define RARG3 biscuit::a2
39
#define RSCRATCH biscuit::t6
40
#define RSTATE biscuit::s10
41
#define RMEMBASE biscuit::s11
42
43
static bool rvIsCallerSavedRegister(u32 id);
44
static bool rvIsValidSExtITypeImm(u32 imm);
45
static std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
46
static void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr);
47
static void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
48
static void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
49
static u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
50
static u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
51
static void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
52
bool sign_extend_word = false);
53
static void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
54
const biscuit::GPR& tempreg = RSCRATCH);
55
static void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
56
static void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
57
static void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
58
static void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
59
static void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
60
static void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
61
62
namespace CPU {
63
64
using namespace biscuit;
65
66
RISCV64Recompiler s_instance;
67
Recompiler* g_compiler = &s_instance;
68
69
} // namespace CPU
70
71
bool rvIsCallerSavedRegister(u32 id)
72
{
73
return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31));
74
}
75
76
bool rvIsValidSExtITypeImm(u32 imm)
77
{
78
return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm);
79
}
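// The << 20 / >> 20 round trip on the 32-bit value keeps only the low 12 bits plus sign, so the
// comparison holds exactly when imm is representable as a sign-extended 12-bit I-type
// immediate (-2048..2047).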
80
81
std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target)
82
{
83
const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur));
84
Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) &&
85
disp <= static_cast<s64>(std::numeric_limits<s32>::max()));
86
87
const s64 hi = disp + 0x800;
88
const s64 lo = disp - (hi & 0xFFFFF000);
89
return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52));
90
}
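// The returned pair satisfies (hi << 12) + lo == disp, where lo is the sign-extended low 12
// bits. Adding 0x800 before taking the upper bits rounds hi up whenever lo ends up negative,
// because the ADDI that follows the AUIPC adds a sign-extended immediate. Worked example:
// disp = 0x12FFF -> hi = 0x13, lo = -1, and (0x13 << 12) - 1 == 0x12FFF.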
91
92
void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr)
93
{
94
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
95
rvAsm->AUIPC(reg, hi);
96
rvAsm->ADDI(reg, reg, lo);
97
}
98
99
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm)
100
{
101
// Borrowed from biscuit, but doesn't emit an ADDI if the lower 12 bits are zero.
102
const u32 lower = imm & 0xFFF;
103
const u32 upper = (imm & 0xFFFFF000) >> 12;
104
const s32 simm = static_cast<s32>(imm);
105
if (rvIsValidSExtITypeImm(simm))
106
{
107
rvAsm->ADDI(rd, biscuit::zero, static_cast<s32>(lower));
108
}
109
else
110
{
111
const bool needs_increment = (lower & 0x800) != 0;
112
const u32 upper_imm = needs_increment ? upper + 1 : upper;
113
rvAsm->LUI(rd, upper_imm);
114
if (lower != 0)
rvAsm->ADDI(rd, rd, static_cast<int32_t>(lower));
115
}
116
}
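// The same rounding applies to 32-bit constants: e.g. 0x12345FFF emits LUI rd, 0x12346 followed
// by ADDI rd, rd, 0xFFF (which encodes and sign-extends to -1), giving 0x12346000 - 1.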
117
118
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm)
119
{
120
// TODO: Make better..
121
rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32));
122
rvEmitMov(rvAsm, scratch, static_cast<u32>(imm));
123
rvAsm->SLLI64(rd, rd, 32);
124
rvAsm->SLLI64(scratch, scratch, 32);
125
rvAsm->SRLI64(scratch, scratch, 32);
126
rvAsm->ADD(rd, rd, scratch);
127
}
128
129
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg)
130
{
131
// TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes
132
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr);
133
rvAsm->AUIPC(RSCRATCH, hi);
134
rvAsm->JALR(link_reg, lo, RSCRATCH);
135
return 8;
136
}
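// AUIPC+JALR covers any +/-2GiB displacement and is always exactly 8 bytes, which is what
// BLOCK_LINK_SIZE assumes when block-link sites are (re)written in place by EmitJump() below.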
137
138
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
139
{
140
return rvEmitJmp(rvAsm, ptr, biscuit::ra);
141
}
142
143
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word)
144
{
145
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
146
rvAsm->AUIPC(reg, hi);
147
if (sign_extend_word)
148
rvAsm->LW(reg, lo, reg);
149
else
150
rvAsm->LWU(reg, lo, reg);
151
}
152
153
[[maybe_unused]] void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
154
const biscuit::GPR& tempreg)
155
{
156
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
157
rvAsm->AUIPC(tempreg, hi);
158
rvAsm->SW(reg, lo, tempreg);
159
}
160
161
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
162
{
163
rvAsm->SLLI(rd, rs, 24);
164
rvAsm->SRAIW(rd, rd, 24);
165
}
166
167
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
168
{
169
rvAsm->ANDI(rd, rs, 0xFF);
170
}
171
172
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
173
{
174
rvAsm->SLLI(rd, rs, 16);
175
rvAsm->SRAIW(rd, rd, 16);
176
}
177
178
void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
179
{
180
rvAsm->SLLI(rd, rs, 48);
181
rvAsm->SRLI(rd, rd, 48);
182
}
183
184
void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
185
{
186
rvAsm->ADDIW(rd, rs, 0);
187
}
188
189
void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
190
{
191
rvAsm->SLLI64(rd, rs, 32);
192
rvAsm->SRLI64(rd, rd, 32);
193
}
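// Base RV64I has no single zero-extend-word instruction, hence the SLLI/SRLI pair above; sign
// extension to 64 bits is just ADDIW rd, rs, 0 (the sext.w idiom used in rvEmitDSExtW).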
194
195
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
196
{
197
#ifdef ENABLE_HOST_DISASSEMBLY
198
const u8* cur = static_cast<const u8*>(start);
199
const u8* end = cur + size;
200
char buf[256];
201
while (cur < end)
202
{
203
rv_inst inst;
204
size_t instlen;
205
inst_fetch(cur, &inst, &instlen);
206
disasm_inst(buf, std::size(buf), rv64, static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), inst);
207
DEBUG_LOG("\t0x{:016X}\t{}", static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), buf);
208
cur += instlen;
209
}
210
#else
211
ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
212
#endif
213
}
214
215
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
216
{
217
#ifdef ENABLE_HOST_DISASSEMBLY
218
const u8* cur = static_cast<const u8*>(start);
219
const u8* end = cur + size;
220
u32 icount = 0;
221
while (cur < end)
222
{
223
rv_inst inst;
224
size_t instlen;
225
inst_fetch(cur, &inst, &instlen);
226
cur += instlen;
227
icount++;
228
}
229
return icount;
230
#else
231
ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
232
return 0;
233
#endif
234
}
235
236
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
237
{
238
using namespace biscuit;
239
240
Assembler actual_asm(static_cast<u8*>(code), code_size);
241
Assembler* rvAsm = &actual_asm;
242
243
Label dispatch;
244
Label run_events_and_dispatch;
245
246
g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(rvAsm->GetCursorPointer());
247
{
248
// TODO: reserve some space for saving caller-saved registers
249
250
// Need the CPU state for basically everything :-)
251
rvMoveAddressToReg(rvAsm, RSTATE, &g_state);
252
253
// Fastmem setup
254
if (IsUsingFastmem())
255
rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
256
257
// Fall through to event dispatcher
258
}
259
260
// Check whether the downcount has expired (pending_ticks >= downcount) and run events before dispatching.
261
{
262
Label skip_event_check;
263
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
264
rvAsm->LW(RARG2, PTR(&g_state.downcount));
265
rvAsm->BLT(RARG1, RARG2, &skip_event_check);
266
267
rvAsm->Bind(&run_events_and_dispatch);
268
g_run_events_and_dispatch = rvAsm->GetCursorPointer();
269
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents));
270
271
rvAsm->Bind(&skip_event_check);
272
}
273
274
// TODO: align?
275
g_dispatcher = rvAsm->GetCursorPointer();
276
{
277
rvAsm->Bind(&dispatch);
278
279
// RARG2 <- g_code_lut[pc >> 16]
280
rvAsm->LW(RARG1, PTR(&g_state.pc));
281
rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
282
rvAsm->SRLIW(RARG2, RARG1, 16);
283
rvAsm->SLLI(RARG2, RARG2, 3);
284
rvAsm->ADD(RARG2, RARG2, RARG3);
285
rvAsm->LD(RARG2, 0, RARG2);
286
rvAsm->SLLI64(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2
287
rvAsm->SRLI64(RARG1, RARG1, 50);
288
rvAsm->SLLI(RARG1, RARG1, 3);
289
290
// jump through the page's entry (fast_map[idx])
291
rvAsm->ADD(RARG1, RARG1, RARG2);
292
rvAsm->LD(RARG1, 0, RARG1);
293
rvAsm->JR(RARG1);
294
}
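// Dispatcher lookup: g_code_lut holds one pointer per 64KiB page (indexed by pc >> 16, scaled
// by 8 because the entries are 64-bit pointers), and each page points at an array of host code
// pointers indexed by (pc & 0xFFFF) >> 2, i.e. one entry per word-aligned guest PC. The final
// LD+JR jumps to whatever the table holds for this PC (a compiled block, or the
// g_compile_or_revalidate_block stub below for code that has not been compiled yet).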
295
296
g_compile_or_revalidate_block = rvAsm->GetCursorPointer();
297
{
298
rvAsm->LW(RARG1, PTR(&g_state.pc));
299
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock));
300
rvAsm->J(&dispatch);
301
}
302
303
g_discard_and_recompile_block = rvAsm->GetCursorPointer();
304
{
305
rvAsm->LW(RARG1, PTR(&g_state.pc));
306
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock));
307
rvAsm->J(&dispatch);
308
}
309
310
g_interpret_block = rvAsm->GetCursorPointer();
311
{
312
rvEmitCall(rvAsm, CodeCache::GetInterpretUncachedBlockFunction());
313
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
314
rvAsm->LW(RARG2, PTR(&g_state.downcount));
315
rvAsm->BGE(RARG1, RARG2, &run_events_and_dispatch);
316
rvAsm->J(&dispatch);
317
}
318
319
// TODO: align?
320
321
return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
322
}
323
324
void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
325
{
326
constexpr u8 padding_value = 0x00;
327
std::memset(dst, padding_value, size);
328
}
329
330
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
331
{
332
// TODO: get rid of assembler construction here
333
{
334
biscuit::Assembler assembler(static_cast<u8*>(code), BLOCK_LINK_SIZE);
335
rvEmitCall(&assembler, dst);
336
337
DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE);
338
if (assembler.GetCodeBuffer().GetRemainingBytes() > 0)
339
assembler.NOP();
340
}
341
342
if (flush_icache)
343
MemMap::FlushInstructionCache(code, BLOCK_LINK_SIZE);
344
345
return BLOCK_LINK_SIZE;
346
}
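// Every link site is therefore a fixed 8-byte AUIPC+JALR pair (padded with a NOP if a shorter
// sequence were ever produced), so the code cache can re-point a link later simply by running
// EmitJump() again over the same 8 bytes and flushing the instruction cache.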
347
348
CPU::RISCV64Recompiler::RISCV64Recompiler() = default;
349
350
CPU::RISCV64Recompiler::~RISCV64Recompiler() = default;
351
352
const void* CPU::RISCV64Recompiler::GetCurrentCodePointer()
353
{
354
return rvAsm->GetCursorPointer();
355
}
356
357
void CPU::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
358
u32 far_code_space)
359
{
360
Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
361
362
// TODO: don't recreate this every time..
363
DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
364
m_emitter = std::make_unique<Assembler>(code_buffer, code_buffer_space);
365
m_far_emitter = std::make_unique<Assembler>(far_code_buffer, far_code_space);
366
rvAsm = m_emitter.get();
367
368
// Need to wipe it out so it's correct when toggling fastmem.
369
m_host_regs = {};
370
371
const u32 membase_idx = CodeCache::IsUsingFastmem() ? RMEMBASE.Index() : NUM_HOST_REGS;
372
for (u32 i = 0; i < NUM_HOST_REGS; i++)
373
{
374
HostRegAlloc& hra = m_host_regs[i];
375
376
if (i == RARG1.Index() || i == RARG2.Index() || i == RARG3.Index() || i == RSCRATCH.Index() ||
377
i == RSTATE.Index() || i == membase_idx || i < 5 /* zero, ra, sp, gp, tp */)
378
{
379
continue;
380
}
381
382
hra.flags = HR_USABLE | (rvIsCallerSavedRegister(i) ? 0 : HR_CALLEE_SAVED);
383
}
384
}
385
386
void CPU::RISCV64Recompiler::SwitchToFarCode(bool emit_jump,
387
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR,
388
biscuit::Label*) /* = nullptr */,
389
const biscuit::GPR& rs1 /* = biscuit::zero */,
390
const biscuit::GPR& rs2 /* = biscuit::zero */)
391
{
392
DebugAssert(rvAsm == m_emitter.get());
393
if (emit_jump)
394
{
395
const void* target = m_far_emitter->GetCursorPointer();
396
if (inverted_cond)
397
{
398
Label skip;
399
(rvAsm->*inverted_cond)(rs1, rs2, &skip);
400
rvEmitJmp(rvAsm, target);
401
rvAsm->Bind(&skip);
402
}
403
else
404
{
405
rvEmitCall(rvAsm, target);
406
}
407
}
408
rvAsm = m_far_emitter.get();
409
}
410
411
void CPU::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)
412
{
413
DebugAssert(rvAsm == m_far_emitter.get());
414
if (emit_jump)
415
rvEmitJmp(rvAsm, m_emitter->GetCursorPointer());
416
rvAsm = m_emitter.get();
417
}
418
419
void CPU::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)
420
{
421
rvEmitMov(rvAsm, dst, val);
422
}
423
424
void CPU::RISCV64Recompiler::EmitCall(const void* ptr)
425
{
426
rvEmitCall(rvAsm, ptr);
427
}
428
429
void CPU::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
430
void (biscuit::Assembler::*iop)(GPR, GPR, u32),
431
void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
432
{
433
DebugAssert(rd != RSCRATCH && rs != RSCRATCH);
434
435
if (rvIsValidSExtITypeImm(imm))
436
{
437
(rvAsm->*iop)(rd, rs, imm);
438
return;
439
}
440
441
rvEmitMov(rvAsm, RSCRATCH, imm);
442
(rvAsm->*rop)(rd, rs, RSCRATCH);
443
}
444
445
void CPU::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
446
{
447
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
448
&Assembler::ADD);
449
}
450
451
void CPU::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
452
{
453
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
454
&Assembler::ADDW);
455
}
456
457
void CPU::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
458
{
459
const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
460
SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
461
&Assembler::ADDW);
462
}
463
464
void CPU::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
465
{
466
SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
467
}
468
469
void CPU::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
470
{
471
SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
472
}
473
474
void CPU::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
475
{
476
SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
477
}
478
479
void CPU::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
480
{
481
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
482
&Assembler::SLT);
483
}
484
485
void CPU::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
486
{
487
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
488
&Assembler::SLTU);
489
}
490
491
void CPU::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
492
{
493
rvEmitSExtB(rvAsm, rd, rs);
494
}
495
496
void CPU::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
497
{
498
rvEmitUExtB(rvAsm, rd, rs);
499
}
500
501
void CPU::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
502
{
503
rvEmitSExtH(rvAsm, rd, rs);
504
}
505
506
void CPU::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
507
{
508
rvEmitUExtH(rvAsm, rd, rs);
509
}
510
511
void CPU::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
512
{
513
rvEmitDSExtW(rvAsm, rd, rs);
514
}
515
516
void CPU::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
517
{
518
rvEmitDUExtW(rvAsm, rd, rs);
519
}
520
521
void CPU::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
522
{
523
// Load the base pointers once up front so the compare loop below can use immediate offsets, keeping code size down.
524
// TODO: 64-bit displacement is needed :/
525
// rvMoveAddressToReg(rvAsm, RARG1, ram_ptr);
526
// rvMoveAddressToReg(rvAsm, RARG2, shadow_ptr);
527
rvEmitMov64(rvAsm, RARG1, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(ram_ptr)));
528
rvEmitMov64(rvAsm, RARG2, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(shadow_ptr)));
529
530
u32 offset = 0;
531
Label block_changed;
532
533
while (size >= 8)
534
{
535
rvAsm->LD(RARG3, offset, RARG1);
536
rvAsm->LD(RSCRATCH, offset, RARG2);
537
rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
538
offset += 8;
539
size -= 8;
540
}
541
542
while (size >= 4)
543
{
544
rvAsm->LW(RARG3, offset, RARG1);
545
rvAsm->LW(RSCRATCH, offset, RARG2);
546
rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
547
offset += 4;
548
size -= 4;
549
}
550
551
DebugAssert(size == 0);
552
553
Label block_unchanged;
554
rvAsm->J(&block_unchanged);
555
rvAsm->Bind(&block_changed);
556
rvEmitJmp(rvAsm, CodeCache::g_discard_and_recompile_block);
557
rvAsm->Bind(&block_unchanged);
558
}
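// The emitted check walks the block's RAM contents in 8-byte then 4-byte chunks and compares
// them against the shadow copy captured at compile time; the first mismatch branches to
// g_discard_and_recompile_block, so self-modifying code is recompiled rather than executed
// from a stale translation.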
559
560
void CPU::RISCV64Recompiler::GenerateICacheCheckAndUpdate()
561
{
562
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
563
{
564
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
565
{
566
rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
567
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
568
rvEmitMov(rvAsm, RARG3, m_block->size);
569
rvAsm->MULW(RARG2, RARG2, RARG3);
570
rvAsm->ADD(RARG1, RARG1, RARG2);
571
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
572
}
573
else
574
{
575
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
576
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
577
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
578
}
579
}
580
else if (m_block->icache_line_count > 0)
581
{
582
const auto& ticks_reg = RARG1;
583
const auto& current_tag_reg = RARG2;
584
const auto& existing_tag_reg = RARG3;
585
586
// start of block, nothing should be using this
587
const auto& maddr_reg = biscuit::t0;
588
DebugAssert(!IsHostRegAllocated(maddr_reg.Index()));
589
590
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
591
rvAsm->LW(ticks_reg, PTR(&g_state.pending_ticks));
592
rvEmitMov(rvAsm, current_tag_reg, current_pc);
593
594
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
595
{
596
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
597
if (fill_ticks <= 0)
598
continue;
599
600
const u32 line = GetICacheLine(current_pc);
601
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
602
603
// Offsets must fit in signed 12 bits.
604
Label cache_hit;
605
if (offset >= 2048)
606
{
607
SafeADDI(maddr_reg, RSTATE, offset);
608
rvAsm->LW(existing_tag_reg, 0, maddr_reg);
609
rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
610
rvAsm->SW(current_tag_reg, 0, maddr_reg);
611
}
612
else
613
{
614
rvAsm->LW(existing_tag_reg, offset, RSTATE);
615
rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
616
rvAsm->SW(current_tag_reg, offset, RSTATE);
617
}
618
619
SafeADDIW(ticks_reg, ticks_reg, static_cast<u32>(fill_ticks));
620
rvAsm->Bind(&cache_hit);
621
622
if (i != (m_block->icache_line_count - 1))
623
SafeADDIW(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
624
}
625
626
rvAsm->SW(ticks_reg, PTR(&g_state.pending_ticks));
627
}
628
}
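// For blocks flagged as running from the instruction cache, this simulates per-line fills: each
// line's stored tag in g_state.icache_tags is compared against the block's address, and on a
// miss the tag is rewritten and GetICacheFillTicks() worth of cycles is added to pending_ticks.
// Uncached blocks instead charge a flat (or dynamically computed) fetch cost above.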
629
630
void CPU::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
631
s32 arg3reg /*= -1*/)
632
{
633
if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index()))
634
rvAsm->MV(RARG1, GPR(arg1reg));
635
if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.Index()))
636
rvAsm->MV(RARG2, GPR(arg2reg));
637
if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.Index()))
638
rvAsm->MV(RARG3, GPR(arg3reg));
639
EmitCall(func);
640
}
641
642
void CPU::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
643
{
644
if (newpc.has_value())
645
{
646
if (m_dirty_pc || m_compiler_pc != newpc)
647
{
648
EmitMov(RSCRATCH, newpc.value());
649
rvAsm->SW(RSCRATCH, PTR(&g_state.pc));
650
}
651
}
652
m_dirty_pc = false;
653
654
// flush regs
655
Flush(FLUSH_END_BLOCK);
656
EndAndLinkBlock(newpc, do_event_test, false);
657
}
658
659
void CPU::RISCV64Recompiler::EndBlockWithException(Exception excode)
660
{
661
// flush regs, but not pc, it's going to get overwritten
662
// flush cycles because of the GTE instruction stuff...
663
Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);
664
665
// TODO: flush load delay
666
667
EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false,
668
inst->cop.cop_n));
669
EmitMov(RARG2, m_current_instruction_pc);
670
if (excode != Exception::BP)
671
{
672
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
673
}
674
else
675
{
676
EmitMov(RARG3, inst->bits);
677
EmitCall(reinterpret_cast<const void*>(&CPU::RaiseBreakException));
678
}
679
m_dirty_pc = false;
680
681
EndAndLinkBlock(std::nullopt, true, false);
682
}
683
684
void CPU::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events)
685
{
686
// event test
687
// pc should've been flushed
688
DebugAssert(!m_dirty_pc && !m_block_ended);
689
m_block_ended = true;
690
691
// TODO: try extracting this to a function
692
// TODO: move the cycle flush in here..
693
694
// save cycles for event test
695
const TickCount cycles = std::exchange(m_cycles, 0);
696
697
// pending_ticks += cycles
698
// if (pending_ticks >= downcount) { dispatch_event(); }
699
if (do_event_test || m_gte_done_cycle > cycles || cycles > 0)
700
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
701
if (do_event_test)
702
rvAsm->LW(RARG2, PTR(&g_state.downcount));
703
if (cycles > 0)
704
{
705
SafeADDIW(RARG1, RARG1, cycles);
706
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
707
}
708
if (m_gte_done_cycle > cycles)
709
{
710
SafeADDIW(RARG2, RARG1, m_gte_done_cycle - cycles);
711
rvAsm->SW(RARG2, PTR(&g_state.gte_completion_tick));
712
}
713
714
if (do_event_test)
715
{
716
// TODO: see if we can do a far jump somehow with this..
717
Label cont;
718
rvAsm->BLT(RARG1, RARG2, &cont);
719
rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
720
rvAsm->Bind(&cont);
721
}
722
723
// jump to dispatcher or next block
724
if (force_run_events)
725
{
726
rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
727
}
728
else if (!newpc.has_value())
729
{
730
rvEmitJmp(rvAsm, CodeCache::g_dispatcher);
731
}
732
else
733
{
734
const void* target =
735
(newpc.value() == m_block->pc) ?
736
CodeCache::CreateSelfBlockLink(m_block, rvAsm->GetCursorPointer(), rvAsm->GetBufferPointer(0)) :
737
CodeCache::CreateBlockLink(m_block, rvAsm->GetCursorPointer(), newpc.value());
738
rvEmitJmp(rvAsm, target);
739
}
740
}
741
742
const void* CPU::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)
743
{
744
u8* const code = m_emitter->GetBufferPointer(0);
745
*code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());
746
*far_code_size = static_cast<u32>(m_far_emitter->GetCodeBuffer().GetSizeInBytes());
747
rvAsm = nullptr;
748
m_far_emitter.reset();
749
m_emitter.reset();
750
return code;
751
}
752
753
const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const
754
{
755
static constexpr std::array<const char*, 32> reg64_names = {
756
{"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
757
"a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};
758
return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
759
}
760
761
void CPU::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
762
{
763
EmitMov(GPR(reg), val);
764
}
765
766
void CPU::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
767
{
768
rvAsm->LW(GPR(reg), PTR(ptr));
769
}
770
771
void CPU::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
772
{
773
rvAsm->SW(GPR(reg), PTR(ptr));
774
}
775
776
void CPU::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
777
{
778
if (val == 0)
779
{
780
rvAsm->SW(zero, PTR(ptr));
781
return;
782
}
783
784
EmitMov(RSCRATCH, val);
785
rvAsm->SW(RSCRATCH, PTR(ptr));
786
}
787
788
void CPU::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)
789
{
790
if (src != dst)
791
rvAsm->MV(GPR(dst), GPR(src));
792
}
793
794
void CPU::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const
795
{
796
DebugAssert(cf.valid_host_s || cf.const_s);
797
}
798
799
void CPU::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const
800
{
801
DebugAssert(cf.valid_host_t || cf.const_t);
802
}
803
804
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
805
{
806
if (cf.valid_host_s)
807
{
808
return GPR(cf.host_s);
809
}
810
else if (cf.const_s)
811
{
812
if (HasConstantRegValue(cf.MipsS(), 0))
813
return zero;
814
815
EmitMov(temp_reg, GetConstantRegU32(cf.MipsS()));
816
return temp_reg;
817
}
818
else
819
{
820
WARNING_LOG("Hit memory path in CFGetSafeRegS() for {}", GetRegName(cf.MipsS()));
821
rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_s]));
822
return temp_reg;
823
}
824
}
825
826
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
827
{
828
if (cf.valid_host_t)
829
{
830
return GPR(cf.host_t);
831
}
832
else if (cf.const_t)
833
{
834
if (HasConstantRegValue(cf.MipsT(), 0))
835
return zero;
836
837
EmitMov(temp_reg, GetConstantRegU32(cf.MipsT()));
838
return temp_reg;
839
}
840
else
841
{
842
WARNING_LOG("Hit memory path in CFGetSafeRegT() for {}", GetRegName(cf.MipsT()));
843
rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_t]));
844
return temp_reg;
845
}
846
}
847
848
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const
849
{
850
DebugAssert(cf.valid_host_d);
851
return GPR(cf.host_d);
852
}
853
854
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const
855
{
856
DebugAssert(cf.valid_host_s);
857
return GPR(cf.host_s);
858
}
859
860
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const
861
{
862
DebugAssert(cf.valid_host_t);
863
return GPR(cf.host_t);
864
}
865
866
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const
867
{
868
DebugAssert(cf.valid_host_lo);
869
return GPR(cf.host_lo);
870
}
871
872
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const
873
{
874
DebugAssert(cf.valid_host_hi);
875
return GPR(cf.host_hi);
876
}
877
878
void CPU::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
879
{
880
if (cf.valid_host_s)
881
{
882
if (cf.host_s != dst.Index())
883
rvAsm->MV(dst, GPR(cf.host_s));
884
}
885
else if (cf.const_s)
886
{
887
EmitMov(dst, GetConstantRegU32(cf.MipsS()));
888
}
889
else
890
{
891
WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS()));
892
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
893
}
894
}
895
896
void CPU::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
897
{
898
if (cf.valid_host_t)
899
{
900
if (cf.host_t != dst.Index())
901
rvAsm->MV(dst, GPR(cf.host_t));
902
}
903
else if (cf.const_t)
904
{
905
EmitMov(dst, GetConstantRegU32(cf.MipsT()));
906
}
907
else
908
{
909
WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT()));
910
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_t]));
911
}
912
}
913
914
void CPU::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg, bool ignore_load_delays)
915
{
916
DebugAssert(reg < Reg::count);
917
if (ignore_load_delays && m_load_delay_register == reg)
918
{
919
if (m_load_delay_value_register == NUM_HOST_REGS)
920
rvAsm->LW(dst, PTR(&g_state.load_delay_value));
921
else
922
rvAsm->MV(dst, GPR(m_load_delay_value_register));
923
}
924
else if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
925
{
926
rvAsm->MV(dst, GPR(hreg.value()));
927
}
928
else if (HasConstantReg(reg))
929
{
930
EmitMov(dst, GetConstantRegU32(reg));
931
}
932
else
933
{
934
rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
935
}
936
}
937
938
void CPU::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */,
939
Reg arg3reg /* = Reg::count */)
940
{
941
DebugAssert(g_settings.gpu_pgxp_enable);
942
943
Flush(FLUSH_FOR_C_CALL);
944
945
if (arg2reg != Reg::count)
946
MoveMIPSRegToReg(RARG2, arg2reg);
947
if (arg3reg != Reg::count)
948
MoveMIPSRegToReg(RARG3, arg3reg);
949
950
EmitMov(RARG1, arg1val);
951
EmitCall(func);
952
}
953
954
void CPU::RISCV64Recompiler::Flush(u32 flags)
955
{
956
Recompiler::Flush(flags);
957
958
if (flags & FLUSH_PC && m_dirty_pc)
959
{
960
StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc);
961
m_dirty_pc = false;
962
}
963
964
if (flags & FLUSH_INSTRUCTION_BITS)
965
{
966
// This sucks, but it's only used for fallbacks.
967
Panic("Not implemented");
968
}
969
970
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
971
{
972
// This sucks :(
973
// TODO: make it a function?
974
rvAsm->LBU(RARG1, PTR(&g_state.load_delay_reg));
975
rvAsm->LW(RARG2, PTR(&g_state.load_delay_value));
976
rvAsm->SLLI(RARG1, RARG1, 2); // *4
977
rvAsm->ADD(RARG1, RARG1, RSTATE);
978
rvAsm->SW(RARG2, OFFSETOF(CPU::State, regs.r[0]), RARG1);
979
rvAsm->LI(RSCRATCH, static_cast<u8>(Reg::count));
980
rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
981
m_load_delay_dirty = false;
982
}
983
984
if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count)
985
{
986
if (m_load_delay_value_register != NUM_HOST_REGS)
987
FreeHostReg(m_load_delay_value_register);
988
989
EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register));
990
rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
991
m_load_delay_register = Reg::count;
992
m_load_delay_dirty = true;
993
}
994
995
if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle)
996
{
997
// May as well flush cycles while we're here.
998
// GTE spanning blocks is very rare, we _could_ disable this for speed.
999
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
1000
rvAsm->LW(RARG2, PTR(&g_state.gte_completion_tick));
1001
if (m_cycles > 0)
1002
{
1003
SafeADDIW(RARG1, RARG1, m_cycles);
1004
m_cycles = 0;
1005
}
1006
Label no_stall;
1007
rvAsm->BGE(RARG1, RARG2, &no_stall);
1008
rvAsm->MV(RARG1, RARG2);
1009
rvAsm->Bind(&no_stall);
1010
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
1011
m_dirty_gte_done_cycle = false;
1012
}
1013
1014
if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles)
1015
{
1016
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
1017
1018
// update cycles at the same time
1019
if (flags & FLUSH_CYCLES && m_cycles > 0)
1020
{
1021
SafeADDIW(RARG1, RARG1, m_cycles);
1022
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
1023
m_gte_done_cycle -= m_cycles;
1024
m_cycles = 0;
1025
}
1026
1027
SafeADDIW(RARG1, RARG1, m_gte_done_cycle);
1028
rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));
1029
m_gte_done_cycle = 0;
1030
m_dirty_gte_done_cycle = true;
1031
}
1032
1033
if (flags & FLUSH_CYCLES && m_cycles > 0)
1034
{
1035
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
1036
SafeADDIW(RARG1, RARG1, m_cycles);
1037
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
1038
m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0);
1039
m_cycles = 0;
1040
}
1041
}
1042
1043
void CPU::RISCV64Recompiler::Compile_Fallback()
1044
{
1045
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
1046
inst->bits);
1047
1048
Flush(FLUSH_FOR_INTERPRETER);
1049
1050
#if 0
1051
cg->call(&CPU::RecompilerThunks::InterpretInstruction);
1052
1053
// TODO: make me less garbage
1054
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
1055
// but nothing should be going through here..
1056
Label no_load_delay;
1057
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
1058
cg->cmp(RWARG1, static_cast<u8>(Reg::count));
1059
cg->je(no_load_delay, CodeGenerator::T_SHORT);
1060
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
1061
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
1062
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
1063
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
1064
cg->L(no_load_delay);
1065
1066
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
1067
#else
1068
Panic("Fixme");
1069
#endif
1070
}
1071
1072
void CPU::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
1073
{
1074
if (!g_settings.cpu_recompiler_memory_exceptions)
1075
return;
1076
1077
DebugAssert(pcreg != RSCRATCH);
1078
rvAsm->ANDI(RSCRATCH, pcreg, 0x3);
1079
SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
1080
1081
BackupHostState();
1082
EndBlockWithException(Exception::AdEL);
1083
1084
RestoreHostState();
1085
SwitchToNearCode(false);
1086
}
1087
1088
void CPU::RISCV64Recompiler::Compile_jr(CompileFlags cf)
1089
{
1090
const GPR pcreg = CFGetRegS(cf);
1091
CheckBranchTarget(pcreg);
1092
1093
rvAsm->SW(pcreg, PTR(&g_state.pc));
1094
1095
CompileBranchDelaySlot(false);
1096
EndBlock(std::nullopt, true);
1097
}
1098
1099
void CPU::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
1100
{
1101
const GPR pcreg = CFGetRegS(cf);
1102
if (MipsD() != Reg::zero)
1103
SetConstantReg(MipsD(), GetBranchReturnAddress(cf));
1104
1105
CheckBranchTarget(pcreg);
1106
rvAsm->SW(pcreg, PTR(&g_state.pc));
1107
1108
CompileBranchDelaySlot(false);
1109
EndBlock(std::nullopt, true);
1110
}
1111
1112
void CPU::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
1113
{
1114
AssertRegOrConstS(cf);
1115
1116
const u32 taken_pc = GetConditionalBranchTarget(cf);
1117
1118
Flush(FLUSH_FOR_BRANCH);
1119
1120
DebugAssert(cf.valid_host_s);
1121
1122
// MipsT() here should equal zero for zero branches.
1123
DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);
1124
1125
Label taken;
1126
const GPR rs = CFGetRegS(cf);
1127
switch (cond)
1128
{
1129
case BranchCondition::Equal:
1130
case BranchCondition::NotEqual:
1131
{
1132
AssertRegOrConstT(cf);
1133
if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0))
1134
{
1135
(cond == BranchCondition::Equal) ? rvAsm->BEQZ(rs, &taken) : rvAsm->BNEZ(rs, &taken);
1136
}
1137
else
1138
{
1139
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
1140
if (!cf.valid_host_t)
1141
MoveTToReg(RARG1, cf);
1142
if (cond == Recompiler::BranchCondition::Equal)
1143
rvAsm->BEQ(rs, rt, &taken);
1144
else
1145
rvAsm->BNE(rs, rt, &taken);
1146
}
1147
}
1148
break;
1149
1150
case BranchCondition::GreaterThanZero:
1151
{
1152
rvAsm->BGTZ(rs, &taken);
1153
}
1154
break;
1155
1156
case BranchCondition::GreaterEqualZero:
1157
{
1158
rvAsm->BGEZ(rs, &taken);
1159
}
1160
break;
1161
1162
case BranchCondition::LessThanZero:
1163
{
1164
rvAsm->BLTZ(rs, &taken);
1165
}
1166
break;
1167
1168
case BranchCondition::LessEqualZero:
1169
{
1170
rvAsm->BLEZ(rs, &taken);
1171
}
1172
break;
1173
}
1174
1175
BackupHostState();
1176
if (!cf.delay_slot_swapped)
1177
CompileBranchDelaySlot();
1178
1179
EndBlock(m_compiler_pc, true);
1180
1181
rvAsm->Bind(&taken);
1182
1183
RestoreHostState();
1184
if (!cf.delay_slot_swapped)
1185
CompileBranchDelaySlot();
1186
1187
EndBlock(taken_pc, true);
1188
}
1189
1190
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
1191
{
1192
const GPR rs = CFGetRegS(cf);
1193
const GPR rt = CFGetRegT(cf);
1194
if (const u32 imm = inst->i.imm_sext32(); imm != 0)
1195
{
1196
if (!overflow)
1197
{
1198
SafeADDIW(rt, rs, imm);
1199
}
1200
else
1201
{
1202
SafeADDI(RARG1, rs, imm);
1203
SafeADDIW(rt, rs, imm);
1204
TestOverflow(RARG1, rt, rt);
1205
}
1206
}
1207
else if (rt.Index() != rs.Index())
1208
{
1209
rvAsm->MV(rt, rs);
1210
}
1211
}
1212
1213
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf)
1214
{
1215
Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
1216
}
1217
1218
void CPU::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
1219
{
1220
Compile_addi(cf, false);
1221
}
1222
1223
void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf)
1224
{
1225
Compile_slti(cf, true);
1226
}
1227
1228
void CPU::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
1229
{
1230
Compile_slti(cf, false);
1231
}
1232
1233
void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign)
1234
{
1235
if (sign)
1236
SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
1237
else
1238
SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
1239
}
1240
1241
void CPU::RISCV64Recompiler::Compile_andi(CompileFlags cf)
1242
{
1243
const GPR rt = CFGetRegT(cf);
1244
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1245
SafeANDI(rt, CFGetRegS(cf), imm);
1246
else
1247
EmitMov(rt, 0);
1248
}
1249
1250
void CPU::RISCV64Recompiler::Compile_ori(CompileFlags cf)
1251
{
1252
const GPR rt = CFGetRegT(cf);
1253
const GPR rs = CFGetRegS(cf);
1254
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1255
SafeORI(rt, rs, imm);
1256
else if (rt.Index() != rs.Index())
1257
rvAsm->MV(rt, rs);
1258
}
1259
1260
void CPU::RISCV64Recompiler::Compile_xori(CompileFlags cf)
1261
{
1262
const GPR rt = CFGetRegT(cf);
1263
const GPR rs = CFGetRegS(cf);
1264
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1265
SafeXORI(rt, rs, imm);
1266
else if (rt.Index() != rs.Index())
1267
rvAsm->MV(rt, rs);
1268
}
1269
1270
void CPU::RISCV64Recompiler::Compile_shift(CompileFlags cf,
1271
void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1272
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
1273
{
1274
const GPR rd = CFGetRegD(cf);
1275
const GPR rt = CFGetRegT(cf);
1276
if (inst->r.shamt > 0)
1277
(rvAsm->*op_const)(rd, rt, inst->r.shamt);
1278
else if (rd.Index() != rt.Index())
1279
rvAsm->MV(rd, rt);
1280
}
1281
1282
void CPU::RISCV64Recompiler::Compile_sll(CompileFlags cf)
1283
{
1284
Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
1285
}
1286
1287
void CPU::RISCV64Recompiler::Compile_srl(CompileFlags cf)
1288
{
1289
Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
1290
}
1291
1292
void CPU::RISCV64Recompiler::Compile_sra(CompileFlags cf)
1293
{
1294
Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
1295
}
1296
1297
void CPU::RISCV64Recompiler::Compile_variable_shift(
1298
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1299
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
1300
{
1301
const GPR rd = CFGetRegD(cf);
1302
1303
AssertRegOrConstS(cf);
1304
AssertRegOrConstT(cf);
1305
1306
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1307
if (!cf.valid_host_t)
1308
MoveTToReg(rt, cf);
1309
1310
if (cf.const_s)
1311
{
1312
if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0)
1313
(rvAsm->*op_const)(rd, rt, shift & 31u);
1314
else if (rd.Index() != rt.Index())
1315
rvAsm->MV(rd, rt);
1316
}
1317
else
1318
{
1319
(rvAsm->*op)(rd, rt, CFGetRegS(cf));
1320
}
1321
}
1322
1323
void CPU::RISCV64Recompiler::Compile_sllv(CompileFlags cf)
1324
{
1325
Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
1326
}
1327
1328
void CPU::RISCV64Recompiler::Compile_srlv(CompileFlags cf)
1329
{
1330
Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
1331
}
1332
1333
void CPU::RISCV64Recompiler::Compile_srav(CompileFlags cf)
1334
{
1335
Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
1336
}
1337
1338
void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign)
1339
{
1340
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1341
if (!cf.valid_host_s)
1342
MoveSToReg(rs, cf);
1343
1344
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1345
if (!cf.valid_host_t)
1346
MoveTToReg(rt, cf);
1347
1348
// TODO: if lo/hi gets killed, we can use a 32-bit multiply
1349
const GPR lo = CFGetRegLO(cf);
1350
const GPR hi = CFGetRegHI(cf);
1351
1352
if (sign)
1353
{
1354
rvAsm->MUL(lo, rs, rt);
1355
rvAsm->SRAI64(hi, lo, 32);
1356
EmitDSExtW(lo, lo);
1357
}
1358
else
1359
{
1360
// Need to make it unsigned.
1361
EmitDUExtW(RARG1, rs);
1362
EmitDUExtW(RARG2, rt);
1363
rvAsm->MUL(lo, RARG1, RARG2);
1364
rvAsm->SRAI64(hi, lo, 32);
1365
EmitDSExtW(lo, lo);
1366
}
1367
}
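// MULT/MULTU are implemented as one 64-bit MUL on operands pre-extended to 64 bits: HI is the
// upper 32 bits of the product (SRAI by 32) and LO is the lower 32 bits, re-sign-extended so
// the host register stays in canonical RV64 form.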
1368
1369
void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf)
1370
{
1371
Compile_mult(cf, true);
1372
}
1373
1374
void CPU::RISCV64Recompiler::Compile_multu(CompileFlags cf)
1375
{
1376
Compile_mult(cf, false);
1377
}
1378
1379
void CPU::RISCV64Recompiler::Compile_div(CompileFlags cf)
1380
{
1381
// 36 Volume I: RISC-V User-Level ISA V2.2
1382
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1383
if (!cf.valid_host_s)
1384
MoveSToReg(rs, cf);
1385
1386
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1387
if (!cf.valid_host_t)
1388
MoveTToReg(rt, cf);
1389
1390
const GPR rlo = CFGetRegLO(cf);
1391
const GPR rhi = CFGetRegHI(cf);
1392
1393
Label done;
1394
Label not_divide_by_zero;
1395
rvAsm->BNEZ(rt, &not_divide_by_zero);
1396
rvAsm->MV(rhi, rs); // hi = num
1397
rvAsm->SRAI64(rlo, rs, 63);
1398
rvAsm->ANDI(rlo, rlo, 2);
1399
rvAsm->ADDI(rlo, rlo, -1); // lo = s >= 0 ? -1 : 1
1400
rvAsm->J(&done);
1401
1402
rvAsm->Bind(&not_divide_by_zero);
1403
Label not_unrepresentable;
1404
EmitMov(RSCRATCH, static_cast<u32>(-1));
1405
rvAsm->BNE(rt, RSCRATCH, &not_unrepresentable);
1406
EmitMov(rlo, 0x80000000u);
1407
rvAsm->BNE(rs, rlo, &not_unrepresentable);
1408
EmitMov(rhi, 0);
1409
rvAsm->J(&done);
1410
1411
rvAsm->Bind(&not_unrepresentable);
1412
1413
rvAsm->DIVW(rlo, rs, rt);
1414
rvAsm->REMW(rhi, rs, rt);
1415
1416
rvAsm->Bind(&done);
1417
}
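// The explicit branches reproduce the MIPS DIV edge cases: division by zero yields HI = rs and
// LO = (rs >= 0 ? -1 : 1), and 0x80000000 / -1 yields LO = 0x80000000, HI = 0. RISC-V's
// DIVW/REMW return an all-ones quotient on division by zero regardless of the dividend's sign,
// so at least that case cannot be left to the hardware instructions.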
1418
1419
void CPU::RISCV64Recompiler::Compile_divu(CompileFlags cf)
1420
{
1421
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1422
if (!cf.valid_host_s)
1423
MoveSToReg(rs, cf);
1424
1425
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1426
if (!cf.valid_host_t)
1427
MoveTToReg(rt, cf);
1428
1429
const GPR rlo = CFGetRegLO(cf);
1430
const GPR rhi = CFGetRegHI(cf);
1431
1432
// Semantics match? :-)
1433
rvAsm->DIVUW(rlo, rs, rt);
1434
rvAsm->REMUW(rhi, rs, rt);
1435
}
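// They do for the unsigned case: both MIPS DIVU and RISC-V DIVUW/REMUW return a quotient of
// 0xFFFFFFFF and a remainder equal to the dividend when dividing by zero, so no special-casing
// is needed here.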
1436
1437
void CPU::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
1438
const biscuit::GPR& reg_to_discard)
1439
{
1440
SwitchToFarCode(true, &Assembler::BEQ, long_res, res);
1441
1442
BackupHostState();
1443
1444
// toss the result
1445
ClearHostReg(reg_to_discard.Index());
1446
1447
EndBlockWithException(Exception::Ov);
1448
1449
RestoreHostState();
1450
1451
SwitchToNearCode(false);
1452
}
1453
1454
void CPU::RISCV64Recompiler::Compile_dst_op(
1455
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1456
void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
1457
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
1458
{
1459
AssertRegOrConstS(cf);
1460
AssertRegOrConstT(cf);
1461
1462
const GPR rd = CFGetRegD(cf);
1463
1464
if (overflow)
1465
{
1466
const GPR rs = CFGetSafeRegS(cf, RARG1);
1467
const GPR rt = CFGetSafeRegT(cf, RARG2);
1468
(rvAsm->*op)(RARG3, rs, rt);
1469
(rvAsm->*op_long)(rd, rs, rt);
1470
TestOverflow(RARG3, rd, rd);
1471
return;
1472
}
1473
1474
if (cf.valid_host_s && cf.valid_host_t)
1475
{
1476
(rvAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
1477
}
1478
else if (commutative && (cf.const_s || cf.const_t))
1479
{
1480
const GPR src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
1481
if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
1482
{
1483
(this->*op_const)(rd, src, cv);
1484
}
1485
else
1486
{
1487
if (rd.Index() != src.Index())
1488
rvAsm->MV(rd, src);
1489
overflow = false;
1490
}
1491
}
1492
else if (cf.const_s)
1493
{
1494
if (HasConstantRegValue(cf.MipsS(), 0))
1495
{
1496
(rvAsm->*op)(rd, zero, CFGetRegT(cf));
1497
}
1498
else
1499
{
1500
EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS()));
1501
(rvAsm->*op)(rd, RSCRATCH, CFGetRegT(cf));
1502
}
1503
}
1504
else if (cf.const_t)
1505
{
1506
const GPR rs = CFGetRegS(cf);
1507
if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
1508
{
1509
(this->*op_const)(rd, rs, cv);
1510
}
1511
else
1512
{
1513
if (rd.Index() != rs.Index())
1514
rvAsm->MV(rd, rs);
1515
overflow = false;
1516
}
1517
}
1518
}
1519
1520
void CPU::RISCV64Recompiler::Compile_add(CompileFlags cf)
1521
{
1522
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,
1523
g_settings.cpu_recompiler_memory_exceptions);
1524
}
1525
1526
void CPU::RISCV64Recompiler::Compile_addu(CompileFlags cf)
1527
{
1528
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);
1529
}
1530
1531
void CPU::RISCV64Recompiler::Compile_sub(CompileFlags cf)
1532
{
1533
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,
1534
g_settings.cpu_recompiler_memory_exceptions);
1535
}
1536
1537
void CPU::RISCV64Recompiler::Compile_subu(CompileFlags cf)
1538
{
1539
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);
1540
}
1541
1542
void CPU::RISCV64Recompiler::Compile_and(CompileFlags cf)
1543
{
1544
AssertRegOrConstS(cf);
1545
AssertRegOrConstT(cf);
1546
1547
// special cases - and with self -> self, and with 0 -> 0
1548
const GPR regd = CFGetRegD(cf);
1549
if (cf.MipsS() == cf.MipsT())
1550
{
1551
rvAsm->MV(regd, CFGetRegS(cf));
1552
return;
1553
}
1554
else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
1555
{
1556
EmitMov(regd, 0);
1557
return;
1558
}
1559
1560
Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false);
1561
}
1562
1563
void CPU::RISCV64Recompiler::Compile_or(CompileFlags cf)
1564
{
1565
AssertRegOrConstS(cf);
1566
AssertRegOrConstT(cf);
1567
1568
// or/nor with 0 -> no effect
1569
const GPR regd = CFGetRegD(cf);
1570
if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())
1571
{
1572
cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
1573
return;
1574
}
1575
1576
Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false);
1577
}
1578
1579
void CPU::RISCV64Recompiler::Compile_xor(CompileFlags cf)
1580
{
1581
AssertRegOrConstS(cf);
1582
AssertRegOrConstT(cf);
1583
1584
const GPR regd = CFGetRegD(cf);
1585
if (cf.MipsS() == cf.MipsT())
1586
{
1587
// xor with self -> zero
1588
EmitMov(regd, 0);
1589
return;
1590
}
1591
else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
1592
{
1593
// xor with zero -> no effect
1594
cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
1595
return;
1596
}
1597
1598
Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false);
1599
}
1600
1601
void CPU::RISCV64Recompiler::Compile_nor(CompileFlags cf)
1602
{
1603
Compile_or(cf);
1604
rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));
1605
}
1606
1607
void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf)
1608
{
1609
Compile_slt(cf, true);
1610
}
1611
1612
void CPU::RISCV64Recompiler::Compile_sltu(CompileFlags cf)
1613
{
1614
Compile_slt(cf, false);
1615
}
1616
1617
void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign)
1618
{
1619
AssertRegOrConstS(cf);
1620
AssertRegOrConstT(cf);
1621
1622
const GPR rd = CFGetRegD(cf);
1623
const GPR rs = CFGetSafeRegS(cf, RARG1);
1624
1625
if (cf.const_t && rvIsValidSExtITypeImm(GetConstantRegU32(cf.MipsT())))
1626
{
1627
if (sign)
1628
rvAsm->SLTI(rd, rs, GetConstantRegS32(cf.MipsT()));
1629
else
1630
rvAsm->SLTIU(rd, rs, GetConstantRegS32(cf.MipsT()));
1631
}
1632
else
1633
{
1634
const GPR rt = CFGetSafeRegT(cf, RARG2);
1635
if (sign)
1636
rvAsm->SLT(rd, rs, rt);
1637
else
1638
rvAsm->SLTU(rd, rs, rt);
1639
}
1640
}
1641
1642
biscuit::GPR CPU::RISCV64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,
1643
const std::optional<VirtualMemoryAddress>& address,
1644
const std::optional<const biscuit::GPR>& reg)
1645
{
1646
const u32 imm = inst->i.imm_sext32();
1647
if (cf.valid_host_s && imm == 0 && !reg.has_value())
1648
return CFGetRegS(cf);
1649
1650
const GPR dst = reg.has_value() ? reg.value() : RARG1;
1651
if (address.has_value())
1652
{
1653
EmitMov(dst, address.value());
1654
}
1655
else if (imm == 0)
1656
{
1657
if (cf.valid_host_s)
1658
{
1659
if (const GPR src = CFGetRegS(cf); src.Index() != dst.Index())
1660
rvAsm->MV(dst, CFGetRegS(cf));
1661
}
1662
else
1663
{
1664
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
1665
}
1666
}
1667
else
1668
{
1669
if (cf.valid_host_s)
1670
{
1671
SafeADDIW(dst, CFGetRegS(cf), inst->i.imm_sext32());
1672
}
1673
else
1674
{
1675
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
1676
SafeADDIW(dst, dst, inst->i.imm_sext32());
1677
}
1678
}
1679
1680
return dst;
1681
}
1682
1683
template<typename RegAllocFn>
1684
biscuit::GPR CPU::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
1685
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
1686
{
1687
if (use_fastmem)
1688
{
1689
m_cycles += Bus::RAM_READ_TICKS;
1690
1691
// TODO: Make this better. If we're loading the address from state, we can use LWU instead, and skip this.
1692
// TODO: LUT fastmem
1693
const GPR dst = dst_reg_alloc();
1694
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
1695
rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);
1696
1697
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
1698
{
1699
DebugAssert(addr_reg.Index() != RARG3.Index());
1700
rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
1701
rvAsm->SLLI64(RARG3, RARG3, 8);
1702
rvAsm->ADD(RARG3, RARG3, RMEMBASE);
1703
rvAsm->LD(RARG3, 0, RARG3);
1704
rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
1705
}
1706
else
1707
{
1708
rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
1709
}
1710
1711
u8* start = m_emitter->GetCursorPointer();
1712
switch (size)
1713
{
1714
case MemoryAccessSize::Byte:
1715
sign ? rvAsm->LB(dst, 0, RSCRATCH) : rvAsm->LBU(dst, 0, RSCRATCH);
1716
break;
1717
1718
case MemoryAccessSize::HalfWord:
1719
sign ? rvAsm->LH(dst, 0, RSCRATCH) : rvAsm->LHU(dst, 0, RSCRATCH);
1720
break;
1721
1722
case MemoryAccessSize::Word:
1723
rvAsm->LW(dst, 0, RSCRATCH);
1724
break;
1725
}
1726
1727
// We need a nop, because the slowmem jump might be more than 1MB away.
1728
rvAsm->NOP();
1729
1730
AddLoadStoreInfo(start, 8, addr_reg.Index(), dst.Index(), size, sign, true);
1731
return dst;
1732
}
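// Slow path: call back into the C++ memory handlers. The fastmem variant above zero-extends the
// guest address, adds it to RMEMBASE (directly, or through the page LUT when
// CPUFastmemMode::LUT is active), and records the 8-byte access site via AddLoadStoreInfo() so
// that a host fault on it can later be backpatched to a slow-memory handler.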
1733
1734
if (addr_reg.Index() != RARG1.Index())
1735
rvAsm->MV(RARG1, addr_reg);
1736
1737
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
1738
switch (size)
1739
{
1740
case MemoryAccessSize::Byte:
1741
{
1742
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) :
1743
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte));
1744
}
1745
break;
1746
case MemoryAccessSize::HalfWord:
1747
{
1748
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) :
1749
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord));
1750
}
1751
break;
1752
case MemoryAccessSize::Word:
1753
{
1754
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) :
1755
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord));
1756
}
1757
break;
1758
}
1759
1760
// TODO: turn this into an asm function instead
1761
if (checked)
1762
{
1763
rvAsm->SRLI64(RSCRATCH, RRET, 63);
1764
SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
1765
BackupHostState();
1766
1767
// Need to stash this in a temp because of the flush.
1768
const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1769
rvAsm->NEG(temp, RRET);
1770
rvAsm->SLLIW(temp, temp, 2);
1771
1772
Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);
1773
1774
// cause_bits = (-result << 2) | BD | cop_n
1775
SafeORI(RARG1, temp,
1776
Cop0Registers::CAUSE::MakeValueForException(
1777
static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
1778
EmitMov(RARG2, m_current_instruction_pc);
1779
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
1780
FreeHostReg(temp.Index());
1781
EndBlock(std::nullopt, true);
1782
1783
RestoreHostState();
1784
SwitchToNearCode(false);
1785
}
1786
1787
const GPR dst_reg = dst_reg_alloc();
1788
switch (size)
1789
{
1790
case MemoryAccessSize::Byte:
1791
{
1792
sign ? EmitSExtB(dst_reg, RRET) : EmitUExtB(dst_reg, RRET);
1793
}
1794
break;
1795
case MemoryAccessSize::HalfWord:
1796
{
1797
sign ? EmitSExtH(dst_reg, RRET) : EmitUExtH(dst_reg, RRET);
1798
}
1799
break;
1800
case MemoryAccessSize::Word:
1801
{
1802
// Need to undo the zero-extend.
1803
if (checked)
1804
rvEmitDSExtW(rvAsm, dst_reg, RRET);
1805
else if (dst_reg.Index() != RRET.Index())
1806
rvAsm->MV(dst_reg, RRET);
1807
}
1808
break;
1809
}
1810
1811
return dst_reg;
1812
}
1813
1814
void CPU::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
1815
MemoryAccessSize size, bool use_fastmem)
1816
{
1817
if (use_fastmem)
1818
{
1819
DebugAssert(value_reg != RSCRATCH);
1820
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
1821
rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);
1822
1823
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
1824
{
1825
DebugAssert(addr_reg.Index() != RARG3.Index());
1826
rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
1827
rvAsm->SLLI64(RARG3, RARG3, 8);
1828
rvAsm->ADD(RARG3, RARG3, RMEMBASE);
1829
rvAsm->LD(RARG3, 0, RARG3);
1830
rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
1831
}
1832
else
1833
{
1834
rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
1835
}
1836
1837
u8* start = m_emitter->GetCursorPointer();
1838
switch (size)
1839
{
1840
case MemoryAccessSize::Byte:
1841
rvAsm->SB(value_reg, 0, RSCRATCH);
1842
break;
1843
1844
case MemoryAccessSize::HalfWord:
1845
rvAsm->SH(value_reg, 0, RSCRATCH);
1846
break;
1847
1848
case MemoryAccessSize::Word:
1849
rvAsm->SW(value_reg, 0, RSCRATCH);
1850
break;
1851
}
1852
1853
// We need a nop, because the slowmem jump might be more than 1MB away.
1854
rvAsm->NOP();
1855
1856
AddLoadStoreInfo(start, 8, addr_reg.Index(), value_reg.Index(), size, false, false);
1857
return;
1858
}
1859
1860
if (addr_reg.Index() != RARG1.Index())
1861
rvAsm->MV(RARG1, addr_reg);
1862
if (value_reg.Index() != RARG2.Index())
1863
rvAsm->MV(RARG2, value_reg);
1864
1865
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
1866
switch (size)
1867
{
1868
case MemoryAccessSize::Byte:
1869
{
1870
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) :
1871
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
1872
}
1873
break;
1874
case MemoryAccessSize::HalfWord:
1875
{
1876
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) :
1877
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
1878
}
1879
break;
1880
case MemoryAccessSize::Word:
1881
{
1882
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) :
1883
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
1884
}
1885
break;
1886
}
1887
1888
// TODO: turn this into an asm function instead
1889
if (checked)
1890
{
1891
SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
1892
BackupHostState();
1893
1894
// Need to stash this in a temp because of the flush.
1895
const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1896
rvAsm->SLLIW(temp, RRET, 2);
1897
1898
Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);
1899
1900
// cause_bits = (result << 2) | BD | cop_n
1901
SafeORI(RARG1, temp,
1902
Cop0Registers::CAUSE::MakeValueForException(
1903
static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
1904
EmitMov(RARG2, m_current_instruction_pc);
1905
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
1906
FreeHostReg(temp.Index());
1907
EndBlock(std::nullopt, true);
1908
1909
RestoreHostState();
1910
SwitchToNearCode(false);
1911
}
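// Illustrative sketch only (placeholder names): unlike the read path, the checked write thunk is
// assumed to return 0 on success and the raw exception code on failure, so the far path is roughly:
//   const u32 result = WriteMemoryXXX(addr, value);
//   if (result != 0)
//   {
//     const u32 cause_bits = (result << 2) |
//       Cop0Registers::CAUSE::MakeValueForException(Exception(0), in_branch_delay_slot, false, cop_n);
//     RaiseException(cause_bits, current_instruction_pc);
//   }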
1912
}
1913
1914
void CPU::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
1915
const std::optional<VirtualMemoryAddress>& address)
1916
{
1917
const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
1918
std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
1919
std::optional<GPR>();
1920
FlushForLoadStore(address, false, use_fastmem);
1921
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
1922
const GPR data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
1923
if (cf.MipsT() == Reg::zero)
1924
return RRET;
1925
1926
return GPR(AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
1927
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT()));
1928
});
1929
1930
if (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero)
1931
{
1932
Flush(FLUSH_FOR_C_CALL);
1933
1934
EmitMov(RARG1, inst->bits);
1935
rvAsm->MV(RARG2, addr);
1936
rvAsm->MV(RARG3, data);
1937
EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]);
1938
FreeHostReg(addr_reg.value().Index());
1939
}
1940
}
1941
1942
void CPU::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
1943
const std::optional<VirtualMemoryAddress>& address)
1944
{
1945
DebugAssert(size == MemoryAccessSize::Word && !sign);
1946
1947
const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1948
FlushForLoadStore(address, false, use_fastmem);
1949
1950
// TODO: if address is constant, this can be simplified..
1951
1952
// If we're coming from another block, just flush the load delay and hope for the best..
1953
if (m_load_delay_dirty)
1954
UpdateLoadDelay();
1955
1956
// We'd need to be careful here if we weren't overwriting it..
1957
ComputeLoadStoreAddressArg(cf, address, addr);
1958
1959
// Do PGXP first, since it does its own load.
1960
if (g_settings.gpu_pgxp_enable && inst->r.rt != Reg::zero)
1961
{
1962
Flush(FLUSH_FOR_C_CALL);
1963
EmitMov(RARG1, inst->bits);
1964
rvAsm->MV(RARG2, addr);
1965
MoveMIPSRegToReg(RARG3, inst->r.rt, true);
1966
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWx));
1967
}
1968
1969
rvAsm->ANDI(RARG1, addr, ~0x3u);
1970
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
1971
1972
if (inst->r.rt == Reg::zero)
1973
{
1974
FreeHostReg(addr.Index());
1975
return;
1976
}
1977
1978
// lwl/lwr from a load-delayed value takes the new value, but is itself load-delayed, so the original value is
1979
// never written back. NOTE: can't trust T in cf because of the flush
1980
const Reg rt = inst->r.rt;
1981
GPR value;
1982
if (m_load_delay_register == rt)
1983
{
1984
const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?
1985
AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) :
1986
m_load_delay_value_register;
1987
RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt);
1988
value = GPR(existing_ld_rt);
1989
}
1990
else
1991
{
1992
if constexpr (EMULATE_LOAD_DELAYS)
1993
{
1994
value = GPR(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt));
1995
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
1996
rvAsm->MV(value, GPR(rtreg.value()));
1997
else if (HasConstantReg(rt))
1998
EmitMov(value, GetConstantRegU32(rt));
1999
else
2000
rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));
2001
}
2002
else
2003
{
2004
value = GPR(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt));
2005
}
2006
}
2007
2008
DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
2009
rvAsm->ANDI(RARG2, addr, 3);
2010
rvAsm->SLLIW(RARG2, RARG2, 3); // *8
2011
EmitMov(RARG3, 24);
2012
rvAsm->SUBW(RARG3, RARG3, RARG2);
2013
2014
if (inst->op == InstructionOp::lwl)
2015
{
2016
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
2017
// new_value = (value & mask) | (RRET << (24 - shift));
2018
EmitMov(RSCRATCH, 0xFFFFFFu);
2019
rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);
2020
rvAsm->AND(value, value, RSCRATCH);
2021
rvAsm->SLLW(RRET, RRET, RARG3);
2022
rvAsm->OR(value, value, RRET);
2023
}
2024
else
2025
{
2026
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
2027
// new_value = (value & mask) | (RRET >> shift);
2028
rvAsm->SRLW(RRET, RRET, RARG2);
2029
EmitMov(RSCRATCH, 0xFFFFFF00u);
2030
rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);
2031
rvAsm->AND(value, value, RSCRATCH);
2032
rvAsm->OR(value, value, RRET);
2033
}
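// Worked example of the merge above (values follow directly from the masks in the comments):
// with (addr & 3) == 1, shift = 8, so:
//   lwl: rt = (rt & 0x0000FFFFu) | (mem << 16);   // mask = 0x00FFFFFF >> 8
//   lwr: rt = (rt & 0xFF000000u) | (mem >> 8);    // mask = 0xFFFFFF00 << 16
// where "mem" is the aligned word read into RRET and "rt" is the (possibly load-delayed) value reg.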
2034
2035
FreeHostReg(addr.Index());
2036
}
2037
2038
void CPU::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2039
const std::optional<VirtualMemoryAddress>& address)
2040
{
2041
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
2042
const auto [ptr, action] = GetGTERegisterPointer(index, true);
2043
const std::optional<GPR> addr_reg =
2044
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
2045
FlushForLoadStore(address, false, use_fastmem);
2046
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
2047
const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
2048
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
2049
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
2050
RRET;
2051
});
2052
2053
switch (action)
2054
{
2055
case GTERegisterAccessAction::Ignore:
2056
{
2057
break;
2058
}
2059
2060
case GTERegisterAccessAction::Direct:
2061
{
2062
rvAsm->SW(value, PTR(ptr));
2063
break;
2064
}
2065
2066
case GTERegisterAccessAction::SignExtend16:
2067
{
2068
EmitSExtH(RARG3, value);
2069
rvAsm->SW(RARG3, PTR(ptr));
2070
break;
2071
}
2072
2073
case GTERegisterAccessAction::ZeroExtend16:
2074
{
2075
EmitUExtH(RARG3, value);
2076
rvAsm->SW(RARG3, PTR(ptr));
2077
break;
2078
}
2079
2080
case GTERegisterAccessAction::CallHandler:
2081
{
2082
Flush(FLUSH_FOR_C_CALL);
2083
rvAsm->MV(RARG2, value);
2084
EmitMov(RARG1, index);
2085
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
2086
break;
2087
}
2088
2089
case GTERegisterAccessAction::PushFIFO:
2090
{
2091
// SXY0 <- SXY1
2092
// SXY1 <- SXY2
2093
// SXY2 <- SXYP
2094
DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
2095
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
2096
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
2097
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
2098
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
2099
rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));
2100
break;
2101
}
2102
2103
default:
2104
{
2105
Panic("Unknown action");
2106
return;
2107
}
2108
}
2109
2110
if (g_settings.gpu_pgxp_enable)
2111
{
2112
Flush(FLUSH_FOR_C_CALL);
2113
rvAsm->MV(RARG3, value);
2114
if (value.Index() != RRET.Index())
2115
FreeHostReg(value.Index());
2116
rvAsm->MV(RARG2, addr);
2117
FreeHostReg(addr_reg.value().Index());
2118
EmitMov(RARG1, inst->bits);
2119
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
2120
}
2121
}
2122
2123
void CPU::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2124
const std::optional<VirtualMemoryAddress>& address)
2125
{
2126
AssertRegOrConstS(cf);
2127
AssertRegOrConstT(cf);
2128
2129
const std::optional<GPR> addr_reg =
2130
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
2131
FlushForLoadStore(address, true, use_fastmem);
2132
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
2133
const GPR data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
2134
if (!cf.valid_host_t)
2135
MoveTToReg(RARG2, cf);
2136
2137
GenerateStore(addr, data, size, use_fastmem);
2138
2139
if (g_settings.gpu_pgxp_enable)
2140
{
2141
Flush(FLUSH_FOR_C_CALL);
2142
MoveMIPSRegToReg(RARG3, cf.MipsT());
2143
rvAsm->MV(RARG2, addr);
2144
EmitMov(RARG1, inst->bits);
2145
EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]);
2146
FreeHostReg(addr_reg.value().Index());
2147
}
2148
}
2149
2150
void CPU::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2151
const std::optional<VirtualMemoryAddress>& address)
2152
{
2153
DebugAssert(size == MemoryAccessSize::Word && !sign);
2154
2155
// TODO: this can take over rt's value if it's no longer needed
2156
// NOTE: can't trust T in cf because of the alloc
2157
const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
2158
2159
FlushForLoadStore(address, true, use_fastmem);
2160
2161
// TODO: if address is constant, this can be simplified..
2162
// We'd need to be careful here if we weren't overwriting it..
2163
ComputeLoadStoreAddressArg(cf, address, addr);
2164
2165
if (g_settings.gpu_pgxp_enable)
2166
{
2167
Flush(FLUSH_FOR_C_CALL);
2168
EmitMov(RARG1, inst->bits);
2169
rvAsm->MV(RARG2, addr);
2170
MoveMIPSRegToReg(RARG3, inst->r.rt);
2171
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWx));
2172
}
2173
2174
rvAsm->ANDI(RARG1, addr, ~0x3u);
2175
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
2176
2177
rvAsm->ANDI(RSCRATCH, addr, 3);
2178
rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
2179
rvAsm->ANDI(addr, addr, ~0x3u);
2180
2181
// Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
2182
if (!g_settings.gpu_pgxp_enable)
2183
MoveMIPSRegToReg(RARG2, inst->r.rt);
2184
2185
if (inst->op == InstructionOp::swl)
2186
{
2187
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
2188
// new_value = (RRET & mem_mask) | (value >> (24 - shift));
2189
EmitMov(RARG3, 0xFFFFFF00u);
2190
rvAsm->SLLW(RARG3, RARG3, RSCRATCH);
2191
rvAsm->AND(RRET, RRET, RARG3);
2192
2193
EmitMov(RARG3, 24);
2194
rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
2195
rvAsm->SRLW(RARG2, RARG2, RARG3);
2196
rvAsm->OR(RARG2, RARG2, RRET);
2197
}
2198
else
2199
{
2200
// const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);
2201
// new_value = (RRET & mem_mask) | (value << shift);
2202
rvAsm->SLLW(RARG2, RARG2, RSCRATCH);
2203
2204
EmitMov(RARG3, 24);
2205
rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
2206
EmitMov(RSCRATCH, 0x00FFFFFFu);
2207
rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG3);
2208
rvAsm->AND(RRET, RRET, RSCRATCH);
2209
rvAsm->OR(RARG2, RARG2, RRET);
2210
}
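// Worked example of the merge above (values follow directly from the masks in the comments):
// with (addr & 3) == 1, shift = 8, so:
//   swl: new_word = (mem & 0xFFFF0000u) | (rt >> 16);   // mem_mask = 0xFFFFFF00 << 8
//   swr: new_word = (mem & 0x000000FFu) | (rt << 8);    // mem_mask = 0x00FFFFFF >> 16
// where "mem" is the aligned word read into RRET and new_word is stored back to (addr & ~3).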
2211
2212
GenerateStore(addr, RARG2, MemoryAccessSize::Word, use_fastmem);
2213
FreeHostReg(addr.Index());
2214
}
2215
2216
void CPU::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2217
const std::optional<VirtualMemoryAddress>& address)
2218
{
2219
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
2220
const auto [ptr, action] = GetGTERegisterPointer(index, false);
2221
const GPR addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
2222
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
2223
RARG1;
2224
const GPR data = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
2225
FlushForLoadStore(address, true, use_fastmem);
2226
ComputeLoadStoreAddressArg(cf, address, addr);
2227
2228
switch (action)
2229
{
2230
case GTERegisterAccessAction::Direct:
2231
{
2232
rvAsm->LW(data, PTR(ptr));
2233
}
2234
break;
2235
2236
case GTERegisterAccessAction::CallHandler:
2237
{
2238
// should already be flushed.. except in fastmem case
2239
Flush(FLUSH_FOR_C_CALL);
2240
EmitMov(RARG1, index);
2241
EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
2242
rvAsm->MV(data, RRET);
2243
}
2244
break;
2245
2246
default:
2247
{
2248
Panic("Unknown action");
2249
}
2250
break;
2251
}
2252
2253
GenerateStore(addr, data, size, use_fastmem);
2254
2255
if (!g_settings.gpu_pgxp_enable)
2256
{
2257
if (addr.Index() != RARG1.Index())
2258
FreeHostReg(addr.Index());
2259
}
2260
else
2261
{
2262
// TODO: This can be simplified because we don't need to validate in PGXP..
2263
Flush(FLUSH_FOR_C_CALL);
2264
rvAsm->MV(RARG3, data);
2265
FreeHostReg(data.Index());
2266
rvAsm->MV(RARG2, addr);
2267
FreeHostReg(addr.Index());
2268
EmitMov(RARG1, inst->bits);
2269
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
2270
}
2271
}
2272
2273
void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
2274
{
2275
// TODO: we need better constant setting here.. which will need backprop
2276
AssertRegOrConstT(cf);
2277
2278
const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
2279
const u32* ptr = GetCop0RegPtr(reg);
2280
const u32 mask = GetCop0RegWriteMask(reg);
2281
if (!ptr)
2282
{
2283
Compile_Fallback();
2284
return;
2285
}
2286
2287
if (mask == 0)
2288
{
2289
// if it's a read-only register, ignore
2290
DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
2291
return;
2292
}
2293
2294
// for some registers, we need to test certain bits
2295
const bool needs_bit_test = (reg == Cop0Reg::SR);
2296
const GPR new_value = RARG1;
2297
const GPR old_value = RARG2;
2298
const GPR changed_bits = RARG3;
2299
const GPR mask_reg = RSCRATCH;
2300
2301
// Load old value
2302
rvAsm->LW(old_value, PTR(ptr));
2303
2304
// No way we fit this in an immediate..
2305
EmitMov(mask_reg, mask);
2306
2307
// update value
2308
// TODO: This is creating pointless MV instructions.. why?
2309
if (cf.valid_host_t)
2310
rvAsm->AND(new_value, CFGetRegT(cf), mask_reg);
2311
else
2312
EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);
2313
2314
if (needs_bit_test)
2315
rvAsm->XOR(changed_bits, old_value, new_value);
2316
rvAsm->NOT(mask_reg, mask_reg);
2317
rvAsm->AND(old_value, old_value, mask_reg);
2318
rvAsm->OR(new_value, old_value, new_value);
2319
rvAsm->SW(new_value, PTR(ptr));
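// Equivalent update in plain C (sketch; matches the instruction sequence above):
//   const u32 masked_t = t_value & mask;
//   const u32 changed = old_value ^ masked_t;       // only computed for SR
//   const u32 merged = (old_value & ~mask) | masked_t;
//   *ptr = merged;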
2320
2321
if (reg == Cop0Reg::SR)
2322
{
2323
// TODO: replace with register backup
2324
// We could just inline the whole thing..
2325
Flush(FLUSH_FOR_C_CALL);
2326
2327
Label caches_unchanged;
2328
rvAsm->SRLIW(RSCRATCH, changed_bits, 16);
2329
rvAsm->ANDI(RSCRATCH, RSCRATCH, 1);
2330
rvAsm->BEQ(RSCRATCH, zero, &caches_unchanged);
2331
EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
2332
rvAsm->LW(new_value, PTR(ptr));
2333
if (CodeCache::IsUsingFastmem())
2334
rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
2335
rvAsm->Bind(&caches_unchanged);
2336
2337
TestInterrupts(RARG1);
2338
}
2339
else if (reg == Cop0Reg::CAUSE)
2340
{
2341
rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
2342
TestInterrupts(RARG1);
2343
}
2344
else if (reg == Cop0Reg::DCIC || reg == Cop0Reg::BPCM)
2345
{
2346
// need to check whether we're switching to debug mode
2347
Flush(FLUSH_FOR_C_CALL);
2348
EmitCall(reinterpret_cast<const void*>(&CPU::UpdateDebugDispatcherFlag));
2349
SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
2350
BackupHostState();
2351
Flush(FLUSH_FOR_EARLY_BLOCK_EXIT);
2352
EmitCall(reinterpret_cast<const void*>(&CPU::ExitExecution)); // does not return
2353
RestoreHostState();
2354
SwitchToNearCode(false);
2355
}
2356
}
2357
2358
void CPU::RISCV64Recompiler::Compile_rfe(CompileFlags cf)
2359
{
2360
// shift mode bits right two, preserving upper bits
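// i.e. roughly: sr = (sr & ~0xFu) | ((sr >> 2) & 0xFu), popping the KU/IE mode stack by one level.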
2361
rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
2362
rvAsm->SRLIW(RSCRATCH, RARG1, 2);
2363
rvAsm->ANDI(RSCRATCH, RSCRATCH, 0xf);
2364
rvAsm->ANDI(RARG1, RARG1, ~0xfu);
2365
rvAsm->OR(RARG1, RARG1, RSCRATCH);
2366
rvAsm->SW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
2367
2368
TestInterrupts(RARG1);
2369
}
2370
2371
void CPU::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)
2372
{
2373
DebugAssert(sr != RSCRATCH);
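// Overall check performed below, roughly (sr is already loaded by the caller):
//   if ((sr & 1u) && (((sr & g_state.cop0_regs.cause.bits) & 0xFF00u) != 0))
//     dispatch the interrupt via the far-code path;
//   else
//     fall through to no_interrupt.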
2374
2375
// if Iec == 0 then goto no_interrupt
2376
Label no_interrupt;
2377
rvAsm->ANDI(RSCRATCH, sr, 1);
2378
rvAsm->BEQZ(RSCRATCH, &no_interrupt);
2379
2380
// sr & cause
2381
rvAsm->LW(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
2382
rvAsm->AND(sr, sr, RSCRATCH);
2383
2384
// if ((sr & cause) & 0xff00) == 0 then goto no_interrupt
2385
rvAsm->SRLIW(sr, sr, 8);
2386
rvAsm->ANDI(sr, sr, 0xFF);
2387
SwitchToFarCode(true, &Assembler::BEQ, sr, zero);
2388
2389
BackupHostState();
2390
2391
// Update the load delay; this normally happens at the end of an instruction, but we're finishing it early.
2392
UpdateLoadDelay();
2393
2394
Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);
2395
2396
// Can't use EndBlockWithException() here, because it'll use the wrong PC.
2397
// Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
2398
if (!iinfo->is_last_instruction)
2399
{
2400
EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
2401
(inst + 1)->cop.cop_n));
2402
EmitMov(RARG2, m_compiler_pc);
2403
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
2404
m_dirty_pc = false;
2405
EndAndLinkBlock(std::nullopt, true, false);
2406
}
2407
else
2408
{
2409
if (m_dirty_pc)
2410
EmitMov(RARG1, m_compiler_pc);
2411
rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));
2412
if (m_dirty_pc)
2413
rvAsm->SW(RARG1, PTR(&g_state.pc));
2414
m_dirty_pc = false;
2415
EndAndLinkBlock(std::nullopt, false, true);
2416
}
2417
2418
RestoreHostState();
2419
SwitchToNearCode(false);
2420
2421
rvAsm->Bind(&no_interrupt);
2422
}
2423
2424
void CPU::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)
2425
{
2426
const u32 index = inst->cop.Cop2Index();
2427
const Reg rt = inst->r.rt;
2428
2429
const auto [ptr, action] = GetGTERegisterPointer(index, false);
2430
if (action == GTERegisterAccessAction::Ignore)
2431
return;
2432
2433
u32 hreg;
2434
if (action == GTERegisterAccessAction::Direct)
2435
{
2436
hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
2437
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
2438
rvAsm->LW(GPR(hreg), PTR(ptr));
2439
}
2440
else if (action == GTERegisterAccessAction::CallHandler)
2441
{
2442
Flush(FLUSH_FOR_C_CALL);
2443
EmitMov(RARG1, index);
2444
EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
2445
2446
hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
2447
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
2448
rvAsm->MV(GPR(hreg), RRET);
2449
}
2450
else
2451
{
2452
Panic("Unknown action");
2453
}
2454
2455
if (g_settings.gpu_pgxp_enable)
2456
{
2457
Flush(FLUSH_FOR_C_CALL);
2458
EmitMov(RARG1, inst->bits);
2459
rvAsm->MV(RARG2, GPR(hreg));
2460
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));
2461
}
2462
}
2463
2464
void CPU::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)
2465
{
2466
const u32 index = inst->cop.Cop2Index();
2467
const auto [ptr, action] = GetGTERegisterPointer(index, true);
2468
if (action == GTERegisterAccessAction::Ignore)
2469
return;
2470
2471
if (action == GTERegisterAccessAction::Direct)
2472
{
2473
if (cf.const_t)
2474
StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);
2475
else
2476
rvAsm->SW(CFGetRegT(cf), PTR(ptr));
2477
}
2478
else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)
2479
{
2480
const bool sign = (action == GTERegisterAccessAction::SignExtend16);
2481
if (cf.valid_host_t)
2482
{
2483
sign ? EmitSExtH(RARG1, CFGetRegT(cf)) : EmitUExtH(RARG1, CFGetRegT(cf));
2484
rvAsm->SW(RARG1, PTR(ptr));
2485
}
2486
else if (cf.const_t)
2487
{
2488
const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));
2489
StoreConstantToCPUPointer(sign ? ::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);
2490
}
2491
else
2492
{
2493
Panic("Unsupported setup");
2494
}
2495
}
2496
else if (action == GTERegisterAccessAction::CallHandler)
2497
{
2498
Flush(FLUSH_FOR_C_CALL);
2499
EmitMov(RARG1, index);
2500
MoveTToReg(RARG2, cf);
2501
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
2502
}
2503
else if (action == GTERegisterAccessAction::PushFIFO)
2504
{
2505
// SXY0 <- SXY1
2506
// SXY1 <- SXY2
2507
// SXY2 <- SXYP
2508
DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());
2509
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
2510
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
2511
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
2512
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
2513
if (cf.valid_host_t)
2514
rvAsm->SW(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));
2515
else if (cf.const_t)
2516
StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);
2517
else
2518
Panic("Unsupported setup");
2519
}
2520
else
2521
{
2522
Panic("Unknown action");
2523
}
2524
}
2525
2526
void CPU::RISCV64Recompiler::Compile_cop2(CompileFlags cf)
2527
{
2528
TickCount func_ticks;
2529
GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);
2530
2531
Flush(FLUSH_FOR_C_CALL);
2532
EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);
2533
EmitCall(reinterpret_cast<const void*>(func));
2534
2535
AddGTETicks(func_ticks);
2536
}
2537
2538
u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
2539
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
2540
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
2541
bool is_load)
2542
{
2543
Assembler rv_asm(static_cast<u8*>(thunk_code), thunk_space);
2544
Assembler* rvAsm = &rv_asm;
2545
2546
static constexpr u32 GPR_SIZE = 8;
2547
2548
// save regs
2549
u32 num_gprs = 0;
2550
2551
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2552
{
2553
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2554
num_gprs++;
2555
}
2556
2557
const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);
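// e.g. 3 caller-saved GPRs to preserve -> (3 + 1) & ~1 = 4 slots = 32 bytes, keeping the stack
// 16-byte aligned as the RISC-V psABI requires.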
2558
2559
if (stack_size > 0)
2560
{
2561
rvAsm->ADDI(sp, sp, -static_cast<s32>(stack_size));
2562
2563
u32 stack_offset = 0;
2564
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2565
{
2566
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2567
{
2568
rvAsm->SD(GPR(i), stack_offset, sp);
2569
stack_offset += GPR_SIZE;
2570
}
2571
}
2572
}
2573
2574
if (cycles_to_add != 0)
2575
{
2576
// NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles
2577
Assert(rvIsValidSExtITypeImm(cycles_to_add));
2578
rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
2579
rvAsm->ADDIW(RSCRATCH, RSCRATCH, cycles_to_add);
2580
rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
2581
}
2582
2583
if (address_register != RARG1.Index())
2584
rvAsm->MV(RARG1, GPR(address_register));
2585
2586
if (!is_load)
2587
{
2588
if (data_register != RARG2.Index())
2589
rvAsm->MV(RARG2, GPR(data_register));
2590
}
2591
2592
switch (size)
2593
{
2594
case MemoryAccessSize::Byte:
2595
{
2596
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) :
2597
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
2598
}
2599
break;
2600
case MemoryAccessSize::HalfWord:
2601
{
2602
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) :
2603
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
2604
}
2605
break;
2606
case MemoryAccessSize::Word:
2607
{
2608
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) :
2609
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
2610
}
2611
break;
2612
}
2613
2614
if (is_load)
2615
{
2616
const GPR dst = GPR(data_register);
2617
switch (size)
2618
{
2619
case MemoryAccessSize::Byte:
2620
{
2621
is_signed ? rvEmitSExtB(rvAsm, dst, RRET) : rvEmitUExtB(rvAsm, dst, RRET);
2622
}
2623
break;
2624
case MemoryAccessSize::HalfWord:
2625
{
2626
is_signed ? rvEmitSExtH(rvAsm, dst, RRET) : rvEmitUExtH(rvAsm, dst, RRET);
2627
}
2628
break;
2629
case MemoryAccessSize::Word:
2630
{
2631
if (dst.Index() != RRET.Index())
2632
rvAsm->MV(dst, RRET);
2633
}
2634
break;
2635
}
2636
}
2637
2638
if (cycles_to_remove != 0)
2639
{
2640
Assert(rvIsValidSExtITypeImm(-cycles_to_remove));
2641
rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
2642
rvAsm->ADDIW(RSCRATCH, RSCRATCH, -cycles_to_remove);
2643
rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
2644
}
2645
2646
// restore regs
2647
if (stack_size > 0)
2648
{
2649
u32 stack_offset = 0;
2650
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2651
{
2652
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2653
{
2654
rvAsm->LD(GPR(i), stack_offset, sp);
2655
stack_offset += GPR_SIZE;
2656
}
2657
}
2658
2659
rvAsm->ADDI(sp, sp, stack_size);
2660
}
2661
2662
rvEmitJmp(rvAsm, static_cast<const u8*>(code_address) + code_size);
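// Control returns to the first instruction after the patched fastmem access (code_address + code_size),
// so the original block continues as if the inline access had succeeded.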
2663
2664
return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
2665
}
2666
2667
#endif // CPU_ARCH_RISCV64
2668
2669