Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_pgxp.cpp
4802 views
1
// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
//
4
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
5
// No original code remains. The original copyright notice is included above for historical purposes.
6
//
7
8
#include "cpu_pgxp.h"
9
#include "bus.h"
10
#include "cpu_core.h"
11
#include "cpu_core_private.h"
12
#include "cpu_disasm.h"
13
#include "gpu_types.h"
14
#include "settings.h"
15
16
#include "util/gpu_device.h"
17
#include "util/state_wrapper.h"
18
19
#include "common/assert.h"
20
#include "common/log.h"
21
22
#include <climits>
23
#include <cmath>
24
25
LOG_CHANNEL(CPU);
26
27
// #define LOG_VALUES 1
28
// #define LOG_LOOKUPS 1
29
30
// TODO: Don't update flags on Validate(), instead return it.
31
32
namespace CPU::PGXP {
33
34
enum : u32
35
{
36
VERTEX_CACHE_WIDTH = 2048,
37
VERTEX_CACHE_HEIGHT = 2048,
38
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
39
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
40
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
41
};
42
43
enum : u32
44
{
45
VALID_X = (1u << 0),
46
VALID_Y = (1u << 1),
47
VALID_Z = (1u << 2),
48
VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value.
49
VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value.
50
VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
51
52
VALID_XY = (VALID_X | VALID_Y),
53
VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
54
VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
55
};
56
57
#define LOWORD_U16(val) (static_cast<u16>(val))
58
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
59
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
60
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
61
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
62
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
63
64
static bool ShouldSavePGXPState();
65
66
static double f16Sign(double val);
67
static double f16Unsign(double val);
68
static double f16Overflow(double val);
69
70
static void CacheVertex(u32 value, const PGXPValue& vertex);
71
static PGXPValue* GetCachedVertex(u32 value);
72
73
static float TruncateVertexPosition(float p);
74
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
75
76
static PGXPValue& GetRdValue(Instruction instr);
77
static PGXPValue& GetRtValue(Instruction instr);
78
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
79
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
80
static void SetRtValue(Instruction instr, const PGXPValue& val);
81
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
82
static PGXPValue& GetSXY0();
83
static PGXPValue& GetSXY1();
84
static PGXPValue& GetSXY2();
85
static PGXPValue& PushSXY();
86
87
static PGXPValue* GetPtr(u32 addr);
88
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
89
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
90
91
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
92
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
93
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
94
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
95
96
static void WriteMem(u32 addr, const PGXPValue& value);
97
static void WriteMem16(u32 addr, const PGXPValue& value);
98
99
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
100
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
101
102
#ifdef LOG_VALUES
103
static void LogInstruction(u32 pc, Instruction instr);
104
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
105
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
106
107
// clang-format off
108
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
109
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
110
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
111
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
112
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
113
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
114
#else
115
#define LOG_VALUES_NV() (void)0
116
#define LOG_VALUES_1(name, rval, val) (void)0
117
#define LOG_VALUES_C1(rnum, rval) (void)0
118
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
119
#define LOG_VALUES_LOAD(addr, val) (void)0
120
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
121
#endif
122
// clang-format on
123
124
static constexpr const PGXPValue INVALID_VALUE = {};
125
126
static PGXPValue* s_mem = nullptr;
127
static PGXPValue* s_vertex_cache = nullptr;
128
129
#ifdef LOG_VALUES
130
static std::FILE* s_log;
131
#endif
132
} // namespace CPU::PGXP
133
134
void CPU::PGXP::Initialize()
135
{
136
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
137
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
138
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
139
140
if (!s_mem)
141
{
142
s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
143
if (!s_mem)
144
Panic("Failed to allocate PGXP memory");
145
}
146
147
if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache)
148
{
149
s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
150
if (!s_vertex_cache)
151
{
152
ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
153
g_settings.gpu_pgxp_vertex_cache = false;
154
}
155
}
156
157
if (s_vertex_cache)
158
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
159
}
160
161
void CPU::PGXP::Reset()
162
{
163
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
164
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
165
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
166
167
if (s_mem)
168
std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);
169
170
if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
171
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
172
}
173
174
void CPU::PGXP::Shutdown()
175
{
176
if (s_vertex_cache)
177
{
178
std::free(s_vertex_cache);
179
s_vertex_cache = nullptr;
180
}
181
if (s_mem)
182
{
183
std::free(s_mem);
184
s_mem = nullptr;
185
}
186
187
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
188
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
189
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
190
}
191
192
bool CPU::PGXP::ShouldSavePGXPState()
193
{
194
// Only save PGXP state for runahead, not rewind.
195
// The performance impact is too great, and the glitches are much less noticeable with rewind.
196
return (g_settings.gpu_pgxp_enable && g_settings.IsRunaheadEnabled());
197
}
198
199
size_t CPU::PGXP::GetStateSize()
200
{
201
if (!ShouldSavePGXPState())
202
return 0;
203
204
const size_t base_size = sizeof(g_state.pgxp_gpr) + sizeof(g_state.pgxp_cop0) + sizeof(g_state.pgxp_gte) +
205
(sizeof(PGXPValue) * PGXP_MEM_SIZE);
206
const size_t vertex_cache_size = sizeof(PGXPValue) * VERTEX_CACHE_SIZE;
207
return base_size + (g_settings.gpu_pgxp_vertex_cache ? vertex_cache_size : 0);
208
}
209
210
// Serializes/deserializes PGXP state through the state wrapper.
// Does nothing when ShouldSavePGXPState() is false, matching GetStateSize() returning 0 in that case.
void CPU::PGXP::DoState(StateWrapper& sw)
{
  if (!ShouldSavePGXPState())
  {
    // Value checks will fail and fall back to imprecise geometry when using rewind.
    return;
  }

  sw.DoBytes(g_state.pgxp_gpr, sizeof(g_state.pgxp_gpr));
  sw.DoBytes(g_state.pgxp_cop0, sizeof(g_state.pgxp_cop0));
  sw.DoBytes(g_state.pgxp_gte, sizeof(g_state.pgxp_gte));

  sw.DoBytes(s_mem, sizeof(PGXPValue) * PGXP_MEM_SIZE);

  // The cache allocation outlives the setting (Initialize() never frees it), so checking the pointer alone could
  // transfer more bytes than GetStateSize() reported. Gate on the setting as well, like GetStateSize()/Reset().
  if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
    sw.DoBytes(s_vertex_cache, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
227
228
// Scales val by 65536, truncates it to an integer, wraps that to signed 32 bits, and scales back down.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const double scale = static_cast<double>(USHRT_MAX + 1);
  const s64 fixed = static_cast<s64>(val * scale);
  const s32 wrapped = static_cast<s32>(fixed);
  return static_cast<double>(wrapped) / scale;
}
233
234
// Returns val unchanged when it is non-negative, otherwise val + 65536.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
238
239
// Truncates val to a 64-bit integer and returns everything above the low 16 bits (arithmetic shift by 16).
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 whole = static_cast<s64>(val);
  const s64 upper = whole >> 16;
  return static_cast<double>(upper);
}
243
244
// Returns the shadow value of the instruction's rd register.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd];
}
248
249
// Returns the shadow value of the instruction's rt register.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt];
}
253
254
// Validates the rt shadow value against the real register contents (rtVal) and returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt];
  shadow.Validate(rtVal);
  return shadow;
}
260
261
// Validates the rs shadow value against the real register contents (rsVal) and returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs];
  shadow.Validate(rsVal);
  return shadow;
}
267
268
// Overwrites the rt shadow value with a copy of val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt = static_cast<u8>(instr.r.rt.GetValue());
  g_state.pgxp_gpr[rt] = val;
}
272
273
// Overwrites the rt shadow value with a copy of val, then replaces its raw value field with rtVal.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  const u8 rt = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt];
  shadow = val;
  shadow.value = rtVal;
}
279
280
// Oldest entry of the shadow screen-XY FIFO (GTE register 12).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  constexpr u32 SXY0_INDEX = 12;
  return g_state.pgxp_gte[SXY0_INDEX];
}
284
285
// Middle entry of the shadow screen-XY FIFO (GTE register 13).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  constexpr u32 SXY1_INDEX = 13;
  return g_state.pgxp_gte[SXY1_INDEX];
}
289
290
// Newest entry of the shadow screen-XY FIFO (GTE register 14).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  constexpr u32 SXY2_INDEX = 14;
  return g_state.pgxp_gte[SXY2_INDEX];
}
294
295
// Advances the shadow screen-XY FIFO by one (12 <- 13 <- 14) and returns the newest slot for the caller to fill.
// The returned slot still holds its previous contents.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
301
302
// Maps a CPU address to its shadow memory entry (one entry per 32-bit word).
// Returns nullptr for addresses that are neither scratchpad nor below the end of the RAM mirrors.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  // Scratchpad entries live after the RAM entries in the shadow table.
  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const u32 word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  // Don't worry about >512MB here for performance reasons.
  const u32 paddr = (addr & KSEG_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  const u32 ram_word_index = (paddr & Bus::g_ram_mask) >> 2;
  return &s_mem[ram_word_index];
}
320
321
// Looks up the shadow entry for addr, validates it against the word actually loaded (value), and returns it.
// Addresses without shadow storage yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
330
331
// Loads a 16-bit half of the shadow entry at addr into dest.
//   dest  - receives the loaded value; x holds the selected half, y the extension.
//   addr  - CPU address; bit 1 selects the high (Y) or low (X) half of the word.
//   value - the value actually loaded by the CPU, stored into dest.value.
//   sign  - true for sign-extending loads, false for zero-extending loads.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // determine if high or low word
  const bool hiword = ((addr & 2) != 0);

  // only validate the component we're interested in
  const auto stored_half = Truncate16(hiword ? (entry->value >> 16) : entry->value);
  if (stored_half != Truncate16(value))
    entry->flags &= hiword ? ~VALID_Y : ~VALID_X;

  // copy whole value
  dest = *entry;

  // if high word then shift: Y (and its validity bit) moves down into the X slot
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // only set y as valid if x is also valid.. don't want to make fake values
  if (dest.flags & VALID_X)
  {
    // y becomes the extension of x: -1 for negative values on signed loads.
    dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f;
    dest.flags |= VALID_Y;
  }
  else
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }

  dest.value = value;
}
372
373
// Stores a full-word shadow value at addr. The per-half Z flags are rewritten so that both halves are marked as
// carrying Z exactly when the stored value has a valid Z. Addresses without shadow storage are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
    return;

  const u32 both_halves_z = (VALID_LOWZ | VALID_HIGHZ);
  const u32 half_z_bits = (value.flags & VALID_Z) ? both_halves_z : 0;
  const u32 new_flags = (value.flags & ~both_halves_z) | half_z_bits;

  *entry = value;
  entry->flags = new_flags;
}
383
384
// Stores the X component of value into one 16-bit half of the shadow entry at addr.
// Bit 1 of addr selects the half: set -> high half (entry Y), clear -> low half (entry X).
// Addresses without shadow storage are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const dest = GetPtr(addr);
  if (!dest) [[unlikely]]
    return;

  // determine if high or low word
  const bool hiword = ((addr & 2) != 0);
  const u32 src_x_valid = (value.flags & VALID_X);

  if (!hiword)
  {
    dest->x = value.x;
    dest->flags = (dest->flags & ~VALID_X) | src_x_valid;
    dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    dest->y = value.x;
    dest->flags = (dest->flags & ~VALID_Y) | (src_x_valid << 1);
    dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  const u32 this_half_z = hiword ? VALID_HIGHZ : VALID_LOWZ;
  if (value.flags & VALID_Z)
  {
    dest->z = value.z;
    dest->flags |= (VALID_Z | this_half_z);
  }
  else
  {
    // This half no longer carries Z; once neither half does, the entry's Z is invalid.
    dest->flags &= ~this_half_z;
    if (!(dest->flags & (VALID_HIGHZ | VALID_LOWZ)))
      dest->flags &= ~VALID_Z;
  }
}
419
420
// Takes src's Z when dst has no valid Z of its own, and ORs src's Z validity into dst's flags.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
425
426
// Chooses which source's Z to carry into a result.
// dst_z receives the selected Z; dst_flags gains VALID_Z if either source has it.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise.
  const bool src1_missing_z = !(src1.flags & VALID_Z);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);
  const bool take_src2 = (src1_missing_z || (src1_tainted && src2_precise));

  dst_z = take_src2 ? src2.z : src1.z;
  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
436
437
#ifdef LOG_VALUES
438
// Starts a new trace line containing the PC, raw opcode and disassembly of the instruction.
// The log file ("pgxp.log") is opened lazily on first use; each later call flushes and terminates the previous
// line first.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // If the file cannot be created, skip logging instead of passing a null FILE* to fprintf().
    // The next call will retry the open.
    if (!s_log)
      return;
  }
  else
  {
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
454
455
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
456
{
457
if (!s_log) [[unlikely]]
458
return;
459
460
SmallString str;
461
LogValueStr(str, name, rval, val);
462
std::fprintf(s_log, " %s", str.c_str());
463
}
464
465
// Formats "name=[rval, ...]" into str.
//   name - label for the value (register name or memory location).
//   rval - the raw CPU-side value.
//   val  - the shadow value, or nullptr if none exists (printed as NULL).
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (val == nullptr)
  {
    str.append(", NULL]");
    return;
  }

  // Only print the shadow's raw value when it disagrees with the CPU's.
  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits != 0)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  // str.append(", tainted");

  str.append(']');
}
496
497
#endif
498
499
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
500
{
501
PGXPValue& pvalue = PushSXY();
502
pvalue.x = x;
503
pvalue.y = y;
504
pvalue.z = z;
505
pvalue.value = value;
506
pvalue.flags = VALID_ALL;
507
508
if (g_settings.gpu_pgxp_vertex_cache)
509
CacheVertex(value, pvalue);
510
}
511
512
// Validates the three shadow screen-XY FIFO entries against the real register values and reports whether all
// three have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = GetSXY0();
  PGXPValue& v1 = GetSXY1();
  PGXPValue& v2 = GetSXY2();

  v0.Validate(sxy0);
  v1.Validate(sxy1);
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 common_flags = (v0.flags & v1.flags & v2.flags);
  return ((common_flags & VALID_XYZ) == VALID_XYZ);
}
524
525
float CPU::PGXP::GTE_NCLIP()
526
{
527
const PGXPValue& SXY0 = GetSXY0();
528
const PGXPValue& SXY1 = GetSXY1();
529
const PGXPValue& SXY2 = GetSXY2();
530
float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
531
(SXY2.x * SXY1.y));
532
533
// ensure fractional values are not incorrectly rounded to 0
534
const float nclip_abs = std::abs(nclip);
535
if (0.1f < nclip_abs && nclip_abs < 1.0f)
536
nclip += (nclip < 0.0f ? -1.0f : 1.0f);
537
538
return nclip;
539
}
540
541
// Writes a shadow value into GTE data register `reg`.
//   value - shadow value being moved.
//   val   - raw 32-bit value being written by the CPU.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  // Register 15 pushes the screen-XY FIFO; the shadow value is stored as-is.
  if (reg == 15)
  {
    PushSXY() = value;
    return;
  }

  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  // Everything else is a plain copy, with the raw value taken from the CPU.
  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
569
570
// MFC2: copies a shadow GTE data register into the shadow CPU register rt.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  // CPU[Rt] = GTE_D[Rd]
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_index), rdVal, &g_state.pgxp_gte[gte_index]);

  PGXPValue& source = g_state.pgxp_gte[gte_index];
  source.Validate(rdVal);
  SetRtValue(instr, source, rdVal);
}
580
581
// MTC2: copies the shadow CPU register rt into a shadow GTE data register.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  // GTE_D[Rd] = CPU[Rt]
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 gte_index = instr.cop.Cop2Index();
  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_index, source, rtVal);
}
590
591
// LWC2: loads a shadow memory word into a shadow GTE data register.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // GTE_D[Rt] = Mem[addr]
  LOG_VALUES_LOAD(addr, rtVal);

  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  CPU_MTC2(gte_index, loaded, rtVal);
}
599
600
// SWC2: stores a shadow GTE data register into shadow memory.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[addr] = GTE_D[Rt]
  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& source = g_state.pgxp_gte[gte_index];

#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_index), rtVal, &source);
  std::fprintf(s_log, " addr=%08X", addr);
#endif

  source.Validate(rtVal);
  WriteMem(addr, source);
}
612
613
// Stores a vertex in the vertex cache, keyed by its packed screen coordinate (low 16 bits = x, high 16 = y).
// Coordinates are expected to lie in [-1024, 1023]; this is only checked in debug builds.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 screen_x = static_cast<s16>(value & 0xFFFFu);
  const s16 screen_y = static_cast<s16>(value >> 16);
  DebugAssert(screen_x >= -1024 && screen_x <= 1023 && screen_y >= -1024 && screen_y <= 1023);

  // Bias both coordinates by 1024 so the table index is non-negative.
  const auto slot = (screen_y + 1024) * VERTEX_CACHE_WIDTH + (screen_x + 1024);
  s_vertex_cache[slot] = vertex;
}
620
621
// Looks up the vertex cache slot for a packed screen coordinate (low 16 bits = x, high 16 bits = y).
// Returns nullptr when either coordinate falls outside the cached range of [-1024, 1023].
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // The upper Y bound was 1013, which made rows 1014..1023 unreachable even though CacheVertex() fills them and
  // the table is VERTEX_CACHE_HEIGHT (2048) rows tall. Use the same bounds as CacheVertex().
  return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ?
           &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] :
           nullptr;
}
629
630
// Applies the GPU's vertex position truncation to the integer part of p, keeping the fractional part.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  const float truncated_whole = static_cast<float>(TruncateGPUVertexPosition(whole));
  return truncated_whole + fraction;
}
639
640
// Checks whether a precise position is close enough to the integer position the game submitted.
// A negative tolerance setting disables the check (always passes).
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float limit = g_settings.gpu_pgxp_tolerance;
  if (limit < 0.0f)
    return true;

  const float error_x = std::abs(precise_x - static_cast<float>(int_x));
  const float error_y = std::abs(precise_y - static_cast<float>(int_y));
  return (error_x <= limit && error_y <= limit);
}
649
650
// Resolves the precise position of a vertex being drawn.
//   addr         - address the vertex word was read from (key into shadow memory).
//   value        - the packed integer vertex word.
//   x, y         - native integer position, used for the tolerance check and as the fallback.
//   xOffs, yOffs - offset added to the precise position.
//   out_x/out_y/out_w - receive the chosen position and W. They are always written.
// Returns true only when the shadow memory entry was used and has a valid Z. Vertex cache hits and misses
// return false.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  const float offset_x = static_cast<float>(xOffs);
  const float offset_y = static_cast<float>(yOffs);

  // First choice: the shadow memory entry, provided it still describes this exact word.
  const PGXPValue* vert = GetPtr(addr);
  const bool mem_usable = (vert && (vert->flags & VALID_XY) == VALID_XY && vert->value == value);
  if (mem_usable)
  {
    *out_x = TruncateVertexPosition(vert->x) + offset_x;
    *out_y = TruncateVertexPosition(vert->y) + offset_y;
    *out_w = vert->z / static_cast<float>(GTE::MAX_Z);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
               std::abs(*out_y - y));
#endif

    if (IsWithinTolerance(*out_x, *out_y, x, y))
    {
      // check validity of z component
      return ((vert->flags & VALID_Z) == VALID_Z);
    }
  }

  // Second choice: the vertex cache, keyed by the packed screen coordinate.
  if (g_settings.gpu_pgxp_vertex_cache)
  {
    vert = GetCachedVertex(value);
    const bool cache_usable = (vert && (vert->flags & VALID_XY) == VALID_XY);
    if (cache_usable)
    {
      *out_x = TruncateVertexPosition(vert->x) + offset_x;
      *out_y = TruncateVertexPosition(vert->y) + offset_y;
      *out_w = vert->z / static_cast<float>(GTE::MAX_Z);

#ifdef LOG_LOOKUPS
      GL_INS_FMT("0x{:08X} {},{} => VERTEX_CACHE{{{},{} ({},{},{}) ({},{})}}", addr, x, y, *out_x, *out_y,
                 TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
                 std::abs(*out_y - y));
#endif

      if (IsWithinTolerance(*out_x, *out_y, x, y))
      {
        // This is only really used for Syphon Filter 3, and including Z tends to make things worse.
        // At least it can get rid of the jitter, but not the warping.
        return false;
      }
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;

#ifdef LOG_LOOKUPS
  GL_INS_FMT("0x{:08X} {},{} => MISS", addr, x, y);
#endif

  return false;
}
707
708
// LW: loads the shadow memory word at addr into the shadow register rt.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
714
715
// LB/LBU: byte loads are not tracked, so the shadow register rt is simply invalidated.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& nothing = INVALID_VALUE;
  SetRtValue(instr, nothing);
}
720
721
// LH: loads one 16-bit half of a shadow memory word into the shadow register rt, sign-extending.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (sign extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, true);
}
727
728
// LHU: loads one 16-bit half of a shadow memory word into the shadow register rt, zero-extending.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (zero extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, false);
}
734
735
// SB: byte stores are not tracked, so the shadow memory word containing addr is invalidated.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& nothing = INVALID_VALUE;
  WriteMem(addr, nothing);
}
740
741
// SH: stores the shadow register rt into one 16-bit half of the shadow memory word at addr.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, source);
}
747
748
// SW: stores the shadow register rt into the shadow memory word at addr.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[Rs + Im] = Rt
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, source);
}
755
756
// LWL/LWR: merges part of a memory word into the shadow register rt.
//   addr  - unaligned effective address; its low two bits select how many bytes are merged.
//   rtVal - the register's contents before the load.
// The shadow value is updated per alignment: full-word cases copy the memory shadow, half-word aligned cases
// move one precise component, and everything else falls back to the integer halves with validity dropped.
// Nothing is done when the address has no shadow storage or the backing word cannot be read.
void CPU::PGXP::CPU_LWx(Instruction instr, u32 addr, u32 rtVal)
{
  const u32 word_addr = addr & ~3u;
  PGXPValue* const mem = GetPtr(word_addr);
  if (!mem)
    return;

  u32 memVal;
  if (!CPU::SafeReadMemoryWord(word_addr, &memVal)) [[unlikely]]
    return;

  mem->Validate(memVal);
  LOG_VALUES_LOAD(addr, memVal);

  PGXPValue& reg = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;
  const u32 bit_shift = (byte_shift * 8);

  // The whole register is replaced by the memory word: effectively the same as a normal load.
  const auto load_whole_word = [&reg, mem](u32 merged) {
    reg = *mem;
    reg.value = merged;
  };

  // Three bytes are replaced: making a dog's breakfast of both X and Y, so fall back to the integer halves.
  const auto scramble_xy = [&reg](u32 merged) {
    reg.x = static_cast<float>(static_cast<s16>(merged));
    reg.y = static_cast<float>(static_cast<s16>(merged >> 16));
    reg.value = merged;
    reg.flags &= ~(VALID_X | VALID_Y | VALID_Z);
  };

  if (instr.op == InstructionOp::lwl)
  {
    // LWL fills the register from the top down.
    const u32 merged = (rtVal & (UINT32_C(0x00FFFFFF) >> bit_shift)) | (memVal << (24 - bit_shift));

    switch (byte_shift)
    {
      case 0:
      {
        // only writing the upper half of Y, can't do much about that..
        reg.y = static_cast<float>(static_cast<s16>(merged >> 16));
        reg.value = merged;
        reg.flags &= ~VALID_Y;
      }
      break;

      case 1:
      {
        // The register's high half becomes memory's low half: memory X moves into Y.
        const bool mem_low_z = ((mem->flags & VALID_LOWZ) != 0);
        reg.y = mem->x;
        if (mem_low_z)
          reg.z = mem->z;
        reg.value = merged;
        reg.flags = (reg.flags & ~VALID_Y) | ((mem->flags & VALID_X) << 1) | (mem_low_z ? VALID_Z : 0);
      }
      break;

      case 2:
        scramble_xy(merged);
        break;

      case 3:
        load_whole_word(merged);
        break;

        DefaultCaseIsUnreachable();
    }
  }
  else
  {
    // LWR fills the register from the bottom up.
    const u32 merged = (rtVal & (UINT32_C(0xFFFFFF00) << (24 - bit_shift))) | (memVal >> bit_shift);

    switch (byte_shift)
    {
      case 0:
        load_whole_word(merged);
        break;

      case 1:
        scramble_xy(merged);
        break;

      case 2:
      {
        // The register's low half becomes memory's high half: memory Y moves into X.
        const bool mem_high_z = ((mem->flags & VALID_HIGHZ) != 0);
        reg.x = mem->y;
        if (mem_high_z)
          reg.z = mem->z;
        reg.value = merged;
        reg.flags = (reg.flags & ~VALID_X) | ((mem->flags & VALID_Y) >> 1) | (mem_high_z ? VALID_Z : 0);
      }
      break;

      case 3:
      {
        // only writing the lower half of X, can't do much about that..
        reg.x = static_cast<float>(static_cast<s16>(merged));
        reg.value = merged;
        reg.flags &= ~VALID_X;
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
}
867
868
// SWL/SWR: merges part of the shadow register rt into the shadow memory word containing addr.
//   addr  - unaligned effective address; its low two bits select how many bytes are stored.
//   rtVal - the register's current contents.
// The memory shadow is updated per alignment: full-word cases copy the register shadow, half-word aligned cases
// move one precise component, and everything else falls back to the integer halves with validity dropped.
// Nothing is done when the address has no shadow storage or the backing word cannot be read.
//
// Flag conventions kept for the memory entry: VALID_LOWZ/VALID_HIGHZ say which half contributed Z, and VALID_Z
// is only kept while at least one of them is set.
void CPU::PGXP::CPU_SWx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const u32 aligned_addr = addr & ~3u;
  PGXPValue* pmemVal = GetPtr(aligned_addr);
  u32 memVal;
  if (!pmemVal)
    return;
  if (!CPU::SafeReadMemoryWord(aligned_addr, &memVal)) [[unlikely]]
    return;
  pmemVal->Validate(memVal);

  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;
  const u32 bit_shift = (byte_shift * 8);

  if (instr.op == InstructionOp::swl)
  {
    // SWL writes the top bytes of the register into the bottom bytes of the word.
    const u32 mixed_value = (memVal & (UINT32_C(0xFFFFFF00) << bit_shift)) | (rtVal >> (24 - bit_shift));

    switch (byte_shift)
    {
      case 0:
      {
        // only writing the lower half of X, can't do much about that..
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 1:
      {
        // Memory's low half becomes the register's high half: register Y moves into X.
        pmemVal->x = prtVal.y;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;
        pmemVal->flags = (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((prtVal.flags & VALID_Y) >> 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_LOWZ) : 0) |
                         ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 2:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 3:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
  else
  {
    // SWR writes the bottom bytes of the register into the top bytes of the word.
    const u32 mixed_value = (memVal & (UINT32_C(0x00FFFFFF) >> (24 - bit_shift))) | (rtVal << bit_shift);

    switch (byte_shift)
    {
      case 0:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

      case 1:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;

        // Also drop VALID_Z, matching the mirrored SWL case: with neither half flag left, Z must not stay valid.
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 2:
      {
        // Memory's high half becomes the register's low half: register X moves into Y.
        pmemVal->y = prtVal.x;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;

        // Y is the component being replaced, so it is VALID_Y (not VALID_X) that must be cleared before taking
        // the validity of the register's X. The low half, and therefore X, is untouched.
        pmemVal->flags = (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((prtVal.flags & VALID_X) << 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_HIGHZ) : 0) |
                         ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

      case 3:
      {
        // only writing the upper half of Y, can't do much about that..
        // Only the top byte of the word changes, so Y comes from the high half-word and only Y loses validity.
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
}
988
989
// Variant of CPU_MOVE() taking both register indices in one argument: bits 0-7 = Rs, remaining bits = Rd.
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 source_reg = (rd_and_rs & 0xFFu);
  const u32 dest_reg = (rd_and_rs >> 8);
  CPU_MOVE(dest_reg, source_reg, rsVal);
}
995
996
// Register-to-register move: validates the shadow of Rs against rsVal and copies it to the shadow of Rd.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macros expect a local named `instr`; there is no real instruction here.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif

  PGXPValue& source = g_state.pgxp_gpr[Rs];
  PGXPValue& dest = g_state.pgxp_gpr[Rd];
  source.Validate(rsVal);
  dest = source;
}
1006
1007
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs + Imm (signed)
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  const u32 immVal = instr.i.imm_sext32();

  // The result starts out as a copy of the source; adding zero changes nothing.
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  if (immVal == 0)
    return;

  if (rsVal != 0)
  {
    // Add the low halves as unsigned quantities so a carry/borrow into the
    // high half can be detected.
    float low = static_cast<float>(f16Unsign(dst.x));
    low += static_cast<float>(LOWORD_U16(immVal));

    // carry on over/underflow
    float carry = 0.0f;
    if (low > USHRT_MAX)
      carry = 1.0f;
    else if (low < 0.0f)
      carry = -1.0f;

    dst.x = static_cast<float>(f16Sign(low));
    dst.y += HIWORD_S16(immVal) + carry;

    // truncate on overflow/underflow: wrap the high half back into s16 range
    float wrap = 0.0f;
    if (dst.y > SHRT_MAX)
      wrap = -static_cast<float>(USHRT_MAX + 1);
    else if (dst.y < SHRT_MIN)
      wrap = static_cast<float>(USHRT_MAX + 1);
    dst.y += wrap;

    dst.value = rsVal + immVal;
    dst.flags |= VALID_TAINTED_Z;
  }
  else
  {
    // Source register is zero, so the result is exactly the immediate:
    // both halves are low precision values taken straight from it.
    dst.x = static_cast<float>(LOWORD_S16(immVal));
    dst.y = static_cast<float>(HIWORD_S16(immVal));
    dst.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
    dst.value = immVal;
  }
}
1047
1048
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs & Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal & imm;
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);

  // The immediate is zero-extended, so the upper 16 bits of the result are
  // always cleared and Y becomes a known zero.
  u32 result_flags = src.flags | VALID_Y | VALID_TAINTED_Z;
  dst.y = 0.0f;
  dst.z = src.z;
  dst.value = rtVal;

  if (imm == 0)
  {
    // Masking with zero: X is a known zero as well.
    dst.x = 0.0f;
    result_flags |= VALID_X;
  }
  else if (imm == 0xFFFFu)
  {
    // Full low-half mask: X passes through unchanged (validity inherited from the source).
    dst.x = src.x;
  }
  else
  {
    // Partial mask: fall back to the low precision integer result.
    dst.x = static_cast<float>(LOWORD_S16(rtVal));
    result_flags |= VALID_X;
  }

  dst.flags = result_flags;
}
1090
1091
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs | Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal | imm;

  // Begin with a straight copy of the source entry carrying the new integer value.
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = result;

  // OR with zero leaves X as it was; any other immediate alters the low half,
  // so X is replaced with the low precision integer result.
  if (imm != 0) [[likely]]
  {
    dst.x = static_cast<float>(LOWORD_S16(result));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
1115
1116
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs ^ Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal ^ imm;

  // Begin with a straight copy of the source entry carrying the new integer value.
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = result;

  // XOR with zero leaves X as it was; any other immediate alters the low half,
  // so X is replaced with the low precision integer result.
  if (imm != 0) [[likely]]
  {
    dst.x = static_cast<float>(LOWORD_S16(result));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
1140
1141
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (signed)
  const s32 imm = instr.i.imm_s16();
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);

  // Express the immediate as low/high float halves: the low half is the
  // immediate itself, the high half is its sign extension (-1 or 0).
  const float imm_low = static_cast<float>(imm);
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  PGXPValue& dst = GetRtValue(instr);
  const bool is_less = (src.GetValidY(rsVal) < imm_high) || (src.GetValidX(rsVal) < imm_low);

  // The result is a boolean stored in X; Y is a known zero.
  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = src.z;
  dst.flags = src.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
1159
1160
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (Unsigned)
  const u32 imm = instr.i.imm_u16();
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);

  // Low/high float halves of the immediate used for the float-side comparison.
  const float fimmx = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
  const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f;

  PGXPValue& prtVal = GetRtValue(instr);
  prtVal.x =
    (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(fimmy) || f16Unsign(prsVal.GetValidX(rsVal)) < fimmx) ? 1.0f : 0.0f;
  prtVal.y = 0.0f;
  prtVal.z = prsVal.z;
  prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;

  // SLTIU sign-extends the 16-bit immediate to 32 bits and only then performs
  // the unsigned comparison. The tracked integer value has to follow that rule,
  // otherwise it disagrees with the real register for immediates >= 0x8000.
  const u32 imm_sext = instr.i.imm_sext32();
  prtVal.value = BoolToUInt32(rsVal < imm_sext);
}
1179
1180
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  // Rt = Imm << 16
  // The immediate lands in the high half (Y); the low half (X) and Z are zero.
  PGXPValue& dst = GetRtValue(instr);
  dst.value = instr.i.imm_zext32() << 16;
  dst.flags = VALID_XY;
  dst.x = 0.0f;
  dst.y = static_cast<float>(instr.i.imm_s16());
  dst.z = 0.0f;
}
1192
1193
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs + Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Adding zero is a move: forward the non-zero operand and borrow Z from
  // the other one if the forwarded value has none.
  if (rtVal == 0)
  {
    dst = lhs;
    CopyZIfMissing(dst, rhs);
    return;
  }
  if (rsVal == 0)
  {
    dst = rhs;
    CopyZIfMissing(dst, lhs);
    return;
  }

  // Sum the low halves as unsigned values so a carry can be detected.
  const double low_sum = f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low_sum > USHRT_MAX)
    carry = 1.0f;
  else if (low_sum < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_sum));
  dst.y = lhs.GetValidY(rsVal) + rhs.GetValidY(rtVal) + carry;

  // truncate on overflow/underflow: wrap the high half back into s16 range
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = -static_cast<float>(USHRT_MAX + 1);
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal + rtVal;

  // valid x/y only if one side had a valid x/y
  dst.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dst.z, dst.flags, lhs, rhs);
}
1232
1233
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs - Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Subtracting zero is a move of Rs; borrow Z from Rt if Rs has none.
  if (rtVal == 0)
  {
    dst = lhs;
    CopyZIfMissing(dst, rhs);
    return;
  }

  // Subtract the low halves as unsigned values so a borrow can be detected.
  const double low_diff = f16Unsign(lhs.GetValidX(rsVal)) - f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low_diff > USHRT_MAX)
    carry = 1.0f;
  else if (low_diff < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_diff));
  dst.y = lhs.GetValidY(rsVal) - (rhs.GetValidY(rtVal) - carry);

  // truncate on overflow/underflow: wrap the high half back into s16 range
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = -static_cast<float>(USHRT_MAX + 1);
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal - rtVal;

  // valid x/y only if one side had a valid x/y
  dst.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dst.z, dst.flags, lhs, rhs);
}
1267
1268
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  // Shared tail for the register-register bitwise ops; the caller has already
  // computed the integer result rdVal from rsVal and rtVal.
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  // For each 16-bit half of the result: zero stays zero, a half identical to
  // one operand's half reuses that operand's float component, and anything
  // else falls back to the low precision integer value.
  const u16 rd_low = LOWORD_U16(rdVal);
  const u16 rd_high = HIWORD_U16(rdVal);

  float new_x;
  if (rd_low == 0)
    new_x = 0.0f;
  else if (rd_low == LOWORD_U16(rsVal))
    new_x = lhs.GetValidX(rsVal);
  else if (rd_low == LOWORD_U16(rtVal))
    new_x = rhs.GetValidX(rtVal);
  else
    new_x = static_cast<float>(LOWORD_S16(rdVal));

  float new_y;
  if (rd_high == 0)
    new_y = 0.0f;
  else if (rd_high == HIWORD_U16(rsVal))
    new_y = lhs.GetValidY(rsVal);
  else if (rd_high == HIWORD_U16(rtVal))
    new_y = rhs.GetValidY(rtVal);
  else
    new_y = static_cast<float>(HIWORD_S16(rdVal));

  // Everything is staged in locals because the destination may be the same
  // register as one of the sources.
  u32 new_flags = 0;
  if ((lhs.flags | rhs.flags) & VALID_XY)
    new_flags = (VALID_XY | VALID_TAINTED_Z);

  PGXPValue& dst = GetRdValue(instr);
  SelectZ(dst.z, new_flags, lhs, rhs);
  dst.x = new_x;
  dst.y = new_y;
  dst.flags = new_flags;
  dst.value = rdVal;
}
1302
1303
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs & Rt; the shared bitwise helper does the component tracking.
  CPU_BITWISE(instr, (rsVal & rtVal), rsVal, rtVal);
}
1311
1312
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs | Rt; the shared bitwise helper does the component tracking.
  CPU_BITWISE(instr, (rsVal | rtVal), rsVal, rtVal);
}
1320
1321
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs ^ Rt; the shared bitwise helper does the component tracking.
  CPU_BITWISE(instr, (rsVal ^ rtVal), rsVal, rtVal);
}
1329
1330
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs NOR Rt; the shared bitwise helper does the component tracking.
  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
1338
1339
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // High halves are compared signed, low halves unsigned.
  const bool is_less = (lhs.GetValidY(rsVal) < rhs.GetValidY(rtVal)) ||
                       (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  // The result is a boolean stored in X; Y is a known zero.
  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
1356
1357
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Both halves are compared as unsigned quantities.
  const bool is_less = (f16Unsign(lhs.GetValidY(rsVal)) < f16Unsign(rhs.GetValidY(rtVal))) ||
                       (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  // The result is a boolean stored in X; Y is a known zero.
  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(rsVal < rtVal);
}
1374
1375
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so they share its Z and validity.
  // NOTE(review): lo was just copied from lhs, so passing lhs here looks like it
  // cannot add a missing Z; CPU_ADD passes the *other* operand - confirm intent.
  lo = lhs;
  CopyZIfMissing(lo, lhs);
  hi = lo;

  const float rs_low = lhs.GetValidX(rsVal);
  const float rs_high = lhs.GetValidY(rsVal);
  const float rt_low = rhs.GetValidX(rtVal);
  const float rt_high = rhs.GetValidY(rtVal);

  // Multiply out components: low halves unsigned, high halves signed.
  const double low_low = f16Unsign(rs_low) * f16Unsign(rt_low);
  const double low_high = f16Unsign(rs_low) * (rt_high);
  const double high_low = rs_high * f16Unsign(rt_low);
  const double high_high = rs_high * rt_high;

  // Split values into outputs, pushing each 16-bit overflow into the next word up.
  const double lo_x = low_low;
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  const u32 extra_flags = VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  lo.x = static_cast<float>(f16Sign(lo_x));
  lo.y = static_cast<float>(f16Sign(lo_y));
  lo.flags |= extra_flags;

  hi.x = static_cast<float>(f16Sign(hi_x));
  hi.y = static_cast<float>(f16Sign(hi_y));
  hi.flags |= extra_flags;

  // compute PSX value
  const u64 product = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
  lo.value = Truncate32(product);
  hi.value = Truncate32(product >> 32);
}
1420
1421
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so they share its Z and validity.
  // NOTE(review): lo was just copied from lhs, so passing lhs here looks like it
  // cannot add a missing Z; CPU_ADD passes the *other* operand - confirm intent.
  lo = lhs;
  CopyZIfMissing(lo, lhs);
  hi = lo;

  const float rs_low = lhs.GetValidX(rsVal);
  const float rs_high = lhs.GetValidY(rsVal);
  const float rt_low = rhs.GetValidX(rtVal);
  const float rt_high = rhs.GetValidY(rtVal);

  // Multiply out components: every half is treated as unsigned.
  const double low_low = f16Unsign(rs_low) * f16Unsign(rt_low);
  const double low_high = f16Unsign(rs_low) * f16Unsign(rt_high);
  const double high_low = f16Unsign(rs_high) * f16Unsign(rt_low);
  const double high_high = f16Unsign(rs_high) * f16Unsign(rt_high);

  // Split values into outputs, pushing each 16-bit overflow into the next word up.
  const double lo_x = low_low;
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  const u32 extra_flags = VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  lo.x = static_cast<float>(f16Sign(lo_x));
  lo.y = static_cast<float>(f16Sign(lo_y));
  lo.flags |= extra_flags;

  hi.x = static_cast<float>(f16Sign(hi_x));
  hi.y = static_cast<float>(f16Sign(hi_y));
  hi.flags |= extra_flags;

  // compute PSX value
  const u64 product = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  lo.value = Truncate32(product);
  hi.value = Truncate32(product >> 32);
}
1466
1467
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (signed)
  // Hi = Rs % Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so they share its Z and validity.
  // NOTE(review): lo was just copied from lhs, so passing lhs here looks like it
  // cannot add a missing Z; CPU_ADD passes the *other* operand - confirm intent.
  lo = lhs;
  CopyZIfMissing(lo, lhs);
  hi = lo;

  // Recombine each operand into a single double: unsigned low half plus the
  // signed high half scaled by 2^16.
  const double dividend = f16Unsign(lhs.GetValidX(rsVal)) + lhs.GetValidY(rsVal) * static_cast<double>(1 << 16);
  const double divisor = f16Unsign(rhs.GetValidX(rtVal)) + rhs.GetValidY(rtVal) * static_cast<double>(1 << 16);
  const u32 extra_flags = VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= extra_flags;

  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= extra_flags;

  // compute PSX value
  const s32 num = static_cast<s32>(rsVal);
  const s32 den = static_cast<s32>(rtVal);
  if (den == 0)
  {
    // divide by zero
    lo.value = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    hi.value = static_cast<u32>(num);
  }
  else if (rsVal == UINT32_C(0x80000000) && den == -1)
  {
    // unrepresentable
    lo.value = UINT32_C(0x80000000);
    hi.value = 0;
  }
  else
  {
    lo.value = static_cast<u32>(num / den);
    hi.value = static_cast<u32>(num % den);
  }
}
1516
1517
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (unsigned)
  // Hi = Rs % Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so they share its Z and validity.
  // NOTE(review): lo was just copied from lhs, so passing lhs here looks like it
  // cannot add a missing Z; CPU_ADD passes the *other* operand - confirm intent.
  lo = lhs;
  CopyZIfMissing(lo, lhs);
  hi = lo;

  // Recombine each operand into a single double: both halves unsigned, the
  // high half scaled by 2^16.
  const double dividend =
    f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(lhs.GetValidY(rsVal)) * static_cast<double>(1 << 16);
  const double divisor =
    f16Unsign(rhs.GetValidX(rtVal)) + f16Unsign(rhs.GetValidY(rtVal)) * static_cast<double>(1 << 16);
  const u32 extra_flags = VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= extra_flags;

  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= extra_flags;

  // compute PSX value
  if (rtVal != 0)
  {
    lo.value = rsVal / rtVal;
    hi.value = rsVal % rtVal;
  }
  else
  {
    // divide by zero
    lo.value = UINT32_C(0xFFFFFFFF);
    hi.value = rsVal;
  }
}
1561
1562
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  // Shared implementation for the shift-left ops: Rd = Rt << sh.
  const u32 rdVal = rtVal << sh;
  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);
  dst.z = src.z;
  dst.value = rdVal;

  // NOTE(review): the variable-shift caller masks the amount with 0x1F and the
  // immediate caller passes the instruction's shamt field, so this branch is
  // presumably never taken - confirm before removing.
  if (sh >= 32) [[unlikely]]
  {
    dst.x = 0.0f;
    dst.y = 0.0f;
    dst.flags = src.flags | VALID_XY | VALID_TAINTED_Z;
    return;
  }

  if (sh < 16)
  {
    // Scale both halves and carry whatever overflows the low half into the high half.
    const double scale = static_cast<double>(1 << sh);
    const double low = f16Unsign(src.x) * scale;
    const double high = (f16Unsign(src.y) * scale) + f16Overflow(low);
    dst.x = static_cast<float>(f16Sign(low));
    dst.y = static_cast<float>(f16Sign(high));
    dst.flags = (src.flags | VALID_TAINTED_Z);
    return;
  }

  // Shifting by 16 or more: the old low half moves entirely into the high
  // half (scaled further when sh > 16) and the low half becomes zero.
  // Y must be written before X in case source and destination are the same entry.
  if (sh == 16)
    dst.y = src.x;
  else
    dst.y = static_cast<float>(f16Sign(f16Unsign(src.x * static_cast<double>(1 << (sh - 16)))));
  dst.x = 0.0f;

  // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
  // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
  // dst.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
  dst.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
}
1604
1605
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt << Sa, with the shift amount taken from the instruction encoding.
  CPU_SLL(instr, rtVal, static_cast<u32>(instr.r.shamt));
}
1613
1614
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt << Rs; only the low five bits of Rs are used as the shift amount.
  CPU_SLL(instr, rtVal, (rsVal & 0x1F));
}
1622
1623
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  // Shared implementation for the shift-right ops. `sign` selects an arithmetic
  // shift, `is_variable` marks the register-specified shift variants.
  const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);

  double low = src.x;
  double high = sign ? src.y : f16Unsign(src.y);

  // Integer probes used to decide what survives the shift in each half.
  const u32 low_probe = SignExtend32(LOWORD_S16(rtVal));            // remove Y
  const u32 high_probe = SET_LOWORD(rtVal, HIWORD_U16(low_probe)); // overwrite x with sign(x)

  // Shift test values
  const u32 low_shifted = static_cast<u32>(static_cast<s32>(low_probe) >> sh);
  const u32 high_shifted =
    sign ? static_cast<u32>(static_cast<s32>(high_probe) >> sh) : (high_probe >> sh);

  if (LOWORD_S16(low_shifted) == HIWORD_S16(low_probe))
    low = LOWORD_S16(low_shifted); // only sign bits left
  else
    low = low / static_cast<double>(1 << sh);

  // Bits shifted out of the high half land in the low half.
  if (LOWORD_S16(high_shifted) != HIWORD_S16(low_probe))
  {
    if (sh < 16)
    {
      const double spill_scale = static_cast<double>(1 << (16 - sh));
      low += high * spill_scale;
      if (src.x < 0)
        low += spill_scale;
    }
    else if (sh == 16)
    {
      low = high;
    }
    else
    {
      low += high / static_cast<double>(1 << (sh - 16));
    }
  }

  const bool high_is_sign_only = (HIWORD_S16(high_shifted) == 0) || (HIWORD_S16(high_shifted) == -1);
  if (high_is_sign_only)
    high = HIWORD_S16(high_shifted);
  else
    high = high / static_cast<double>(1 << sh);

  PGXPValue& dst = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  const bool use_low_precision = sign && !is_variable && !(src.flags & VALID_Z) && sh < 16;
  if (!use_low_precision)
  {
    dst.x = static_cast<float>(f16Sign(low));
    dst.y = static_cast<float>(f16Sign(high));
    dst.z = src.z;
    dst.value = rdVal;
    dst.flags = src.flags | VALID_TAINTED_Z;
  }
  else
  {
    dst.x = static_cast<float>(LOWORD_S16(rdVal));
    dst.y = static_cast<float>(HIWORD_S16(rdVal));
    dst.z = 0.0f;
    dst.value = rdVal;
    dst.flags = VALID_XY | VALID_TAINTED_Z;
  }
}
1689
1690
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (logical, immediate shift amount)
  CPU_SRx(instr, rtVal, static_cast<u32>(instr.r.shamt), /*sign=*/false, /*is_variable=*/false);
}
1698
1699
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (logical); only the low five bits of Rs are used as the shift amount.
  CPU_SRx(instr, rtVal, (rsVal & 0x1F), /*sign=*/false, /*is_variable=*/true);
}
1707
1708
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (arithmetic, immediate shift amount)
  CPU_SRx(instr, rtVal, static_cast<u32>(instr.r.shamt), /*sign=*/true, /*is_variable=*/false);
}
1716
1717
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (arithmetic); only the low five bits of Rs are used as the shift amount.
  CPU_SRx(instr, rtVal, (rsVal & 0x1F), /*sign=*/true, /*is_variable=*/true);
}
1725
1726
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 idx = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", idx).c_str(), rdVal, &g_state.pgxp_cop0[idx]);

  // CPU[Rt] = CP0[Rd]
  // Validate the coprocessor entry against the value actually read, then hand
  // a copy to the destination GPR with that integer value attached.
  PGXPValue& cop_entry = g_state.pgxp_cop0[idx];
  cop_entry.Validate(rdVal);

  PGXPValue& gpr_entry = GetRtValue(instr);
  gpr_entry = cop_entry;
  gpr_entry.value = rdVal;
}
1739
1740
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // CP0[Rd] = CPU[Rt]
  // rtVal is the GPR value being written; rdVal is the integer value the COP0
  // entry is tagged with (the old code also stored it, but on the wrong entry).
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  prdVal = prtVal;

  // Tag the *destination* (COP0) entry with rdVal. Writing it to prtVal, as
  // before, corrupted the source GPR's tracked value and left the COP0 entry
  // holding the GPR value.
  prdVal.value = rdVal;
}
1750
1751