CoCalc -- cpu_pgxp.cpp

GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_pgxp.cpp
⁴⁸⁰² views
1
// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
//
4
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
5
// No original code remains. The original copyright notice is included above for historical purposes.
6
//
7

8
#include "cpu_pgxp.h"
9
#include "bus.h"
10
#include "cpu_core.h"
11
#include "cpu_core_private.h"
12
#include "cpu_disasm.h"
13
#include "gpu_types.h"
14
#include "settings.h"
15

16
#include "util/gpu_device.h"
17
#include "util/state_wrapper.h"
18

19
#include "common/assert.h"
20
#include "common/log.h"
21

22
#include <climits>
23
#include <cmath>
24

25
LOG_CHANNEL(CPU);
26

27
// #define LOG_VALUES 1
28
// #define LOG_LOOKUPS 1
29

30
// TODO: Don't update flags on Validate(), instead return it.
31

32
namespace CPU::PGXP {
33

34
enum : u32
35
{
36
  VERTEX_CACHE_WIDTH = 2048,
37
  VERTEX_CACHE_HEIGHT = 2048,
38
  VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
39
  PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
40
  PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
41
};
42

43
enum : u32
44
{
45
  VALID_X = (1u << 0),
46
  VALID_Y = (1u << 1),
47
  VALID_Z = (1u << 2),
48
  VALID_LOWZ = (1u << 16),      // Valid Z from the low part of a 32-bit value.
49
  VALID_HIGHZ = (1u << 17),     // Valid Z from the high part of a 32-bit value.
50
  VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
51

52
  VALID_XY = (VALID_X | VALID_Y),
53
  VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
54
  VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
55
};
56

57
#define LOWORD_U16(val) (static_cast<u16>(val))
58
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
59
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
60
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
61
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
62
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
63

64
static bool ShouldSavePGXPState();
65

66
static double f16Sign(double val);
67
static double f16Unsign(double val);
68
static double f16Overflow(double val);
69

70
static void CacheVertex(u32 value, const PGXPValue& vertex);
71
static PGXPValue* GetCachedVertex(u32 value);
72

73
static float TruncateVertexPosition(float p);
74
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
75

76
static PGXPValue& GetRdValue(Instruction instr);
77
static PGXPValue& GetRtValue(Instruction instr);
78
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
79
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
80
static void SetRtValue(Instruction instr, const PGXPValue& val);
81
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
82
static PGXPValue& GetSXY0();
83
static PGXPValue& GetSXY1();
84
static PGXPValue& GetSXY2();
85
static PGXPValue& PushSXY();
86

87
static PGXPValue* GetPtr(u32 addr);
88
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
89
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
90

91
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
92
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
93
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
94
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
95

96
static void WriteMem(u32 addr, const PGXPValue& value);
97
static void WriteMem16(u32 addr, const PGXPValue& value);
98

99
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
100
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
101

102
#ifdef LOG_VALUES
103
static void LogInstruction(u32 pc, Instruction instr);
104
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
105
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
106

107
// clang-format off
108
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
109
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
110
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
111
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
112
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
113
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
114
#else
115
#define LOG_VALUES_NV() (void)0
116
#define LOG_VALUES_1(name, rval, val) (void)0
117
#define LOG_VALUES_C1(rnum, rval) (void)0
118
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
119
#define LOG_VALUES_LOAD(addr, val) (void)0
120
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
121
#endif
122
// clang-format on
123

124
static constexpr const PGXPValue INVALID_VALUE = {};
125

126
static PGXPValue* s_mem = nullptr;
127
static PGXPValue* s_vertex_cache = nullptr;
128

129
#ifdef LOG_VALUES
130
static std::FILE* s_log;
131
#endif
132
} // namespace CPU::PGXP
133

134
void CPU::PGXP::Initialize()
135
{
136
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
137
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
138
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
139

140
  if (!s_mem)
141
  {
142
    s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
143
    if (!s_mem)
144
      Panic("Failed to allocate PGXP memory");
145
  }
146

147
  if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache)
148
  {
149
    s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
150
    if (!s_vertex_cache)
151
    {
152
      ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
153
      g_settings.gpu_pgxp_vertex_cache = false;
154
    }
155
  }
156

157
  if (s_vertex_cache)
158
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
159
}
160

161
void CPU::PGXP::Reset()
162
{
163
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
164
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
165
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
166

167
  if (s_mem)
168
    std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);
169

170
  if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
171
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
172
}
173

174
void CPU::PGXP::Shutdown()
175
{
176
  if (s_vertex_cache)
177
  {
178
    std::free(s_vertex_cache);
179
    s_vertex_cache = nullptr;
180
  }
181
  if (s_mem)
182
  {
183
    std::free(s_mem);
184
    s_mem = nullptr;
185
  }
186

187
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
188
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
189
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
190
}
191

192
bool CPU::PGXP::ShouldSavePGXPState()
193
{
194
  // Only save PGXP state for runahead, not rewind.
195
  // The performance impact is too great, and the glitches are much less noticeable with rewind.
196
  return (g_settings.gpu_pgxp_enable && g_settings.IsRunaheadEnabled());
197
}
198

199
size_t CPU::PGXP::GetStateSize()
200
{
201
  if (!ShouldSavePGXPState())
202
    return 0;
203

204
  const size_t base_size = sizeof(g_state.pgxp_gpr) + sizeof(g_state.pgxp_cop0) + sizeof(g_state.pgxp_gte) +
205
                           (sizeof(PGXPValue) * PGXP_MEM_SIZE);
206
  const size_t vertex_cache_size = sizeof(PGXPValue) * VERTEX_CACHE_SIZE;
207
  return base_size + (g_settings.gpu_pgxp_vertex_cache ? vertex_cache_size : 0);
208
}
209

210
// Transfers the PGXP register shadows, memory shadow and (when enabled) the vertex cache through `sw`.
// Does nothing when ShouldSavePGXPState() is false.
void CPU::PGXP::DoState(StateWrapper& sw)
{
  if (!ShouldSavePGXPState())
  {
    // Value checks will fail and fall back to imprecise geometry when using rewind.
    return;
  }

  sw.DoBytes(g_state.pgxp_gpr, sizeof(g_state.pgxp_gpr));
  sw.DoBytes(g_state.pgxp_cop0, sizeof(g_state.pgxp_cop0));
  sw.DoBytes(g_state.pgxp_gte, sizeof(g_state.pgxp_gte));

  sw.DoBytes(s_mem, sizeof(PGXPValue) * PGXP_MEM_SIZE);

  // Keep this in step with GetStateSize(), which only counts the cache when the setting is enabled.
  // The cache allocation can outlive the setting (Initialize() never frees it), so checking the pointer
  // alone would transfer more bytes than GetStateSize() reported.
  if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
    sw.DoBytes(s_vertex_cache, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
227

228
// Scales `val` by 65536, truncates it to a signed 32-bit integer (via 64-bit), and scales it back.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const s64 scaled = static_cast<s64>(val * (USHRT_MAX + 1));
  const s32 wrapped = static_cast<s32>(scaled);
  return static_cast<double>(wrapped) / static_cast<double>(USHRT_MAX + 1);
}
233

234
// Returns `val` unchanged when it is non-negative, otherwise `val + 65536`.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
238

239
// Returns the integer part of `val` shifted right by 16 bits, i.e. whatever does not fit in the low 16 bits.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 integer_part = static_cast<s64>(val);
  const s64 upper_bits = integer_part >> 16;
  return static_cast<double>(upper_bits);
}
243

244
// Returns the PGXP shadow of the register named by the instruction's rd field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd_index = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd_index];
}
248

249
// Returns the PGXP shadow of the register named by the instruction's rt field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt_index];
}
253

254
// Validates the rt register's PGXP shadow against the actual register value `rtVal`, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow.Validate(rtVal);
  return shadow;
}
260

261
// Validates the rs register's PGXP shadow against the actual register value `rsVal`, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs_index = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs_index];
  shadow.Validate(rsVal);
  return shadow;
}
267

268
// Overwrites the rt register's PGXP shadow with `val`.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  g_state.pgxp_gpr[rt_index] = val;
}
272

273
// Overwrites the rt register's PGXP shadow with `val`, but records `rtVal` as the raw value it tracks.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  PGXPValue updated = val;
  updated.value = rtVal;

  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  g_state.pgxp_gpr[rt_index] = updated;
}
279

280
// Returns the PGXP shadow of GTE register 12 (SXY0).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  PGXPValue& sxy0 = g_state.pgxp_gte[12];
  return sxy0;
}
284

285
// Returns the PGXP shadow of GTE register 13 (SXY1).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  PGXPValue& sxy1 = g_state.pgxp_gte[13];
  return sxy1;
}
289

290
// Returns the PGXP shadow of GTE register 14 (SXY2).
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  PGXPValue& sxy2 = g_state.pgxp_gte[14];
  return sxy2;
}
294

295
// Advances the three-entry screen XY FIFO held in GTE shadow registers 12..14 by one slot and
// returns the newest slot (register 14) for the caller to fill in.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
301

302
// Maps a CPU address to its entry in the PGXP memory shadow.
// Scratchpad addresses map to the region starting at PGXP_MEM_SCRATCH_OFFSET, addresses below
// Bus::RAM_MIRROR_END map to the RAM region, and anything else yields nullptr.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const auto word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  // Don't worry about >512MB here for performance reasons.
  const u32 paddr = (addr & KSEG_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
}
320

321
// Looks up the shadow entry for `addr`, validates it against the word `value` actually read from memory,
// and returns it. Untracked addresses return INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
330

331
// Loads the 16-bit half of the shadow word at `addr` into `dest`.
// The selected half (chosen by bit 1 of `addr`) is validated against `value`, moved into dest.x, and
// dest.y is synthesised as the extension of that half (`sign` selects sign vs. zero extension).
// Untracked addresses set `dest` to INVALID_VALUE.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // Bit 1 of the address selects the high or low halfword.
  const bool hiword = ((addr & 2) != 0);

  // Only the half being read is validated; a mismatch drops just that component's flag.
  const u32 stored_half = hiword ? (entry->value >> 16) : entry->value;
  if (Truncate16(stored_half) != Truncate16(value))
    entry->flags &= hiword ? ~VALID_Y : ~VALID_X;

  // Start from the whole entry, then move the high half down into X if that is what was read.
  dest = *entry;
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // Y is only marked valid when X is, to avoid fabricating precise values.
  const bool x_valid = ((dest.flags & VALID_X) != 0);
  if (x_valid)
  {
    dest.y = (dest.x < 0) ? (-1.0f * sign) : 0.0f;
    dest.flags |= VALID_Y;
  }
  else
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }

  dest.value = value;
}
372

373
// Stores `value` into the shadow entry for `addr`, if that address is tracked.
// The per-half Z flags are rewritten so that both are set exactly when the stored value has a valid Z.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
    return;

  u32 stored_flags = value.flags & ~(VALID_LOWZ | VALID_HIGHZ);
  if (value.flags & VALID_Z)
    stored_flags |= (VALID_LOWZ | VALID_HIGHZ);

  *entry = value;
  entry->flags = stored_flags;
}
383

384
// Stores the X component of `value` into one 16-bit half of the shadow word at `addr`.
// Bit 1 of `addr` selects the half: the high half is kept in `y`, the low half in `x`.
// Z is tracked per half through VALID_HIGHZ/VALID_LOWZ.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
    return;

  const bool hiword = ((addr & 2) != 0);
  const u32 half_z_flag = hiword ? VALID_HIGHZ : VALID_LOWZ;

  if (!hiword)
  {
    entry->x = value.x;
    entry->flags = (entry->flags & ~VALID_X) | (value.flags & VALID_X);
    entry->value = (entry->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    entry->y = value.x;
    entry->flags = (entry->flags & ~VALID_Y) | ((value.flags & VALID_X) << 1);
    entry->value = (entry->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  if (value.flags & VALID_Z)
  {
    entry->z = value.z;
    entry->flags |= (VALID_Z | half_z_flag);
    return;
  }

  // This half no longer carries Z; the entry keeps VALID_Z only while the other half still does.
  entry->flags &= ~half_z_flag;
  const bool any_half_has_z = ((entry->flags & (VALID_HIGHZ | VALID_LOWZ)) != 0);
  if ((entry->flags & VALID_Z) && !any_half_has_z)
    entry->flags &= ~VALID_Z;
}
419

420
// Takes Z from `src` when `dst` has no valid Z of its own, and inherits the VALID_Z flag from `src`.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
425

426
// Chooses which operand's Z to write to `dst_z`, and ORs VALID_Z into `dst_flags` if either operand has it.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise.
  const bool src1_missing_z = ((src1.flags & VALID_Z) == 0);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);

  const bool use_src2 = src1_missing_z || (src1_tainted && src2_precise);
  dst_z = use_src2 ? src2.z : src1.z;

  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
436

437
#ifdef LOG_VALUES
438
// Starts a new trace line containing the PC, raw instruction bits and disassembly.
// The log file is opened on first use; if that fails, nothing is written (and opening is retried
// on the next call).
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // Writing to a null FILE* would crash, so give up for this instruction if the file cannot be opened.
    if (!s_log)
      return;
  }
  else
  {
    // Finish the previous instruction's line.
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
454

455
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
456
{
457
  if (!s_log) [[unlikely]]
458
    return;
459

460
  SmallString str;
461
  LogValueStr(str, name, rval, val);
462
  std::fprintf(s_log, " %s", str.c_str());
463
}
464

465
// Appends "name=[raw, ...]" to `str`: the raw value, the tracked PGXP value if it differs, the x/y/z
// components, and which of them are flagged valid. A null `val` is printed as NULL.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (val == nullptr)
  {
    str.append(", NULL]");
    return;
  }

  // Only show the tracked raw value when it disagrees with the real one.
  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits != 0)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  // str.append(", tainted");

  str.append(']');
}
496

497
#endif
498

499
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
500
{
501
  PGXPValue& pvalue = PushSXY();
502
  pvalue.x = x;
503
  pvalue.y = y;
504
  pvalue.z = z;
505
  pvalue.value = value;
506
  pvalue.flags = VALID_ALL;
507

508
  if (g_settings.gpu_pgxp_vertex_cache)
509
    CacheVertex(value, pvalue);
510
}
511

512
// Validates the three FIFO entries against the raw register values and reports whether all of them
// still have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& first = GetSXY0();
  first.Validate(sxy0);
  PGXPValue& second = GetSXY1();
  second.Validate(sxy1);
  PGXPValue& third = GetSXY2();
  third.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 common_flags = (first.flags & second.flags & third.flags);
  return ((common_flags & VALID_XYZ) == VALID_XYZ);
}
524

525
float CPU::PGXP::GTE_NCLIP()
526
{
527
  const PGXPValue& SXY0 = GetSXY0();
528
  const PGXPValue& SXY1 = GetSXY1();
529
  const PGXPValue& SXY2 = GetSXY2();
530
  float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
531
                 (SXY2.x * SXY1.y));
532

533
  // ensure fractional values are not incorrectly rounded to 0
534
  const float nclip_abs = std::abs(nclip);
535
  if (0.1f < nclip_abs && nclip_abs < 1.0f)
536
    nclip += (nclip < 0.0f ? -1.0f : 1.0f);
537

538
  return nclip;
539
}
540

541
// Writes `value` to GTE shadow register `reg`.
// Register 15 pushes onto the screen XY FIFO instead, and registers 29/31 are read-only and ignored.
// For all other registers the stored raw value is set to `val`.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  // push FIFO
  if (reg == 15)
  {
    PushSXY() = value;
    return;
  }

  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
569

570
// Handles MFC2: copies the validated GTE shadow register into the rt register's shadow.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  // CPU[Rt] = GTE_D[Rd]
  const u32 idx = instr.cop.Cop2Index();
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rdVal, &g_state.pgxp_gte[idx]);

  PGXPValue& source = g_state.pgxp_gte[idx];
  source.Validate(rdVal);

  SetRtValue(instr, source, rdVal);
}
580

581
// Handles MTC2: copies the validated rt register shadow into the selected GTE shadow register.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  // GTE_D[Rd] = CPU[Rt]
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_index, source, rtVal);
}
590

591
// Handles LWC2: loads the validated memory shadow at `addr` into the GTE shadow register named by rt.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // GTE_D[Rt] = Mem[addr]
  LOG_VALUES_LOAD(addr, rtVal);

  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  CPU_MTC2(gte_index, loaded, rtVal);
}
599

600
// Handles SWC2: validates the GTE shadow register named by rt and stores it to the memory shadow at `addr`.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  //  Mem[addr] = GTE_D[Rt]
  const u32 idx = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& prtVal = g_state.pgxp_gte[idx];
#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal);
  // The log file may have failed to open, in which case s_log is still null.
  if (s_log)
    std::fprintf(s_log, " addr=%08X", addr);
#endif
  prtVal.Validate(rtVal);
  WriteMem(addr, prtVal);
}
612

613
// Stores `vertex` in the vertex cache slot addressed by the two signed 16-bit halves of `value`.
// Both halves are expected to be within [-1024, 1023]; this is only checked by a debug assertion.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 screen_x = static_cast<s16>(value & 0xFFFFu);
  const s16 screen_y = static_cast<s16>(value >> 16);
  DebugAssert(screen_x >= -1024 && screen_x <= 1023 && screen_y >= -1024 && screen_y <= 1023);

  // Bias by 1024 so the signed range maps onto [0, 2047].
  const auto row = (screen_y + 1024) * VERTEX_CACHE_WIDTH;
  s_vertex_cache[row + (screen_x + 1024)] = vertex;
}
620

621
// Returns the vertex cache slot addressed by the two signed 16-bit halves of `value`, or nullptr when
// either half lies outside the cached range [-1024, 1023].
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // The upper Y bound must match CacheVertex() (1023). The previous bound of 1013 made the last ten
  // cached rows unreachable.
  return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ?
           &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] :
           nullptr;
}
629

630
// Applies TruncateGPUVertexPosition() to the integer part of `p` while preserving its fractional part.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  const float truncated_whole = static_cast<float>(TruncateGPUVertexPosition(whole));
  return truncated_whole + fraction;
}
639

640
// Checks that the precise position is within the configured tolerance of the integer position on both
// axes. A negative tolerance setting disables the check.
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float delta_x = std::abs(precise_x - static_cast<float>(int_x));
  const float delta_y = std::abs(precise_y - static_cast<float>(int_y));
  return (delta_x <= tolerance && delta_y <= tolerance);
}
649

650
// Resolves the precise position for a vertex whose native value `value` was read from `addr`.
// Sources are tried in order: the memory shadow, the vertex cache (if enabled), then the native x/y.
// Outputs are always written. Returns true only when the memory shadow supplied the position and its
// Z is valid.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  // Writes the outputs from a precise source value.
  const auto store_precise = [&](const PGXPValue& src) {
    *out_x = TruncateVertexPosition(src.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(src.y) + static_cast<float>(yOffs);
    *out_w = src.z / static_cast<float>(GTE::MAX_Z);
  };

  const PGXPValue* vert = GetPtr(addr);
  const bool mem_usable = (vert && (vert->flags & VALID_XY) == VALID_XY && vert->value == value);
  if (mem_usable)
  {
    store_precise(*vert);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
               std::abs(*out_y - y));
#endif

    // check validity of z component
    if (IsWithinTolerance(*out_x, *out_y, x, y))
      return ((vert->flags & VALID_Z) == VALID_Z);
  }

  if (g_settings.gpu_pgxp_vertex_cache)
  {
    vert = GetCachedVertex(value);
    const bool cache_usable = (vert && (vert->flags & VALID_XY) == VALID_XY);
    if (cache_usable)
    {
      store_precise(*vert);

#ifdef LOG_LOOKUPS
      GL_INS_FMT("0x{:08X} {},{} => VERTEX_CACHE{{{},{} ({},{},{}) ({},{})}}", addr, x, y, *out_x, *out_y,
                 TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
                 std::abs(*out_y - y));
#endif

      // This is only really used for Syphon Filter 3, and including Z tends to make things worse.
      // At least it can get rid of the jitter, but not the warping.
      if (IsWithinTolerance(*out_x, *out_y, x, y))
        return false;
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;

#ifdef LOG_LOOKUPS
  GL_INS_FMT("0x{:08X} {},{} => MISS", addr, x, y);
#endif
  return false;
}
707

708
// Handles LW: loads the validated memory shadow at `addr` into the rt register's shadow.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
714

715
// Handles byte loads: these are not tracked, so the rt register's shadow is simply invalidated.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);
  GetRtValue(instr) = INVALID_VALUE;
}
720

721
// Handles LH: loads one 16-bit half of the memory shadow into the rt register's shadow, sign-extended.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (sign extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, true);
}
727

728
// Handles LHU: loads one 16-bit half of the memory shadow into the rt register's shadow, zero-extended.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (zero extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, false);
}
734

735
// Handles SB: byte stores are not tracked, so the whole shadow word at `addr` is invalidated.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& invalidated = INVALID_VALUE;
  WriteMem(addr, invalidated);
}
740

741
// Handles SH: stores the validated rt register shadow into one 16-bit half of the memory shadow.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, source);
}
747

748
// Handles SW: stores the validated rt register shadow into the memory shadow at `addr`.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[Rs + Im] = Rt
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, source);
}
755

756
// Handles the unaligned loads LWL/LWR.
// The containing aligned word's shadow is validated, then merged into the rt register's shadow according
// to which bytes the instruction replaces. Components that end up only partially replaced are rebuilt
// from the merged integer value and flagged invalid. Nothing is updated when the address is untracked
// or the memory word cannot be read.
void CPU::PGXP::CPU_LWx(Instruction instr, u32 addr, u32 rtVal)
{
  const u32 word_addr = addr & ~3u;
  PGXPValue* const mem = GetPtr(word_addr);
  if (!mem)
    return;

  u32 mem_word;
  if (!CPU::SafeReadMemoryWord(word_addr, &mem_word)) [[unlikely]]
    return;

  mem->Validate(mem_word);
  LOG_VALUES_LOAD(addr, mem_word);

  PGXPValue& reg = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;
  const u32 bit_shift = (byte_shift * 8);
  const bool is_lwl = (instr.op == InstructionOp::lwl);

  // Integer result of the merge, matching what the CPU itself computes for LWL/LWR.
  const u32 mixed_value = is_lwl ?
                            ((rtVal & (UINT32_C(0x00FFFFFF) >> bit_shift)) | (mem_word << (24 - bit_shift))) :
                            ((rtVal & (UINT32_C(0xFFFFFF00) << (24 - bit_shift))) | (mem_word >> bit_shift));

  if (is_lwl)
  {
    switch (byte_shift)
    {
      case 0:
      {
        // only writing the upper half of Y, can't do much about that..
        reg.y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        reg.flags &= ~VALID_Y;
      }
      break;

      case 1:
      {
        // The low half of memory becomes the high half of the register.
        reg.y = mem->x;
        if (mem->flags & VALID_LOWZ)
          reg.z = mem->z;
        reg.flags =
          (reg.flags & ~VALID_Y) | ((mem->flags & VALID_X) << 1) | ((mem->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

      case 2:
      {
        // making a dog's breakfast of both X and Y
        reg.x = static_cast<float>(static_cast<s16>(mixed_value));
        reg.y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        reg.flags &= ~(VALID_X | VALID_Y | VALID_Z);
      }
      break;

      case 3:
      {
        // effectively the same as a normal load.
        reg = *mem;
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
  else
  {
    switch (byte_shift)
    {
      case 0:
      {
        // effectively the same as a normal load.
        reg = *mem;
      }
      break;

      case 1:
      {
        // making a dog's breakfast of both X and Y
        reg.x = static_cast<float>(static_cast<s16>(mixed_value));
        reg.y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        reg.flags &= ~(VALID_X | VALID_Y | VALID_Z);
      }
      break;

      case 2:
      {
        // The high half of memory becomes the low half of the register.
        reg.x = mem->y;
        if (mem->flags & VALID_HIGHZ)
          reg.z = mem->z;
        reg.flags = (reg.flags & ~VALID_X) | ((mem->flags & VALID_Y) >> 1) |
                    ((mem->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 3:
      {
        // only writing the lower half of X, can't do much about that..
        reg.x = static_cast<float>(static_cast<s16>(mixed_value));
        reg.flags &= ~VALID_X;
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }

  // Every case tracks the merged integer value.
  reg.value = mixed_value;
}
867

868
// Mirrors an unaligned store (SWL/SWR) into the PGXP shadow of the aligned word containing addr.
//
//   instr - the swl/swr instruction; selects the merge direction and names rt.
//   addr  - unaligned effective address; (addr & 3) selects which bytes of the word are replaced.
//   rtVal - integer value of rt at the time of the store.
//
// Nothing is updated when GetPtr() returns null for the address or the aligned word cannot be read.
void CPU::PGXP::CPU_SWx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const u32 aligned_addr = addr & ~3u;
  PGXPValue* pmemVal = GetPtr(aligned_addr);
  u32 memVal;
  if (!pmemVal)
    return;
  if (!CPU::SafeReadMemoryWord(aligned_addr, &memVal)) [[unlikely]]
    return;
  pmemVal->Validate(memVal);

  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;
  const u32 bit_shift = (byte_shift * 8);

  if (instr.op == InstructionOp::swl)
  {
    // SWL: the high bytes of rt replace the low (byte_shift + 1) bytes of the memory word.
    const u32 mixed_value = (memVal & (UINT32_C(0xFFFFFF00) << bit_shift)) | (rtVal >> (24 - bit_shift));

    switch (byte_shift)
    {
      case 0:
      {
        // only writing the lower half of X, can't do much about that..
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 1:
      {
        // The low halfword of memory becomes the high halfword of rt: mem.x = rt.y.
        pmemVal->x = prtVal.y;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;
        pmemVal->flags = (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((prtVal.flags & VALID_Y) >> 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_LOWZ) : 0) |
                         ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 2:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 3:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
  else
  {
    // SWR: the low bytes of rt replace the high (4 - byte_shift) bytes of the memory word.
    const u32 mixed_value = (memVal & (UINT32_C(0x00FFFFFF) >> (24 - bit_shift))) | (rtVal << bit_shift);

    switch (byte_shift)
    {
      case 0:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

      case 1:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;

        // Both halves are mangled, so Z cannot stay valid either (matches the SWL byte_shift == 2 path).
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 2:
      {
        // The high halfword of memory becomes the low halfword of rt: mem.y = rt.x.
        pmemVal->y = prtVal.x;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;

        // Y is the component being replaced: drop its old validity and inherit rt's VALID_X.
        // X in memory is untouched and keeps its flag.
        pmemVal->flags = (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((prtVal.flags & VALID_X) << 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_HIGHZ) : 0) |
                         ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

      case 3:
      {
        // only writing the upper half of Y, can't do much about that..
        // Y lives in the high 16 bits of the merged word; X (low 16 bits) is unchanged.
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
}
988

989
// Splits a packed register pair (Rs in the low 8 bits, Rd in the bits above) and forwards to CPU_MOVE().
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 dest_reg = (rd_and_rs >> 8);
  const u32 source_reg = (rd_and_rs & 0xFFu);
  CPU_MOVE(dest_reg, source_reg, rsVal);
}
995

996
// Copies the PGXP shadow of register Rs into register Rd, after validating the source against rsVal.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif
  PGXPValue& source = g_state.pgxp_gpr[Rs];
  source.Validate(rsVal);

  PGXPValue& dest = g_state.pgxp_gpr[Rd];
  dest = source;
}
1006

1007
// Tracks ADDI/ADDIU: Rt = Rs + sign-extended immediate.
// The shadow of rt starts as a copy of rs and is then adjusted in the 16.16 split x/y representation.
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  const u32 imm = instr.i.imm_sext32();

  PGXPValue& dst = GetRtValue(instr);
  dst = src;

  // Adding zero is a plain move.
  if (imm == 0)
    return;

  // Zero source: the result is just the immediate, at integer precision.
  if (rsVal == 0)
  {
    dst.x = static_cast<float>(LOWORD_S16(imm));
    dst.y = static_cast<float>(HIWORD_S16(imm));
    dst.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
    dst.value = imm;
    return;
  }

  // Add the low halves as unsigned quantities so a carry/borrow can be detected.
  float low_sum = static_cast<float>(f16Unsign(dst.x));
  low_sum += static_cast<float>(LOWORD_U16(imm));

  float carry = 0.0f;
  if (low_sum > USHRT_MAX)
    carry = 1.0f;
  else if (low_sum < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_sum));
  dst.y += HIWORD_S16(imm) + carry;

  // Wrap the high half back into the signed 16-bit range.
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = static_cast<float>(-(USHRT_MAX + 1));
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal + imm;
  dst.flags |= VALID_TAINTED_Z;
}
1047

1048
// Tracks ANDI: Rt = Rs & zero-extended immediate.
// The upper half of the result is always zero, so Y becomes a known 0.
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal & imm;
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);

  // remove upper 16-bits
  dst.y = 0.0f;
  dst.z = src.z;
  dst.value = result;
  dst.flags = src.flags | VALID_Y | VALID_TAINTED_Z;

  if (imm == 0)
  {
    // Mask of zero: x is exactly zero.
    dst.x = 0.0f;
    dst.flags |= VALID_X;
  }
  else if (imm == 0xFFFFu)
  {
    // Full low-half mask: x passes through unchanged (and keeps the source's validity).
    dst.x = src.x;
  }
  else
  {
    // Partial mask: fall back to the integer result.
    dst.x = static_cast<float>(LOWORD_S16(result));
    dst.flags |= VALID_X;
  }
}
1090

1091
// Tracks ORI: Rt = Rs | zero-extended immediate.
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal | imm;

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = result;

  // A zero immediate leaves x untouched; anything else degrades x to the integer result.
  if (imm != 0) [[likely]]
  {
    dst.x = static_cast<float>(LOWORD_S16(result));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
1115

1116
// Tracks XORI: Rt = Rs ^ zero-extended immediate.
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal ^ imm;

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = result;

  // A zero immediate leaves x untouched; anything else degrades x to the integer result.
  if (imm != 0) [[likely]]
  {
    dst.x = static_cast<float>(LOWORD_S16(result));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
1140

1141
// Tracks SLTI: Rt = (Rs < Imm), signed comparison.
// The float-side result is 1.0/0.0 in x; the integer mirror is computed from rsVal directly.
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const s32 imm = instr.i.imm_s16();
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);

  // Split the immediate the same way values are split: low half in x, sign extension in y.
  const float imm_low = static_cast<float>(imm);
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  PGXPValue& dst = GetRtValue(instr);
  const bool is_less = (src.GetValidY(rsVal) < imm_high || src.GetValidX(rsVal) < imm_low);
  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = src.z;
  dst.flags = src.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
1159

1160
// Tracks SLTIU: Rt = (Rs < Imm), unsigned comparison.
// The float-side result is 1.0/0.0 in x; the integer mirror is computed from rsVal directly.
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (Unsigned)
  // SLTIU sign-extends the 16-bit immediate to 32 bits and only then compares as unsigned, so the
  // mirrored integer result must use the sign-extended value to stay in sync with the real register.
  const u32 imm = instr.i.imm_sext32();
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);

  const float fimmx = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
  const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f;

  PGXPValue& prtVal = GetRtValue(instr);
  prtVal.x =
    (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(fimmy) || f16Unsign(prsVal.GetValidX(rsVal)) < fimmx) ? 1.0f : 0.0f;
  prtVal.y = 0.0f;
  prtVal.z = prsVal.z;
  prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  prtVal.value = BoolToUInt32(rsVal < imm);
}
1179

1180
// Tracks LUI: Rt = Imm << 16. The low half (x) is zero and the high half (y) is the immediate.
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  PGXPValue& dst = GetRtValue(instr);
  dst.value = instr.i.imm_zext32() << 16;
  dst.x = 0.0f;
  dst.y = static_cast<float>(instr.i.imm_s16());
  dst.z = 0.0f;
  dst.flags = VALID_XY;
}
1192

1193
// Tracks ADD/ADDU: Rd = Rs + Rt.
// When either operand is zero the result is a copy of the other operand; otherwise the halves are
// added separately with carry propagation from x into y.
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  if (rtVal == 0)
  {
    dst = lhs;
    CopyZIfMissing(dst, rhs);
    return;
  }

  if (rsVal == 0)
  {
    dst = rhs;
    CopyZIfMissing(dst, lhs);
    return;
  }

  const double low_sum = f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low_sum > USHRT_MAX)
    carry = 1.0f;
  else if (low_sum < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_sum));
  dst.y = lhs.GetValidY(rsVal) + rhs.GetValidY(rtVal) + carry;

  // truncate on overflow/underflow
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = static_cast<float>(-(USHRT_MAX + 1));
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal + rtVal;

  // All of rs's flags carry over; rt contributes only its X/Y validity.
  dst.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dst.z, dst.flags, lhs, rhs);
}
1232

1233
// Tracks SUB/SUBU: Rd = Rs - Rt.
// Subtracting zero copies rs; otherwise the halves are subtracted separately with borrow
// propagation from x into y.
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  if (rtVal == 0)
  {
    dst = lhs;
    CopyZIfMissing(dst, rhs);
    return;
  }

  const double low_diff = f16Unsign(lhs.GetValidX(rsVal)) - f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low_diff > USHRT_MAX)
    carry = 1.0f;
  else if (low_diff < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_diff));
  dst.y = lhs.GetValidY(rsVal) - (rhs.GetValidY(rtVal) - carry);

  // truncate on overflow/underflow
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = static_cast<float>(-(USHRT_MAX + 1));
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal - rtVal;

  // All of rs's flags carry over; rt contributes only its X/Y validity.
  dst.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dst.z, dst.flags, lhs, rhs);
}
1267

1268
// Shared tracking for the register-register bitwise ops (callers pass the already-computed rdVal).
// For each 16-bit half: a zero result is exact, a half equal to one operand's half reuses that
// operand's component, and anything else falls back to the integer result.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  const u16 rd_low = LOWORD_U16(rdVal);
  const u16 rd_high = HIWORD_U16(rdVal);

  const float new_x = (rd_low == 0)                  ? 0.0f :
                      (rd_low == LOWORD_U16(rsVal))  ? lhs.GetValidX(rsVal) :
                      (rd_low == LOWORD_U16(rtVal))  ? rhs.GetValidX(rtVal) :
                                                       static_cast<float>(LOWORD_S16(rdVal));

  const float new_y = (rd_high == 0)                 ? 0.0f :
                      (rd_high == HIWORD_U16(rsVal)) ? lhs.GetValidY(rsVal) :
                      (rd_high == HIWORD_U16(rtVal)) ? rhs.GetValidY(rtVal) :
                                                       static_cast<float>(HIWORD_S16(rdVal));

  // The destination may alias a source register, so everything is computed into locals first
  // and only then written out.
  u32 new_flags = ((lhs.flags | rhs.flags) & VALID_XY) ? (VALID_XY | VALID_TAINTED_Z) : 0;
  PGXPValue& dst = GetRdValue(instr);
  SelectZ(dst.z, new_flags, lhs, rhs);
  dst.x = new_x;
  dst.y = new_y;
  dst.flags = new_flags;
  dst.value = rdVal;
}
1302

1303
// Tracks AND: Rd = Rs & Rt.
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
  CPU_BITWISE(instr, rsVal & rtVal, rsVal, rtVal);
}
1311

1312
// Tracks OR: Rd = Rs | Rt.
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
  CPU_BITWISE(instr, rsVal | rtVal, rsVal, rtVal);
}
1320

1321
// Tracks XOR: Rd = Rs ^ Rt.
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
  CPU_BITWISE(instr, rsVal ^ rtVal, rsVal, rtVal);
}
1329

1330
// Tracks NOR: Rd = ~(Rs | Rt).
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
1338

1339
// Tracks SLT: Rd = (Rs < Rt), signed comparison.
// The float-side result is 1.0/0.0 in x; the integer mirror is computed from the raw values.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // High halves compare as signed, low halves as unsigned.
  const bool is_less = (lhs.GetValidY(rsVal) < rhs.GetValidY(rtVal) ||
                        f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
1356

1357
// Tracks SLTU: Rd = (Rs < Rt), unsigned comparison.
// The float-side result is 1.0/0.0 in x; the integer mirror is computed from the raw values.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Both halves compare as unsigned.
  const bool is_less = (f16Unsign(lhs.GetValidY(rsVal)) < f16Unsign(rhs.GetValidY(rtVal)) ||
                        f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  dst.x = is_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(rsVal < rtVal);
}
1374

1375
// Tracks MULT: Hi/Lo = Rs * Rt (signed).
// The product is built from the four partial products of the split x/y halves; the integer
// mirrors of hi/lo are computed from the raw register values.
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (signed)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Start from rs, and take Z from rt when rs has none (same pattern as CPU_ADD/CPU_SUB).
  // Passing prsVal here would copy from the value that was just assigned.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prtVal);

  // Z/valid is the same
  phiVal = ploVal;

  const float rsx = prsVal.GetValidX(rsVal);
  const float rsy = prsVal.GetValidY(rsVal);
  const float rtx = prtVal.GetValidX(rtVal);
  const float rty = prtVal.GetValidY(rtVal);

  // Multiply out components (low halves unsigned, high halves signed)
  const double xx = f16Unsign(rsx) * f16Unsign(rtx);
  const double xy = f16Unsign(rsx) * (rty);
  const double yx = rsy * f16Unsign(rtx);
  const double yy = rsy * rty;

  // Split values into outputs, carrying the overflow of each 16-bit slice into the next
  const double lx = xx;
  const double ly = f16Overflow(xx) + (xy + yx);
  const double hx = f16Overflow(ly) + yy;
  const double hy = f16Overflow(hx);

  ploVal.x = static_cast<float>(f16Sign(lx));
  ploVal.y = static_cast<float>(f16Sign(ly));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
  phiVal.x = static_cast<float>(f16Sign(hx));
  phiVal.y = static_cast<float>(f16Sign(hy));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  const u64 result = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
  phiVal.value = Truncate32(result >> 32);
  ploVal.value = Truncate32(result);
}
1420

1421
// Tracks MULTU: Hi/Lo = Rs * Rt (unsigned).
// The product is built from the four partial products of the split x/y halves, all treated as
// unsigned; the integer mirrors of hi/lo are computed from the raw register values.
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (unsigned)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Start from rs, and take Z from rt when rs has none (same pattern as CPU_ADD/CPU_SUB).
  // Passing prsVal here would copy from the value that was just assigned.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prtVal);

  // Z/valid is the same
  phiVal = ploVal;

  const float rsx = prsVal.GetValidX(rsVal);
  const float rsy = prsVal.GetValidY(rsVal);
  const float rtx = prtVal.GetValidX(rtVal);
  const float rty = prtVal.GetValidY(rtVal);

  // Multiply out components
  const double xx = f16Unsign(rsx) * f16Unsign(rtx);
  const double xy = f16Unsign(rsx) * f16Unsign(rty);
  const double yx = f16Unsign(rsy) * f16Unsign(rtx);
  const double yy = f16Unsign(rsy) * f16Unsign(rty);

  // Split values into outputs, carrying the overflow of each 16-bit slice into the next
  const double lx = xx;
  const double ly = f16Overflow(xx) + (xy + yx);
  const double hx = f16Overflow(ly) + yy;
  const double hy = f16Overflow(hx);

  ploVal.x = static_cast<float>(f16Sign(lx));
  ploVal.y = static_cast<float>(f16Sign(ly));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
  phiVal.x = static_cast<float>(f16Sign(hx));
  phiVal.y = static_cast<float>(f16Sign(hy));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  const u64 result = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  phiVal.value = Truncate32(result >> 32);
  ploVal.value = Truncate32(result);
}
1466

1467
// Tracks DIV (signed): Lo = Rs / Rt, Hi = Rs % Rt.
// The float side recombines x/y into a single double per operand and divides; the integer mirrors
// handle the divide-by-zero and 0x80000000 / -1 cases explicitly.
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (signed)
  // Hi = Rs % Rt (signed)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Start from rs, and take Z from rt when rs has none (same pattern as CPU_ADD/CPU_SUB).
  // Passing prsVal here would copy from the value that was just assigned.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prtVal);

  // Z/valid is the same
  phiVal = ploVal;

  // Recombine the halves: unsigned low half plus signed high half scaled by 2^16.
  const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + prsVal.GetValidY(rsVal) * static_cast<double>(1 << 16);
  const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + prtVal.GetValidY(rtVal) * static_cast<double>(1 << 16);

  const double lo = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(lo)));
  ploVal.x = static_cast<float>(f16Sign(lo));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  const double hi = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(hi)));
  phiVal.x = static_cast<float>(f16Sign(hi));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  if (static_cast<s32>(rtVal) == 0)
  {
    // divide by zero
    ploVal.value = (static_cast<s32>(rsVal) >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    phiVal.value = static_cast<u32>(static_cast<s32>(rsVal));
  }
  else if (rsVal == UINT32_C(0x80000000) && static_cast<s32>(rtVal) == -1)
  {
    // unrepresentable
    ploVal.value = UINT32_C(0x80000000);
    phiVal.value = 0;
  }
  else
  {
    ploVal.value = static_cast<u32>(static_cast<s32>(rsVal) / static_cast<s32>(rtVal));
    phiVal.value = static_cast<u32>(static_cast<s32>(rsVal) % static_cast<s32>(rtVal));
  }
}
1516

1517
// Tracks DIVU (unsigned): Lo = Rs / Rt, Hi = Rs % Rt.
// The float side recombines x/y into a single double per operand (both halves unsigned) and
// divides; the integer mirrors handle divide-by-zero explicitly.
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (unsigned)
  // Hi = Rs % Rt (unsigned)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Start from rs, and take Z from rt when rs has none (same pattern as CPU_ADD/CPU_SUB).
  // Passing prsVal here would copy from the value that was just assigned.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prtVal);

  // Z/valid is the same
  phiVal = ploVal;

  const double vs =
    f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prsVal.GetValidY(rsVal)) * static_cast<double>(1 << 16);
  const double vt =
    f16Unsign(prtVal.GetValidX(rtVal)) + f16Unsign(prtVal.GetValidY(rtVal)) * static_cast<double>(1 << 16);

  const double lo = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(lo)));
  ploVal.x = static_cast<float>(f16Sign(lo));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  const double hi = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(hi)));
  phiVal.x = static_cast<float>(f16Sign(hi));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  if (rtVal == 0)
  {
    // divide by zero
    ploVal.value = UINT32_C(0xFFFFFFFF);
    phiVal.value = rsVal;
  }
  else
  {
    ploVal.value = rsVal / rtVal;
    phiVal.value = rsVal % rtVal;
  }
}
1561

1562
// Shared tracking for the left shifts: Rd = Rt << sh.
// Handles four ranges of sh separately: everything shifted out, exactly one halfword, more than
// one halfword, and less than one halfword.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  const u32 shifted = rtVal << sh;
  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);
  dst.z = src.z;
  dst.value = shifted;

  if (sh >= 32) [[unlikely]]
  {
    // Nothing of the source remains.
    dst.x = 0.0f;
    dst.y = 0.0f;
    dst.flags = src.flags | VALID_XY | VALID_TAINTED_Z;
    return;
  }

  if (sh == 16)
  {
    // The low half moves up into the high half unchanged.
    dst.y = src.x;
    dst.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // dst.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dst.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
    return;
  }

  if (sh >= 16)
  {
    // The low half moves into the high half, scaled by the remaining shift.
    dst.y = static_cast<float>(f16Sign(f16Unsign(src.x * static_cast<double>(1 << (sh - 16)))));
    dst.x = 0.0f;

    // Same flag handling as the sh == 16 case.
    // dst.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dst.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
    return;
  }

  // Less than a halfword: scale both halves and carry the overflow of x into y.
  const double scaled_x = f16Unsign(src.x) * static_cast<double>(1 << sh);
  const double scaled_y = (f16Unsign(src.y) * static_cast<double>(1 << sh)) + f16Overflow(scaled_x);
  dst.x = static_cast<float>(f16Sign(scaled_x));
  dst.y = static_cast<float>(f16Sign(scaled_y));
  dst.flags = (src.flags | VALID_TAINTED_Z);
}
1604

1605
// Tracks SLL: Rd = Rt << shamt (shift amount taken from the instruction).
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SLL(instr, rtVal, shift_amount);
}
1613

1614
// Tracks SLLV: Rd = Rt << (Rs & 0x1F) (shift amount taken from the low 5 bits of rs).
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SLL(instr, rtVal, shift_amount);
}
1622

1623
// Shared tracking for the right shifts: Rd = Rt >> sh.
//
//   sign        - true for arithmetic shifts, false for logical shifts.
//   is_variable - true when the shift amount came from a register rather than the instruction.
//
// Integer "test values" are shifted alongside the float components to decide, per half, whether
// real data remains after the shift or only sign bits are left.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);

  double out_x = src.x;
  double out_y = sign ? src.y : f16Unsign(src.y);

  const u32 test_x = SignExtend32(LOWORD_S16(rtVal));       // remove Y
  const u32 test_y = SET_LOWORD(rtVal, HIWORD_U16(test_x)); // overwrite x with sign(x)

  // Shift test values
  const u32 shifted_x = static_cast<u32>(static_cast<s32>(test_x) >> sh);
  const u32 shifted_y = sign ? static_cast<u32>(static_cast<s32>(test_y) >> sh) : (test_y >> sh);

  if (LOWORD_S16(shifted_x) == HIWORD_S16(test_x))
    out_x = LOWORD_S16(shifted_x); // only sign bits left
  else
    out_x = out_x / static_cast<double>(1 << sh);

  // Bits of Y have been shifted down into the low half: fold them into x.
  if (LOWORD_S16(shifted_y) != HIWORD_S16(test_x))
  {
    if (sh == 16)
    {
      out_x = out_y;
    }
    else if (sh < 16)
    {
      out_x += out_y * static_cast<double>(1 << (16 - sh));
      if (src.x < 0)
        out_x += static_cast<double>(1 << (16 - sh));
    }
    else
    {
      out_x += out_y / static_cast<double>(1 << (sh - 16));
    }
  }

  const bool high_is_sign_only = (HIWORD_S16(shifted_y) == 0) || (HIWORD_S16(shifted_y) == -1);
  if (high_is_sign_only)
    out_y = HIWORD_S16(shifted_y);
  else
    out_y = out_y / static_cast<double>(1 << sh);

  PGXPValue& dst = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  const bool use_low_precision = sign && !is_variable && !(src.flags & VALID_Z) && sh < 16;
  if (use_low_precision)
  {
    dst.x = static_cast<float>(LOWORD_S16(rdVal));
    dst.y = static_cast<float>(HIWORD_S16(rdVal));
    dst.z = 0.0f;
    dst.value = rdVal;
    dst.flags = VALID_XY | VALID_TAINTED_Z;
  }
  else
  {
    dst.x = static_cast<float>(f16Sign(out_x));
    dst.y = static_cast<float>(f16Sign(out_y));
    dst.z = src.z;
    dst.value = rdVal;
    dst.flags = src.flags | VALID_TAINTED_Z;
  }
}
1689

1690
// Tracks SRL: Rd = Rt >> shamt (logical, shift amount taken from the instruction).
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/false);
}
1698

1699
// Tracks SRLV: Rd = Rt >> (Rs & 0x1F) (logical, shift amount taken from the low 5 bits of rs).
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/true);
}
1707

1708
// Tracks SRA: Rd = Rt >> shamt (arithmetic, shift amount taken from the instruction).
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/false);
}
1716

1717
// Tracks SRAV: Rd = Rt >> (Rs & 0x1F) (arithmetic, shift amount taken from the low 5 bits of rs).
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/true);
}
1725

1726
// Tracks MFC0: CPU[Rt] = CP0[Rd].
// The COP0 shadow is validated against rdVal, then copied into the GPR shadow of rt.
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 idx = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", idx).c_str(), rdVal, &g_state.pgxp_cop0[idx]);

  PGXPValue& cop_value = g_state.pgxp_cop0[idx];
  cop_value.Validate(rdVal);

  PGXPValue& dst = GetRtValue(instr);
  dst = cop_value;
  dst.value = rdVal;
}
1739

1740
// Tracks MTC0: CP0[Rd] = CPU[Rt].
//
//   rdVal - integer value to mirror for the COP0 register (the same value CPU_MFC0 later
//           validates the COP0 shadow against).
//   rtVal - integer value of rt, used to validate the GPR shadow before copying it.
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // CP0[Rd] = CPU[Rt]
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  prdVal = prtVal;

  // The integer mirror belongs to the destination (COP0) entry. Writing it to prtVal would
  // desynchronise the GPR shadow from rt's real value and leave the COP0 shadow holding rtVal.
  prdVal.value = rdVal;
}
1750

1751
Product

Resources

Company