A reimplementation of Mario Kart Wii's physics engine in C++
Loading...
Searching...
No Matches
Math.hh
1#pragma once
2
3#include <Common.hh>
4
5#include <cmath>
6
7static constexpr f32 F_PI = 3.1415927f;
8static constexpr f32 F_TAU = 6.2831855f;
9static constexpr f32 DEG2RAD = 0.017453292f;
10static constexpr f32 DEG2RAD360 = 0.034906585f;
11static constexpr f32 RAD2DEG = 57.2957795f;
12static constexpr f32 DEG2FIDX = 256.0f / 360.0f;
13static constexpr f32 RAD2FIDX = 128.0f / F_PI;
14static constexpr f32 FIDX2RAD = F_PI / 128.0f;
15
17namespace EGG::Mathf {
18
19[[nodiscard]] f32 frsqrt(f32 x);
20
22[[nodiscard]] static inline f32 sqrt(f32 x) {
23 return x > 0.0f ? frsqrt(x) * x : 0.0f;
24}
25
26[[nodiscard]] f32 SinFIdx(f32 fidx);
27[[nodiscard]] f32 CosFIdx(f32 fidx);
28[[nodiscard]] std::pair<f32, f32> SinCosFIdx(f32 fidx);
29[[nodiscard]] f32 AtanFIdx_(f32 fidx);
30[[nodiscard]] f32 Atan2FIdx(f32 x, f32 y);
31
34[[nodiscard]] static inline f32 sin(f32 x) {
35 return SinFIdx(x * RAD2FIDX);
36}
37
40[[nodiscard]] static inline f32 cos(f32 x) {
41 return CosFIdx(x * RAD2FIDX);
42}
43
45[[nodiscard]] static inline f32 acos(f32 x) {
46 return ::acosl(x);
47}
48
50[[nodiscard]] static inline f32 atan2(f32 y, f32 x) {
51 return Atan2FIdx(y, x) * FIDX2RAD;
52}
53
54[[nodiscard]] static inline f32 abs(f32 x) {
55 return std::abs(x);
56}
57
61[[nodiscard]] static inline f64 force25Bit(f64 x) {
62 u64 bits = std::bit_cast<u64>(x);
63 bits = (bits & 0xfffffffff8000000ULL) + (bits & 0x8000000);
64 return std::bit_cast<f64>(bits);
65}
66
69[[nodiscard]] static inline f32 fma(f32 x, f32 y, f32 z) {
70 return static_cast<f32>(
71 static_cast<f64>(x) * force25Bit(static_cast<f64>(y)) + static_cast<f64>(z));
72}
73
76[[nodiscard]] static inline f32 fms(f32 x, f32 y, f32 z) {
77 return static_cast<f32>(
78 static_cast<f64>(x) * force25Bit(static_cast<f64>(y)) - static_cast<f64>(z));
79}
80
81// frsqrte matching courtesy of Geotale, with reference to https://achurch.org/cpu-tests/ppc750cl.s
82
84 u32 base;
85 s32 dec;
86};
87
89 u64 base;
90 s64 dec;
91};
92
93union c32 {
94 constexpr c32(const f32 p) {
95 f = p;
96 }
97
98 constexpr c32(const u32 p) {
99 u = p;
100 }
101
102 u32 u;
103 f32 f;
104};
105
106union c64 {
107 constexpr c64(const f64 p) {
108 f = p;
109 }
110
111 constexpr c64(const u64 p) {
112 u = p;
113 }
114
115 u64 u;
116 f64 f;
117};
118
119static constexpr u64 EXPONENT_SHIFT_F64 = 52;
120static constexpr u64 MANTISSA_MASK_F64 = 0x000fffffffffffffULL;
121static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
122static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
123
124static constexpr std::array<BaseAndDec64, 32> RSQRTE_TABLE = {{
125 {0x69fa000000000ULL, -0x15a0000000LL},
126 {0x5f2e000000000ULL, -0x13cc000000LL},
127 {0x554a000000000ULL, -0x1234000000LL},
128 {0x4c30000000000ULL, -0x10d4000000LL},
129 {0x43c8000000000ULL, -0x0f9c000000LL},
130 {0x3bfc000000000ULL, -0x0e88000000LL},
131 {0x34b8000000000ULL, -0x0d94000000LL},
132 {0x2df0000000000ULL, -0x0cb8000000LL},
133 {0x2794000000000ULL, -0x0bf0000000LL},
134 {0x219c000000000ULL, -0x0b40000000LL},
135 {0x1bfc000000000ULL, -0x0aa0000000LL},
136 {0x16ae000000000ULL, -0x0a0c000000LL},
137 {0x11a8000000000ULL, -0x0984000000LL},
138 {0x0ce6000000000ULL, -0x090c000000LL},
139 {0x0862000000000ULL, -0x0898000000LL},
140 {0x0416000000000ULL, -0x082c000000LL},
141 {0xffe8000000000ULL, -0x1e90000000LL},
142 {0xf0a4000000000ULL, -0x1c00000000LL},
143 {0xe2a8000000000ULL, -0x19c0000000LL},
144 {0xd5c8000000000ULL, -0x17c8000000LL},
145 {0xc9e4000000000ULL, -0x1610000000LL},
146 {0xbedc000000000ULL, -0x1490000000LL},
147 {0xb498000000000ULL, -0x1330000000LL},
148 {0xab00000000000ULL, -0x11f8000000LL},
149 {0xa204000000000ULL, -0x10e8000000LL},
150 {0x9994000000000ULL, -0x0fe8000000LL},
151 {0x91a0000000000ULL, -0x0f08000000LL},
152 {0x8a1c000000000ULL, -0x0e38000000LL},
153 {0x8304000000000ULL, -0x0d78000000LL},
154 {0x7c48000000000ULL, -0x0cc8000000LL},
155 {0x75e4000000000ULL, -0x0c28000000LL},
156 {0x6fd0000000000ULL, -0x0b98000000LL},
157}};
158
159[[nodiscard]] static inline f64 frsqrte(const f64 val) {
160 c64 bits(val);
161
162 u64 mantissa = bits.u & MANTISSA_MASK_F64;
163 s64 exponent = bits.u & EXPONENT_MASK_F64;
164 bool sign = (bits.u & SIGN_MASK_F64) != 0;
165
166 // Handle 0 case
167 if (mantissa == 0 && exponent == 0) {
168 return std::copysign(std::numeric_limits<f64>::infinity(), bits.f);
169 }
170
171 // Handle NaN-like
172 if (exponent == EXPONENT_MASK_F64) {
173 if (mantissa == 0) {
174 return sign ? std::numeric_limits<f64>::quiet_NaN() : 0.0;
175 }
176
177 return val;
178 }
179
180 // Handle negative inputs
181 if (sign) {
182 return std::numeric_limits<f64>::quiet_NaN();
183 }
184
185 if (exponent == 0) {
186 // Shift so one bit goes to where the exponent would be,
187 // then clear that bit to mimick a not-subnormal number!
188 // Aka, if there are 12 leading zeroes, shift left once
189 u32 shift = std::countl_zero(mantissa) - static_cast<u32>(63 - EXPONENT_SHIFT_F64);
190
191 mantissa <<= shift;
192 mantissa &= MANTISSA_MASK_F64;
193 // The shift is subtracted by 1 because denormals by default
194 // are offset by 1 (exponent 0 doesn't have implied 1 bit)
195 exponent -= static_cast<s64>(shift - 1) << EXPONENT_SHIFT_F64;
196 }
197
198 // In reality this doesn't get the full exponent -- Only the least significant bit
199 // Only that's needed because square roots of higher exponent bits simply multiply the
200 // result by 2!!
201 u32 key = static_cast<u32>((static_cast<u64>(exponent) | mantissa) >> 37);
202 u64 new_exp =
203 (static_cast<u64>((0xbfcLL << EXPONENT_SHIFT_F64) - exponent) >> 1) & EXPONENT_MASK_F64;
204
205 // Remove the bits relating to anything higher than the LSB of the exponent
206 const auto &entry = RSQRTE_TABLE[0x1f & (key >> 11)];
207
208 // The result is given by an estimate then an adjustment based on the original
209 // key that was computed
210 u64 new_mantissa = static_cast<u64>(entry.base + entry.dec * static_cast<s64>(key & 0x7ff));
211
212 return c64(new_exp | new_mantissa).f;
213}
214
215static constexpr std::array<BaseAndDec32, 32> FRES_TABLE = {{
216 {0x00fff000UL, -0x3e1L},
217 {0x00f07000UL, -0x3a7L},
218 {0x00e1d400UL, -0x371L},
219 {0x00d41000UL, -0x340L},
220 {0x00c71000UL, -0x313L},
221 {0x00bac400UL, -0x2eaL},
222 {0x00af2000UL, -0x2c4L},
223 {0x00a41000UL, -0x2a0L},
224 {0x00999000UL, -0x27fL},
225 {0x008f9400UL, -0x261L},
226 {0x00861000UL, -0x245L},
227 {0x007d0000UL, -0x22aL},
228 {0x00745800UL, -0x212L},
229 {0x006c1000UL, -0x1fbL},
230 {0x00642800UL, -0x1e5L},
231 {0x005c9400UL, -0x1d1L},
232 {0x00555000UL, -0x1beL},
233 {0x004e5800UL, -0x1acL},
234 {0x0047ac00UL, -0x19bL},
235 {0x00413c00UL, -0x18bL},
236 {0x003b1000UL, -0x17cL},
237 {0x00352000UL, -0x16eL},
238 {0x002f5c00UL, -0x15bL},
239 {0x0029f000UL, -0x15bL},
240 {0x00248800UL, -0x143L},
241 {0x001f7c00UL, -0x143L},
242 {0x001a7000UL, -0x12dL},
243 {0x0015bc00UL, -0x12dL},
244 {0x00110800UL, -0x11aL},
245 {0x000ca000UL, -0x11aL},
246 {0x00083800UL, -0x108L},
247 {0x00041800UL, -0x106L},
248}};
249
250[[nodiscard]] static inline f32 fres(const f32 val) {
251 static constexpr u32 EXPONENT_SHIFT_F32 = 23;
252 static constexpr u64 EXPONENT_SHIFT_F64 = 52;
253 static constexpr u32 EXPONENT_MASK_F32 = 0x7f800000UL;
254 static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
255 static constexpr u32 MANTISSA_MASK_F32 = 0x007fffffUL;
256 static constexpr u32 SIGN_MASK_F32 = 0x80000000UL;
257 static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
258 static constexpr u64 QUIET_BIT_F64 = 0x0008000000000000ULL;
259 static constexpr c64 LARGEST_FLOAT(static_cast<u64>(0x47d0000000000000ULL));
260
261 c64 bits(val);
262
263 u32 mantissa = static_cast<u32>(
264 bits.u >> (EXPONENT_SHIFT_F64 - static_cast<u64>(EXPONENT_SHIFT_F32))) &
265 MANTISSA_MASK_F32;
266 s32 exponent = static_cast<s32>((bits.u & EXPONENT_MASK_F64) >> EXPONENT_SHIFT_F64) - 0x380;
267 u32 sign = static_cast<u32>(bits.u >> 32) & SIGN_MASK_F32;
268
269 if (exponent < -1) {
270 bool nonzero = (bits.u & !SIGN_MASK_F64) != 0;
271
272 if (nonzero) {
273 // Create the largest normal number. It'll be a better approximation than infinity.
274 c32 cresult(sign | (EXPONENT_MASK_F32 - (1 << EXPONENT_SHIFT_F32)) | MANTISSA_MASK_F32);
275 return cresult.f;
276 } else {
277 return std::copysignf(std::numeric_limits<f32>::infinity(), val);
278 }
279 }
280
281 if ((bits.u & EXPONENT_MASK_F64) >= LARGEST_FLOAT.u) {
282 if (mantissa == 0 || (bits.u & EXPONENT_MASK_F64) != EXPONENT_MASK_F64) {
283 // Infinity or a number which will flush to 0 -- return 0
284 return std::copysignf(0.0f, val);
285 } else if ((bits.u & QUIET_BIT_F64) != 0) {
286 // QNaN -- Just return the original value
287 return val;
288 } else {
289 // SNaN!!
290 return std::numeric_limits<f32>::quiet_NaN();
291 }
292 }
293
294 // Exponent doesn't matter for simple reciprocal because the exponent is a single multiplication
295 u32 key = mantissa >> 18;
296 s32 new_exp = 253 - exponent;
297 const auto &entry = FRES_TABLE[key];
298
299 // The result is given by an estimate and adjusted based on the original key that was computed
300 u32 pre_shift =
301 static_cast<u32>(entry.base + entry.dec * (static_cast<s32>((mantissa >> 8) & 0x3ff)));
302 u32 new_mantissa = pre_shift >> 1;
303
304 if (new_exp <= 0) {
305 // Flush the denormal
306 c32 cresult(sign);
307 return cresult.f;
308 } else {
309 u32 temp = sign | static_cast<u32>(new_exp) << EXPONENT_SHIFT_F32 | new_mantissa;
310 c32 cresult(temp);
311 return cresult.f;
312 }
313}
314
316[[nodiscard]] static inline f32 finv(f32 x) {
317 f32 inv = fres(x);
318 f32 invDouble = inv + inv;
319 f32 invSquare = inv * inv;
320 return -fms(x, invSquare, invDouble);
321}
322
323} // namespace EGG::Mathf
This header houses common data types such as our integral types and enums.
Math functions and constants used in the base game.
Definition Math.cc:3
static f32 fma(f32 x, f32 y, f32 z)
Fused multiply-add operation.
Definition Math.hh:69
f32 frsqrt(f32 x)
Definition Math.cc:315
static f32 sin(f32 x)
Definition Math.hh:34
static f64 force25Bit(f64 x)
This is used to mimic the Wii's floating-point unit.
Definition Math.hh:61
static f32 finv(f32 x)
Fused Newton-Raphson operation.
Definition Math.hh:316
static f32 cos(f32 x)
Definition Math.hh:40
static f32 fms(f32 x, f32 y, f32 z)
Fused multiply-subtract operation.
Definition Math.hh:76