A reimplementation of Mario Kart Wii's physics engine in C++
Loading...
Searching...
No Matches
Math.hh
1#pragma once
2
3#include <Common.hh>
4
5#include <cmath>
6
7static constexpr f32 F_PI = 3.1415927f;
8static constexpr f32 F_TAU = 6.2831855f;
9static constexpr f32 DEG2RAD = 0.017453292f;
10static constexpr f32 DEG2RAD360 = 0.034906585f;
11static constexpr f32 RAD2DEG = 57.2957795f;
12static constexpr f32 DEG2FIDX = 256.0f / 360.0f;
13static constexpr f32 RAD2FIDX = 128.0f / F_PI;
14static constexpr f32 FIDX2RAD = F_PI / 128.0f;
15static constexpr f32 HALF_PI = F_PI / 2.0f;
16
18namespace EGG::Mathf {
19
20[[nodiscard]] f32 frsqrt(f32 x);
21
23[[nodiscard]] static inline f32 sqrt(f32 x) {
24 return x > 0.0f ? x * frsqrt(x) : 0.0f;
25}
26
27[[nodiscard]] f32 SinFIdx(f32 fidx);
28[[nodiscard]] f32 CosFIdx(f32 fidx);
29[[nodiscard]] std::pair<f32, f32> SinCosFIdx(f32 fidx);
30[[nodiscard]] f32 AtanFIdx_(f32 fidx);
31[[nodiscard]] f32 Atan2FIdx(f32 x, f32 y);
32
33u32 FindRootsQuadratic(f32 a, f32 b, f32 c, f32 &root1, f32 &root2);
34
37[[nodiscard]] static inline f32 sin(f32 x) {
38 return SinFIdx(x * RAD2FIDX);
39}
40
43[[nodiscard]] static inline f32 cos(f32 x) {
44 return CosFIdx(x * RAD2FIDX);
45}
46
48[[nodiscard]] static inline f32 asin(f32 x) {
49 return ::asinl(x);
50}
51
53[[nodiscard]] static inline f32 acos(f32 x) {
54 return ::acosl(x);
55}
56
58[[nodiscard]] static inline f32 atan2(f32 y, f32 x) {
59 return Atan2FIdx(y, x) * FIDX2RAD;
60}
61
62[[nodiscard]] static inline f32 abs(f32 x) {
63 return std::abs(x);
64}
65
69[[nodiscard]] static inline f64 force25Bit(f64 x) {
70 u64 bits = std::bit_cast<u64>(x);
71 bits = (bits & 0xfffffffff8000000ULL) + (bits & 0x8000000);
72 return std::bit_cast<f64>(bits);
73}
74
77[[nodiscard]] static inline f32 fma(f32 x, f32 y, f32 z) {
78 return static_cast<f32>(
79 static_cast<f64>(x) * force25Bit(static_cast<f64>(y)) + static_cast<f64>(z));
80}
81
84[[nodiscard]] static inline f32 fms(f32 x, f32 y, f32 z) {
85 return static_cast<f32>(
86 static_cast<f64>(x) * force25Bit(static_cast<f64>(y)) - static_cast<f64>(z));
87}
88
89// frsqrte matching courtesy of Geotale, with reference to https://achurch.org/cpu-tests/ppc750cl.s
90
92 u32 base;
93 s32 dec;
94};
95
97 u64 base;
98 s64 dec;
99};
100
101union c32 {
102 constexpr c32(const f32 p) {
103 f = p;
104 }
105
106 constexpr c32(const u32 p) {
107 u = p;
108 }
109
110 u32 u;
111 f32 f;
112};
113
114union c64 {
115 constexpr c64(const f64 p) {
116 f = p;
117 }
118
119 constexpr c64(const u64 p) {
120 u = p;
121 }
122
123 u64 u;
124 f64 f;
125};
126
127static constexpr u64 EXPONENT_SHIFT_F64 = 52;
128static constexpr u64 MANTISSA_MASK_F64 = 0x000fffffffffffffULL;
129static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
130static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
131
132static constexpr std::array<BaseAndDec64, 32> RSQRTE_TABLE = {{
133 {0x69fa000000000ULL, -0x15a0000000LL},
134 {0x5f2e000000000ULL, -0x13cc000000LL},
135 {0x554a000000000ULL, -0x1234000000LL},
136 {0x4c30000000000ULL, -0x10d4000000LL},
137 {0x43c8000000000ULL, -0x0f9c000000LL},
138 {0x3bfc000000000ULL, -0x0e88000000LL},
139 {0x34b8000000000ULL, -0x0d94000000LL},
140 {0x2df0000000000ULL, -0x0cb8000000LL},
141 {0x2794000000000ULL, -0x0bf0000000LL},
142 {0x219c000000000ULL, -0x0b40000000LL},
143 {0x1bfc000000000ULL, -0x0aa0000000LL},
144 {0x16ae000000000ULL, -0x0a0c000000LL},
145 {0x11a8000000000ULL, -0x0984000000LL},
146 {0x0ce6000000000ULL, -0x090c000000LL},
147 {0x0862000000000ULL, -0x0898000000LL},
148 {0x0416000000000ULL, -0x082c000000LL},
149 {0xffe8000000000ULL, -0x1e90000000LL},
150 {0xf0a4000000000ULL, -0x1c00000000LL},
151 {0xe2a8000000000ULL, -0x19c0000000LL},
152 {0xd5c8000000000ULL, -0x17c8000000LL},
153 {0xc9e4000000000ULL, -0x1610000000LL},
154 {0xbedc000000000ULL, -0x1490000000LL},
155 {0xb498000000000ULL, -0x1330000000LL},
156 {0xab00000000000ULL, -0x11f8000000LL},
157 {0xa204000000000ULL, -0x10e8000000LL},
158 {0x9994000000000ULL, -0x0fe8000000LL},
159 {0x91a0000000000ULL, -0x0f08000000LL},
160 {0x8a1c000000000ULL, -0x0e38000000LL},
161 {0x8304000000000ULL, -0x0d78000000LL},
162 {0x7c48000000000ULL, -0x0cc8000000LL},
163 {0x75e4000000000ULL, -0x0c28000000LL},
164 {0x6fd0000000000ULL, -0x0b98000000LL},
165}};
166
167[[nodiscard]] static inline f64 frsqrte(const f64 val) {
168 c64 bits(val);
169
170 u64 mantissa = bits.u & MANTISSA_MASK_F64;
171 s64 exponent = bits.u & EXPONENT_MASK_F64;
172 bool sign = (bits.u & SIGN_MASK_F64) != 0;
173
174 // Handle 0 case
175 if (mantissa == 0 && exponent == 0) {
176 return std::copysign(std::numeric_limits<f64>::infinity(), bits.f);
177 }
178
179 // Handle NaN-like
180 if (exponent == EXPONENT_MASK_F64) {
181 if (mantissa == 0) {
182 return sign ? std::numeric_limits<f64>::quiet_NaN() : 0.0;
183 }
184
185 return val;
186 }
187
188 // Handle negative inputs
189 if (sign) {
190 return std::numeric_limits<f64>::quiet_NaN();
191 }
192
193 if (exponent == 0) {
194 // Shift so one bit goes to where the exponent would be,
195 // then clear that bit to mimick a not-subnormal number!
196 // Aka, if there are 12 leading zeroes, shift left once
197 u32 shift = std::countl_zero(mantissa) - static_cast<u32>(63 - EXPONENT_SHIFT_F64);
198
199 mantissa <<= shift;
200 mantissa &= MANTISSA_MASK_F64;
201 // The shift is subtracted by 1 because denormals by default
202 // are offset by 1 (exponent 0 doesn't have implied 1 bit)
203 exponent -= static_cast<s64>(shift - 1) << EXPONENT_SHIFT_F64;
204 }
205
206 // In reality this doesn't get the full exponent -- Only the least significant bit
207 // Only that's needed because square roots of higher exponent bits simply multiply the
208 // result by 2!!
209 u32 key = static_cast<u32>((static_cast<u64>(exponent) | mantissa) >> 37);
210 u64 new_exp =
211 (static_cast<u64>((0xbfcLL << EXPONENT_SHIFT_F64) - exponent) >> 1) & EXPONENT_MASK_F64;
212
213 // Remove the bits relating to anything higher than the LSB of the exponent
214 const auto &entry = RSQRTE_TABLE[0x1f & (key >> 11)];
215
216 // The result is given by an estimate then an adjustment based on the original
217 // key that was computed
218 u64 new_mantissa = static_cast<u64>(entry.base + entry.dec * static_cast<s64>(key & 0x7ff));
219
220 return c64(new_exp | new_mantissa).f;
221}
222
223static constexpr std::array<BaseAndDec32, 32> FRES_TABLE = {{
224 {0x00fff000UL, -0x3e1L},
225 {0x00f07000UL, -0x3a7L},
226 {0x00e1d400UL, -0x371L},
227 {0x00d41000UL, -0x340L},
228 {0x00c71000UL, -0x313L},
229 {0x00bac400UL, -0x2eaL},
230 {0x00af2000UL, -0x2c4L},
231 {0x00a41000UL, -0x2a0L},
232 {0x00999000UL, -0x27fL},
233 {0x008f9400UL, -0x261L},
234 {0x00861000UL, -0x245L},
235 {0x007d0000UL, -0x22aL},
236 {0x00745800UL, -0x212L},
237 {0x006c1000UL, -0x1fbL},
238 {0x00642800UL, -0x1e5L},
239 {0x005c9400UL, -0x1d1L},
240 {0x00555000UL, -0x1beL},
241 {0x004e5800UL, -0x1acL},
242 {0x0047ac00UL, -0x19bL},
243 {0x00413c00UL, -0x18bL},
244 {0x003b1000UL, -0x17cL},
245 {0x00352000UL, -0x16eL},
246 {0x002f5c00UL, -0x15bL},
247 {0x0029f000UL, -0x15bL},
248 {0x00248800UL, -0x143L},
249 {0x001f7c00UL, -0x143L},
250 {0x001a7000UL, -0x12dL},
251 {0x0015bc00UL, -0x12dL},
252 {0x00110800UL, -0x11aL},
253 {0x000ca000UL, -0x11aL},
254 {0x00083800UL, -0x108L},
255 {0x00041800UL, -0x106L},
256}};
257
258[[nodiscard]] static inline f32 fres(const f32 val) {
259 static constexpr u32 EXPONENT_SHIFT_F32 = 23;
260 static constexpr u64 EXPONENT_SHIFT_F64 = 52;
261 static constexpr u32 EXPONENT_MASK_F32 = 0x7f800000UL;
262 static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
263 static constexpr u32 MANTISSA_MASK_F32 = 0x007fffffUL;
264 static constexpr u32 SIGN_MASK_F32 = 0x80000000UL;
265 static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
266 static constexpr u64 QUIET_BIT_F64 = 0x0008000000000000ULL;
267 static constexpr c64 LARGEST_FLOAT(static_cast<u64>(0x47d0000000000000ULL));
268
269 c64 bits(val);
270
271 u32 mantissa = static_cast<u32>(
272 bits.u >> (EXPONENT_SHIFT_F64 - static_cast<u64>(EXPONENT_SHIFT_F32))) &
273 MANTISSA_MASK_F32;
274 s32 exponent = static_cast<s32>((bits.u & EXPONENT_MASK_F64) >> EXPONENT_SHIFT_F64) - 0x380;
275 u32 sign = static_cast<u32>(bits.u >> 32) & SIGN_MASK_F32;
276
277 if (exponent < -1) {
278 bool nonzero = (bits.u & !SIGN_MASK_F64) != 0;
279
280 if (nonzero) {
281 // Create the largest normal number. It'll be a better approximation than infinity.
282 c32 cresult(sign | (EXPONENT_MASK_F32 - (1 << EXPONENT_SHIFT_F32)) | MANTISSA_MASK_F32);
283 return cresult.f;
284 } else {
285 return std::copysignf(std::numeric_limits<f32>::infinity(), val);
286 }
287 }
288
289 if ((bits.u & EXPONENT_MASK_F64) >= LARGEST_FLOAT.u) {
290 if (mantissa == 0 || (bits.u & EXPONENT_MASK_F64) != EXPONENT_MASK_F64) {
291 // Infinity or a number which will flush to 0 -- return 0
292 return std::copysignf(0.0f, val);
293 } else if ((bits.u & QUIET_BIT_F64) != 0) {
294 // QNaN -- Just return the original value
295 return val;
296 } else {
297 // SNaN!!
298 return std::numeric_limits<f32>::quiet_NaN();
299 }
300 }
301
302 // Exponent doesn't matter for simple reciprocal because the exponent is a single multiplication
303 u32 key = mantissa >> 18;
304 s32 new_exp = 253 - exponent;
305 const auto &entry = FRES_TABLE[key];
306
307 // The result is given by an estimate and adjusted based on the original key that was computed
308 u32 pre_shift =
309 static_cast<u32>(entry.base + entry.dec * (static_cast<s32>((mantissa >> 8) & 0x3ff)));
310 u32 new_mantissa = pre_shift >> 1;
311
312 if (new_exp <= 0) {
313 // Flush the denormal
314 c32 cresult(sign);
315 return cresult.f;
316 } else {
317 u32 temp = sign | static_cast<u32>(new_exp) << EXPONENT_SHIFT_F32 | new_mantissa;
318 c32 cresult(temp);
319 return cresult.f;
320 }
321}
322
324[[nodiscard]] static inline f32 finv(f32 x) {
325 f32 inv = fres(x);
326 f32 invDouble = inv + inv;
327 f32 invSquare = inv * inv;
328 return -fms(x, invSquare, invDouble);
329}
330
331} // namespace EGG::Mathf
This header houses common data types such as our integral types and enums.
Math functions and constants used in the base game.
Definition Math.cc:3
static f32 fma(f32 x, f32 y, f32 z)
Fused multiply-add operation.
Definition Math.hh:77
f32 frsqrt(f32 x)
Definition Math.cc:315
static f32 sin(f32 x)
Definition Math.hh:37
static f64 force25Bit(f64 x)
This is used to mimic the Wii's floating-point unit.
Definition Math.hh:69
static f32 finv(f32 x)
Fused Newton-Raphson operation.
Definition Math.hh:324
static f32 cos(f32 x)
Definition Math.hh:43
static f32 fms(f32 x, f32 y, f32 z)
Fused multiply-subtract operation.
Definition Math.hh:84