25[[nodiscard]]
static inline constexpr f64
force25Bit(f64 x) {
26 u64 bits = std::bit_cast<u64>(x);
27 bits = (bits & 0xfffffffff8000000ULL) + (bits & 0x8000000);
28 return std::bit_cast<f64>(bits);
33[[nodiscard]]
static inline constexpr f32
fma(f32 x, f32 y, f32 z) {
34 return static_cast<f32
>(
35 static_cast<f64
>(x) *
force25Bit(
static_cast<f64
>(y)) +
static_cast<f64
>(z));
40[[nodiscard]]
static inline constexpr f32
fms(f32 x, f32 y, f32 z) {
41 return static_cast<f32
>(
42 static_cast<f64
>(x) *
force25Bit(
static_cast<f64
>(y)) -
static_cast<f64
>(z));
// Table-entry fields. SinFIdx/CosFIdx evaluate sinVal + r * sinDt and cosVal + r * cosDt,
// i.e. a value plus a per-step delta scaled by the fractional index.
// NOTE(review): the enclosing struct header is not visible in this listing.
47 f32 sinVal, cosVal, sinDt, cosDt;
57 {0.000000f, 1.000000f, 0.024541f, -0.000301f},
58 {0.024541f, 0.999699f, 0.024526f, -0.000903f},
59 {0.049068f, 0.998795f, 0.024497f, -0.001505f},
60 {0.073565f, 0.997290f, 0.024453f, -0.002106f},
61 {0.098017f, 0.995185f, 0.024394f, -0.002705f},
62 {0.122411f, 0.992480f, 0.024320f, -0.003303f},
63 {0.146730f, 0.989177f, 0.024231f, -0.003899f},
64 {0.170962f, 0.985278f, 0.024128f, -0.004492f},
65 {0.195090f, 0.980785f, 0.024011f, -0.005083f},
66 {0.219101f, 0.975702f, 0.023879f, -0.005671f},
67 {0.242980f, 0.970031f, 0.023733f, -0.006255f},
68 {0.266713f, 0.963776f, 0.023572f, -0.006836f},
69 {0.290285f, 0.956940f, 0.023397f, -0.007412f},
70 {0.313682f, 0.949528f, 0.023208f, -0.007984f},
71 {0.336890f, 0.941544f, 0.023005f, -0.008551f},
72 {0.359895f, 0.932993f, 0.022788f, -0.009113f},
73 {0.382683f, 0.923880f, 0.022558f, -0.009670f},
74 {0.405241f, 0.914210f, 0.022314f, -0.010220f},
75 {0.427555f, 0.903989f, 0.022056f, -0.010765f},
76 {0.449611f, 0.893224f, 0.021785f, -0.011303f},
77 {0.471397f, 0.881921f, 0.021501f, -0.011834f},
78 {0.492898f, 0.870087f, 0.021205f, -0.012358f},
79 {0.514103f, 0.857729f, 0.020895f, -0.012875f},
80 {0.534998f, 0.844854f, 0.020573f, -0.013384f},
81 {0.555570f, 0.831470f, 0.020238f, -0.013885f},
82 {0.575808f, 0.817585f, 0.019891f, -0.014377f},
83 {0.595699f, 0.803208f, 0.019532f, -0.014861f},
84 {0.615232f, 0.788346f, 0.019162f, -0.015336f},
85 {0.634393f, 0.773010f, 0.018780f, -0.015802f},
86 {0.653173f, 0.757209f, 0.018386f, -0.016258f},
87 {0.671559f, 0.740951f, 0.017982f, -0.016704f},
88 {0.689541f, 0.724247f, 0.017566f, -0.017140f},
89 {0.707107f, 0.707107f, 0.017140f, -0.017566f},
90 {0.724247f, 0.689541f, 0.016704f, -0.017982f},
91 {0.740951f, 0.671559f, 0.016258f, -0.018386f},
92 {0.757209f, 0.653173f, 0.015802f, -0.018780f},
93 {0.773010f, 0.634393f, 0.015336f, -0.019162f},
94 {0.788346f, 0.615232f, 0.014861f, -0.019532f},
95 {0.803208f, 0.595699f, 0.014377f, -0.019891f},
96 {0.817585f, 0.575808f, 0.013885f, -0.020238f},
97 {0.831470f, 0.555570f, 0.013384f, -0.020573f},
98 {0.844854f, 0.534998f, 0.012875f, -0.020895f},
99 {0.857729f, 0.514103f, 0.012358f, -0.021205f},
100 {0.870087f, 0.492898f, 0.011834f, -0.021501f},
101 {0.881921f, 0.471397f, 0.011303f, -0.021785f},
102 {0.893224f, 0.449611f, 0.010765f, -0.022056f},
103 {0.903989f, 0.427555f, 0.010220f, -0.022314f},
104 {0.914210f, 0.405241f, 0.009670f, -0.022558f},
105 {0.923880f, 0.382683f, 0.009113f, -0.022788f},
106 {0.932993f, 0.359895f, 0.008551f, -0.023005f},
107 {0.941544f, 0.336890f, 0.007984f, -0.023208f},
108 {0.949528f, 0.313682f, 0.007412f, -0.023397f},
109 {0.956940f, 0.290285f, 0.006836f, -0.023572f},
110 {0.963776f, 0.266713f, 0.006255f, -0.023733f},
111 {0.970031f, 0.242980f, 0.005671f, -0.023879f},
112 {0.975702f, 0.219101f, 0.005083f, -0.024011f},
113 {0.980785f, 0.195090f, 0.004492f, -0.024128f},
114 {0.985278f, 0.170962f, 0.003899f, -0.024231f},
115 {0.989177f, 0.146730f, 0.003303f, -0.024320f},
116 {0.992480f, 0.122411f, 0.002705f, -0.024394f},
117 {0.995185f, 0.098017f, 0.002106f, -0.024453f},
118 {0.997290f, 0.073565f, 0.001505f, -0.024497f},
119 {0.998795f, 0.049068f, 0.000903f, -0.024526f},
120 {0.999699f, 0.024541f, 0.000301f, -0.024541f},
121 {1.000000f, 0.000000f, -0.000301f, -0.024541f},
122 {0.999699f, -0.024541f, -0.000903f, -0.024526f},
123 {0.998795f, -0.049068f, -0.001505f, -0.024497f},
124 {0.997290f, -0.073565f, -0.002106f, -0.024453f},
125 {0.995185f, -0.098017f, -0.002705f, -0.024394f},
126 {0.992480f, -0.122411f, -0.003303f, -0.024320f},
127 {0.989177f, -0.146730f, -0.003899f, -0.024231f},
128 {0.985278f, -0.170962f, -0.004492f, -0.024128f},
129 {0.980785f, -0.195090f, -0.005083f, -0.024011f},
130 {0.975702f, -0.219101f, -0.005671f, -0.023879f},
131 {0.970031f, -0.242980f, -0.006255f, -0.023733f},
132 {0.963776f, -0.266713f, -0.006836f, -0.023572f},
133 {0.956940f, -0.290285f, -0.007412f, -0.023397f},
134 {0.949528f, -0.313682f, -0.007984f, -0.023208f},
135 {0.941544f, -0.336890f, -0.008551f, -0.023005f},
136 {0.932993f, -0.359895f, -0.009113f, -0.022788f},
137 {0.923880f, -0.382683f, -0.009670f, -0.022558f},
138 {0.914210f, -0.405241f, -0.010220f, -0.022314f},
139 {0.903989f, -0.427555f, -0.010765f, -0.022056f},
140 {0.893224f, -0.449611f, -0.011303f, -0.021785f},
141 {0.881921f, -0.471397f, -0.011834f, -0.021501f},
142 {0.870087f, -0.492898f, -0.012358f, -0.021205f},
143 {0.857729f, -0.514103f, -0.012875f, -0.020895f},
144 {0.844854f, -0.534998f, -0.013384f, -0.020573f},
145 {0.831470f, -0.555570f, -0.013885f, -0.020238f},
146 {0.817585f, -0.575808f, -0.014377f, -0.019891f},
147 {0.803208f, -0.595699f, -0.014861f, -0.019532f},
148 {0.788346f, -0.615232f, -0.015336f, -0.019162f},
149 {0.773010f, -0.634393f, -0.015802f, -0.018780f},
150 {0.757209f, -0.653173f, -0.016258f, -0.018386f},
151 {0.740951f, -0.671559f, -0.016704f, -0.017982f},
152 {0.724247f, -0.689541f, -0.017140f, -0.017566f},
153 {0.707107f, -0.707107f, -0.017566f, -0.017140f},
154 {0.689541f, -0.724247f, -0.017982f, -0.016704f},
155 {0.671559f, -0.740951f, -0.018386f, -0.016258f},
156 {0.653173f, -0.757209f, -0.018780f, -0.015802f},
157 {0.634393f, -0.773010f, -0.019162f, -0.015336f},
158 {0.615232f, -0.788346f, -0.019532f, -0.014861f},
159 {0.595699f, -0.803208f, -0.019891f, -0.014377f},
160 {0.575808f, -0.817585f, -0.020238f, -0.013885f},
161 {0.555570f, -0.831470f, -0.020573f, -0.013384f},
162 {0.534998f, -0.844854f, -0.020895f, -0.012875f},
163 {0.514103f, -0.857729f, -0.021205f, -0.012358f},
164 {0.492898f, -0.870087f, -0.021501f, -0.011834f},
165 {0.471397f, -0.881921f, -0.021785f, -0.011303f},
166 {0.449611f, -0.893224f, -0.022056f, -0.010765f},
167 {0.427555f, -0.903989f, -0.022314f, -0.010220f},
168 {0.405241f, -0.914210f, -0.022558f, -0.009670f},
169 {0.382683f, -0.923880f, -0.022788f, -0.009113f},
170 {0.359895f, -0.932993f, -0.023005f, -0.008551f},
171 {0.336890f, -0.941544f, -0.023208f, -0.007984f},
172 {0.313682f, -0.949528f, -0.023397f, -0.007412f},
173 {0.290285f, -0.956940f, -0.023572f, -0.006836f},
174 {0.266713f, -0.963776f, -0.023733f, -0.006255f},
175 {0.242980f, -0.970031f, -0.023879f, -0.005671f},
176 {0.219101f, -0.975702f, -0.024011f, -0.005083f},
177 {0.195090f, -0.980785f, -0.024128f, -0.004492f},
178 {0.170962f, -0.985278f, -0.024231f, -0.003899f},
179 {0.146730f, -0.989177f, -0.024320f, -0.003303f},
180 {0.122411f, -0.992480f, -0.024394f, -0.002705f},
181 {0.098017f, -0.995185f, -0.024453f, -0.002106f},
182 {0.073565f, -0.997290f, -0.024497f, -0.001505f},
183 {0.049068f, -0.998795f, -0.024526f, -0.000903f},
184 {0.024541f, -0.999699f, -0.024541f, -0.000301f},
185 {0.000000f, -1.000000f, -0.024541f, 0.000301f},
186 {-0.024541f, -0.999699f, -0.024526f, 0.000903f},
187 {-0.049068f, -0.998795f, -0.024497f, 0.001505f},
188 {-0.073565f, -0.997290f, -0.024453f, 0.002106f},
189 {-0.098017f, -0.995185f, -0.024394f, 0.002705f},
190 {-0.122411f, -0.992480f, -0.024320f, 0.003303f},
191 {-0.146730f, -0.989177f, -0.024231f, 0.003899f},
192 {-0.170962f, -0.985278f, -0.024128f, 0.004492f},
193 {-0.195090f, -0.980785f, -0.024011f, 0.005083f},
194 {-0.219101f, -0.975702f, -0.023879f, 0.005671f},
195 {-0.242980f, -0.970031f, -0.023733f, 0.006255f},
196 {-0.266713f, -0.963776f, -0.023572f, 0.006836f},
197 {-0.290285f, -0.956940f, -0.023397f, 0.007412f},
198 {-0.313682f, -0.949528f, -0.023208f, 0.007984f},
199 {-0.336890f, -0.941544f, -0.023005f, 0.008551f},
200 {-0.359895f, -0.932993f, -0.022788f, 0.009113f},
201 {-0.382683f, -0.923880f, -0.022558f, 0.009670f},
202 {-0.405241f, -0.914210f, -0.022314f, 0.010220f},
203 {-0.427555f, -0.903989f, -0.022056f, 0.010765f},
204 {-0.449611f, -0.893224f, -0.021785f, 0.011303f},
205 {-0.471397f, -0.881921f, -0.021501f, 0.011834f},
206 {-0.492898f, -0.870087f, -0.021205f, 0.012358f},
207 {-0.514103f, -0.857729f, -0.020895f, 0.012875f},
208 {-0.534998f, -0.844854f, -0.020573f, 0.013384f},
209 {-0.555570f, -0.831470f, -0.020238f, 0.013885f},
210 {-0.575808f, -0.817585f, -0.019891f, 0.014377f},
211 {-0.595699f, -0.803208f, -0.019532f, 0.014861f},
212 {-0.615232f, -0.788346f, -0.019162f, 0.015336f},
213 {-0.634393f, -0.773010f, -0.018780f, 0.015802f},
214 {-0.653173f, -0.757209f, -0.018386f, 0.016258f},
215 {-0.671559f, -0.740951f, -0.017982f, 0.016704f},
216 {-0.689541f, -0.724247f, -0.017566f, 0.017140f},
217 {-0.707107f, -0.707107f, -0.017140f, 0.017566f},
218 {-0.724247f, -0.689541f, -0.016704f, 0.017982f},
219 {-0.740951f, -0.671559f, -0.016258f, 0.018386f},
220 {-0.757209f, -0.653173f, -0.015802f, 0.018780f},
221 {-0.773010f, -0.634393f, -0.015336f, 0.019162f},
222 {-0.788346f, -0.615232f, -0.014861f, 0.019532f},
223 {-0.803208f, -0.595699f, -0.014377f, 0.019891f},
224 {-0.817585f, -0.575808f, -0.013885f, 0.020238f},
225 {-0.831470f, -0.555570f, -0.013384f, 0.020573f},
226 {-0.844854f, -0.534998f, -0.012875f, 0.020895f},
227 {-0.857729f, -0.514103f, -0.012358f, 0.021205f},
228 {-0.870087f, -0.492898f, -0.011834f, 0.021501f},
229 {-0.881921f, -0.471397f, -0.011303f, 0.021785f},
230 {-0.893224f, -0.449611f, -0.010765f, 0.022056f},
231 {-0.903989f, -0.427555f, -0.010220f, 0.022314f},
232 {-0.914210f, -0.405241f, -0.009670f, 0.022558f},
233 {-0.923880f, -0.382683f, -0.009113f, 0.022788f},
234 {-0.932993f, -0.359895f, -0.008551f, 0.023005f},
235 {-0.941544f, -0.336890f, -0.007984f, 0.023208f},
236 {-0.949528f, -0.313682f, -0.007412f, 0.023397f},
237 {-0.956940f, -0.290285f, -0.006836f, 0.023572f},
238 {-0.963776f, -0.266713f, -0.006255f, 0.023733f},
239 {-0.970031f, -0.242980f, -0.005671f, 0.023879f},
240 {-0.975702f, -0.219101f, -0.005083f, 0.024011f},
241 {-0.980785f, -0.195090f, -0.004492f, 0.024128f},
242 {-0.985278f, -0.170962f, -0.003899f, 0.024231f},
243 {-0.989177f, -0.146730f, -0.003303f, 0.024320f},
244 {-0.992480f, -0.122411f, -0.002705f, 0.024394f},
245 {-0.995185f, -0.098017f, -0.002106f, 0.024453f},
246 {-0.997290f, -0.073565f, -0.001505f, 0.024497f},
247 {-0.998795f, -0.049068f, -0.000903f, 0.024526f},
248 {-0.999699f, -0.024541f, -0.000301f, 0.024541f},
249 {-1.000000f, -0.000000f, 0.000301f, 0.024541f},
250 {-0.999699f, 0.024541f, 0.000903f, 0.024526f},
251 {-0.998795f, 0.049068f, 0.001505f, 0.024497f},
252 {-0.997290f, 0.073565f, 0.002106f, 0.024453f},
253 {-0.995185f, 0.098017f, 0.002705f, 0.024394f},
254 {-0.992480f, 0.122411f, 0.003303f, 0.024320f},
255 {-0.989177f, 0.146730f, 0.003899f, 0.024231f},
256 {-0.985278f, 0.170962f, 0.004492f, 0.024128f},
257 {-0.980785f, 0.195090f, 0.005083f, 0.024011f},
258 {-0.975702f, 0.219101f, 0.005671f, 0.023879f},
259 {-0.970031f, 0.242980f, 0.006255f, 0.023733f},
260 {-0.963776f, 0.266713f, 0.006836f, 0.023572f},
261 {-0.956940f, 0.290285f, 0.007412f, 0.023397f},
262 {-0.949528f, 0.313682f, 0.007984f, 0.023208f},
263 {-0.941544f, 0.336890f, 0.008551f, 0.023005f},
264 {-0.932993f, 0.359895f, 0.009113f, 0.022788f},
265 {-0.923880f, 0.382683f, 0.009670f, 0.022558f},
266 {-0.914210f, 0.405241f, 0.010220f, 0.022314f},
267 {-0.903989f, 0.427555f, 0.010765f, 0.022056f},
268 {-0.893224f, 0.449611f, 0.011303f, 0.021785f},
269 {-0.881921f, 0.471397f, 0.011834f, 0.021501f},
270 {-0.870087f, 0.492898f, 0.012358f, 0.021205f},
271 {-0.857729f, 0.514103f, 0.012875f, 0.020895f},
272 {-0.844854f, 0.534998f, 0.013384f, 0.020573f},
273 {-0.831470f, 0.555570f, 0.013885f, 0.020238f},
274 {-0.817585f, 0.575808f, 0.014377f, 0.019891f},
275 {-0.803208f, 0.595699f, 0.014861f, 0.019532f},
276 {-0.788346f, 0.615232f, 0.015336f, 0.019162f},
277 {-0.773010f, 0.634393f, 0.015802f, 0.018780f},
278 {-0.757209f, 0.653173f, 0.016258f, 0.018386f},
279 {-0.740951f, 0.671559f, 0.016704f, 0.017982f},
280 {-0.724247f, 0.689541f, 0.017140f, 0.017566f},
281 {-0.707107f, 0.707107f, 0.017566f, 0.017140f},
282 {-0.689541f, 0.724247f, 0.017982f, 0.016704f},
283 {-0.671559f, 0.740951f, 0.018386f, 0.016258f},
284 {-0.653173f, 0.757209f, 0.018780f, 0.015802f},
285 {-0.634393f, 0.773010f, 0.019162f, 0.015336f},
286 {-0.615232f, 0.788346f, 0.019532f, 0.014861f},
287 {-0.595699f, 0.803208f, 0.019891f, 0.014377f},
288 {-0.575808f, 0.817585f, 0.020238f, 0.013885f},
289 {-0.555570f, 0.831470f, 0.020573f, 0.013384f},
290 {-0.534998f, 0.844854f, 0.020895f, 0.012875f},
291 {-0.514103f, 0.857729f, 0.021205f, 0.012358f},
292 {-0.492898f, 0.870087f, 0.021501f, 0.011834f},
293 {-0.471397f, 0.881921f, 0.021785f, 0.011303f},
294 {-0.449611f, 0.893224f, 0.022056f, 0.010765f},
295 {-0.427555f, 0.903989f, 0.022314f, 0.010220f},
296 {-0.405241f, 0.914210f, 0.022558f, 0.009670f},
297 {-0.382683f, 0.923880f, 0.022788f, 0.009113f},
298 {-0.359895f, 0.932993f, 0.023005f, 0.008551f},
299 {-0.336890f, 0.941544f, 0.023208f, 0.007984f},
300 {-0.313682f, 0.949528f, 0.023397f, 0.007412f},
301 {-0.290285f, 0.956940f, 0.023572f, 0.006836f},
302 {-0.266713f, 0.963776f, 0.023733f, 0.006255f},
303 {-0.242980f, 0.970031f, 0.023879f, 0.005671f},
304 {-0.219101f, 0.975702f, 0.024011f, 0.005083f},
305 {-0.195090f, 0.980785f, 0.024128f, 0.004492f},
306 {-0.170962f, 0.985278f, 0.024231f, 0.003899f},
307 {-0.146730f, 0.989177f, 0.024320f, 0.003303f},
308 {-0.122411f, 0.992480f, 0.024394f, 0.002705f},
309 {-0.098017f, 0.995185f, 0.024453f, 0.002106f},
310 {-0.073565f, 0.997290f, 0.024497f, 0.001505f},
311 {-0.049068f, 0.998795f, 0.024526f, 0.000903f},
312 {-0.024541f, 0.999699f, 0.024541f, 0.000301f},
313 {-0.000000f, 1.000000f, 0.024541f, -0.000301f},
317static constexpr AtanEntry sArcTanTbl[32 + 1] = {
318 {0.000000000f, 1.272825321f},
319 {1.272825321f, 1.270345790f},
320 {2.543171111f, 1.265415586f},
321 {3.808586697f, 1.258091595f},
322 {5.066678293f, 1.248457103f},
323 {6.315135396f, 1.236619467f},
324 {7.551754863f, 1.222707202f},
325 {8.774462065f, 1.206866624f},
326 {9.981328688f, 1.189258212f},
327 {11.170586901f, 1.170052841f},
328 {12.340639741f, 1.149428034f},
329 {13.490067775f, 1.127564381f},
330 {14.617632156f, 1.104642222f},
331 {15.722274378f, 1.080838675f},
332 {16.803113053f, 1.056325088f},
333 {17.859438141f, 1.031264918f},
334 {18.890703059f, 1.005812061f},
335 {19.896515121f, 0.980109621f},
336 {20.876624742f, 0.954289072f},
337 {21.830913814f, 0.928469801f},
338 {22.759383615f, 0.902758952f},
339 {23.662142567f, 0.877251558f},
340 {24.539394125f, 0.852030871f},
341 {25.391424996f, 0.827168886f},
342 {26.218593881f, 0.802726967f},
343 {27.021320848f, 0.778756582f},
344 {27.800077430f, 0.755300081f},
345 {28.555377511f, 0.732391496f},
346 {29.287769007f, 0.710057351f},
347 {29.997826358f, 0.688317453f},
348 {30.686143811f, 0.667185647f},
349 {31.353329458f, 0.646670542f},
350 {32.000000000f, 0.626776175f},
354[[nodiscard]]
static inline constexpr f32 SinFIdx(f32 fidx) {
355 f32 abs_fidx = fabs(fidx);
357 while (abs_fidx >= 65536.0f) {
358 abs_fidx -= 65536.0f;
361 u16 idx =
static_cast<u16
>(abs_fidx);
362 f32 r = abs_fidx -
static_cast<f32
>(idx);
364 f32 val = sSinCosTbl[idx].sinVal + r * sSinCosTbl[idx].sinDt;
365 return fidx < 0.0f ? -val : val;
369[[nodiscard]]
static inline constexpr f32 CosFIdx(f32 fidx) {
370 f32 abs_fidx = fabs(fidx);
372 while (abs_fidx >= 65536.0f) {
373 abs_fidx -= 65536.0f;
376 u16 idx =
static_cast<u16
>(abs_fidx);
377 f32 r = abs_fidx -
static_cast<f32
>(idx);
380 return sSinCosTbl[idx].cosVal + r * sSinCosTbl[idx].cosDt;
// Computes sine and cosine of a table-index angle together, using fma() for the
// value + fraction * delta interpolation.
// NOTE(review): this listing omits several original lines (closing braces, whatever sat on
// lines 393-394, and everything after line 396, including the return statement), so the
// order of the returned pair and any sign handling for negative fidx cannot be confirmed here.
384[[nodiscard]]
static inline constexpr std::pair<f32, f32> SinCosFIdx(f32 fidx) {
385 f32 abs_fidx = fabs(fidx);
// Reduce the magnitude below 65536 by repeated subtraction.
387 while (abs_fidx >= 65536.0f) {
388 abs_fidx -= 65536.0f;
// Integer part selects the table entry; r is the fractional remainder.
// NOTE(review): in the visible lines idx is never reduced to the table's range before it is
// used as an index below — confirm against the full source.
391 u16 idx =
static_cast<u16
>(abs_fidx);
392 f32 r = abs_fidx -
static_cast<f32
>(idx);
// cosVal + cosDt * r, with the fraction r passed as fma()'s precision-reduced factor.
395 f32
cos =
fma(sSinCosTbl[idx].cosDt, r, sSinCosTbl[idx].cosVal);
// sinVal + sinDt * r, same scheme.
396 f32
sin =
fma(sSinCosTbl[idx].sinDt, r, sSinCosTbl[idx].sinVal);
405[[nodiscard]]
static inline constexpr f32 AtanFIdx_(f32 x) {
407 u16 idx =
static_cast<u16
>(x);
408 f32 r = x -
static_cast<f32
>(idx);
409 return sArcTanTbl[idx].atanVal + r * sArcTanTbl[idx].atanDt;
// Two-argument arctangent in table-index units, built from AtanFIdx_ on a ratio of the two
// inputs plus a fixed offset of 0, +/-64 or +/-128.
// NOTE(review): the branch conditions that select among the returns below (original lines
// 415-420, 422, 424-426, ...) are missing from this listing, as is the value returned for
// the x == 0 && y == 0 case. Presumably each return serves one octant chosen from the signs
// and relative magnitudes of x and y — confirm against the full source.
413[[nodiscard]]
static inline constexpr f32 Atan2FIdx(f32 y, f32 x) {
414 if (x == 0.0f && y == 0.0f) {
421 return 0.0f + AtanFIdx_(y / x);
423 return 64.0f - AtanFIdx_(x / y);
427 return 0.0f - AtanFIdx_(-y / x);
429 return -64.0f + AtanFIdx_(x / -y);
435 return 128.0f - AtanFIdx_(y / -x);
437 return 64.0f + AtanFIdx_(-x / y);
441 return -128.0f + AtanFIdx_(-y / -x);
443 return -64.0f - AtanFIdx_(-x / -y);
451[[nodiscard]]
static inline constexpr f32
sin(f32 x) {
452 return SinFIdx(x * RAD2FIDX);
457[[nodiscard]]
static inline constexpr f32
cos(f32 x) {
458 return CosFIdx(x * RAD2FIDX);
// NOTE(review): only the signature of asin is present in this listing; its body (original
// lines 463 onward) is missing and must be checked against the full source.
462[[nodiscard]]
static inline constexpr f32 asin(f32 x) {
// NOTE(review): only the signature of acos is present in this listing; its body (original
// lines 468 onward) is missing and must be checked against the full source.
467[[nodiscard]]
static inline constexpr f32 acos(f32 x) {
472[[nodiscard]]
static inline constexpr f32 atan2(f32 y, f32 x) {
473 return Atan2FIdx(y, x) * FIDX2RAD;
// NOTE(review): only the signature of abs is present in this listing; its body (original
// lines 477 onward) is missing and must be checked against the full source.
476[[nodiscard]]
static inline constexpr f32 abs(f32 x) {
// Constructors of c32 from an f32 and from a u32.
// NOTE(review): the enclosing type declaration, its members and both constructor bodies are
// missing from this listing. fres() constructs a c32 from a u32 bit pattern.
493 constexpr c32(
const f32 p) {
497 constexpr c32(
const u32 p) {
// Constructors of c64 from an f64 and from a u64.
// NOTE(review): the enclosing type declaration, its members and both constructor bodies are
// missing from this listing. frsqrte()/fres() read `.u` and `.f` from c64 values, so the type
// presumably exposes the same bits as both an integer and a double — confirm.
506 constexpr c64(
const f64 p) {
510 constexpr c64(
const u64 p) {
518static constexpr u64 EXPONENT_SHIFT_F64 = 52;
519static constexpr u64 MANTISSA_MASK_F64 = 0x000fffffffffffffULL;
520static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
521static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
523static constexpr std::array<BaseAndDec64, 32> RSQRTE_TABLE = {{
524 {0x69fa000000000ULL, -0x15a0000000LL},
525 {0x5f2e000000000ULL, -0x13cc000000LL},
526 {0x554a000000000ULL, -0x1234000000LL},
527 {0x4c30000000000ULL, -0x10d4000000LL},
528 {0x43c8000000000ULL, -0x0f9c000000LL},
529 {0x3bfc000000000ULL, -0x0e88000000LL},
530 {0x34b8000000000ULL, -0x0d94000000LL},
531 {0x2df0000000000ULL, -0x0cb8000000LL},
532 {0x2794000000000ULL, -0x0bf0000000LL},
533 {0x219c000000000ULL, -0x0b40000000LL},
534 {0x1bfc000000000ULL, -0x0aa0000000LL},
535 {0x16ae000000000ULL, -0x0a0c000000LL},
536 {0x11a8000000000ULL, -0x0984000000LL},
537 {0x0ce6000000000ULL, -0x090c000000LL},
538 {0x0862000000000ULL, -0x0898000000LL},
539 {0x0416000000000ULL, -0x082c000000LL},
540 {0xffe8000000000ULL, -0x1e90000000LL},
541 {0xf0a4000000000ULL, -0x1c00000000LL},
542 {0xe2a8000000000ULL, -0x19c0000000LL},
543 {0xd5c8000000000ULL, -0x17c8000000LL},
544 {0xc9e4000000000ULL, -0x1610000000LL},
545 {0xbedc000000000ULL, -0x1490000000LL},
546 {0xb498000000000ULL, -0x1330000000LL},
547 {0xab00000000000ULL, -0x11f8000000LL},
548 {0xa204000000000ULL, -0x10e8000000LL},
549 {0x9994000000000ULL, -0x0fe8000000LL},
550 {0x91a0000000000ULL, -0x0f08000000LL},
551 {0x8a1c000000000ULL, -0x0e38000000LL},
552 {0x8304000000000ULL, -0x0d78000000LL},
553 {0x7c48000000000ULL, -0x0cc8000000LL},
554 {0x75e4000000000ULL, -0x0c28000000LL},
555 {0x6fd0000000000ULL, -0x0b98000000LL},
// Table-based reciprocal square root estimate computed on the raw bits of a double.
// NOTE(review): this listing omits several original lines, including the declarations of
// `bits` and `new_exp`, the closing braces, and the conditions around some of the returns
// below. The comments describe only what the visible statements do.
558[[nodiscard]]
static inline constexpr f64 frsqrte(
const f64 val) {
// Split the encoding into its mantissa field, its (unshifted) exponent field and the sign.
561 u64 mantissa = bits.u & MANTISSA_MASK_F64;
562 s64 exponent = bits.u & EXPONENT_MASK_F64;
563 bool sign = (bits.u & SIGN_MASK_F64) != 0;
// Both fields zero, i.e. +/-0: the result is an infinity carrying the input's sign.
566 if (mantissa == 0 && exponent == 0) {
567 return std::copysign(std::numeric_limits<f64>::infinity(), bits.f);
// Exponent field all ones (infinity or NaN encodings).
571 if (exponent == EXPONENT_MASK_F64) {
573 return sign ? std::numeric_limits<f64>::quiet_NaN() : 0.0;
// NOTE(review): the condition guarding this NaN return is not visible.
581 return std::numeric_limits<f64>::quiet_NaN();
// Leading-zero count of the mantissa relative to the 11 bits above the mantissa field, then
// a re-mask and an exponent adjustment by (shift - 1).
// NOTE(review): presumably this normalizes denormal inputs; the enclosing condition and the
// statement that shifts the mantissa are not visible.
588 u32 shift = std::countl_zero(mantissa) -
static_cast<u32
>(63 - EXPONENT_SHIFT_F64);
591 mantissa &= MANTISSA_MASK_F64;
594 exponent -=
static_cast<s64
>(shift - 1) << EXPONENT_SHIFT_F64;
// key: bit 15 is the lowest exponent bit, bits 14..0 are the top 15 mantissa bits.
600 u32 key =
static_cast<u32
>((
static_cast<u64
>(exponent) | mantissa) >> 37);
// Result exponent field: (0xbfc << 52 minus the input exponent field) halved, then masked.
// NOTE(review): the left-hand side of this statement (the `new_exp` declaration) is missing.
602 (
static_cast<u64
>((0xbfcLL << EXPONENT_SHIFT_F64) - exponent) >> 1) & EXPONENT_MASK_F64;
// The top 5 bits of key (exponent parity + 4 mantissa bits) select the table entry.
605 const auto &entry = RSQRTE_TABLE[0x1f & (key >> 11)];
// The low 11 bits of key scale the entry's (negative) per-step decrement.
609 u64 new_mantissa =
static_cast<u64
>(entry.base + entry.dec *
static_cast<s64
>(key & 0x7ff));
611 return c64(new_exp | new_mantissa).f;
614static constexpr std::array<BaseAndDec32, 32> FRES_TABLE = {{
615 {0x00fff000UL, -0x3e1L},
616 {0x00f07000UL, -0x3a7L},
617 {0x00e1d400UL, -0x371L},
618 {0x00d41000UL, -0x340L},
619 {0x00c71000UL, -0x313L},
620 {0x00bac400UL, -0x2eaL},
621 {0x00af2000UL, -0x2c4L},
622 {0x00a41000UL, -0x2a0L},
623 {0x00999000UL, -0x27fL},
624 {0x008f9400UL, -0x261L},
625 {0x00861000UL, -0x245L},
626 {0x007d0000UL, -0x22aL},
627 {0x00745800UL, -0x212L},
628 {0x006c1000UL, -0x1fbL},
629 {0x00642800UL, -0x1e5L},
630 {0x005c9400UL, -0x1d1L},
631 {0x00555000UL, -0x1beL},
632 {0x004e5800UL, -0x1acL},
633 {0x0047ac00UL, -0x19bL},
634 {0x00413c00UL, -0x18bL},
635 {0x003b1000UL, -0x17cL},
636 {0x00352000UL, -0x16eL},
637 {0x002f5c00UL, -0x15bL},
638 {0x0029f000UL, -0x15bL},
639 {0x00248800UL, -0x143L},
640 {0x001f7c00UL, -0x143L},
641 {0x001a7000UL, -0x12dL},
642 {0x0015bc00UL, -0x12dL},
643 {0x00110800UL, -0x11aL},
644 {0x000ca000UL, -0x11aL},
645 {0x00083800UL, -0x108L},
646 {0x00041800UL, -0x106L},
// Table-based reciprocal estimate of a float, computed on the bits of its double widening.
// NOTE(review): this listing omits several original lines, including the declarations of
// `bits` and `pre_shift`, the tail of the `mantissa` expression, the closing braces, the
// conditions around the early returns, and everything after line 708 (including the final
// return). The comments describe only what the visible statements do.
649[[nodiscard]]
static inline constexpr f32 fres(
const f32 val) {
// Field layouts for 32-bit and 64-bit IEEE-754 encodings.
650 static constexpr u32 EXPONENT_SHIFT_F32 = 23;
651 static constexpr u64 EXPONENT_SHIFT_F64 = 52;
652 static constexpr u32 EXPONENT_MASK_F32 = 0x7f800000UL;
653 static constexpr u64 EXPONENT_MASK_F64 = 0x7ff0000000000000ULL;
654 static constexpr u32 MANTISSA_MASK_F32 = 0x007fffffUL;
655 static constexpr u32 SIGN_MASK_F32 = 0x80000000UL;
656 static constexpr u64 SIGN_MASK_F64 = 0x8000000000000000ULL;
657 static constexpr u64 QUIET_BIT_F64 = 0x0008000000000000ULL;
// Exponent-field threshold compared against the input's exponent field further down.
658 static constexpr c64 LARGEST_FLOAT(
static_cast<u64
>(0x47d0000000000000ULL));
// Drop the 29 low mantissa bits of the double (52 - 23) to line up with a float mantissa.
// NOTE(review): the right-hand operand of the trailing `&` (original line 664) is missing.
662 u32 mantissa =
static_cast<u32
>(
663 bits.u >> (EXPONENT_SHIFT_F64 -
static_cast<u64
>(EXPONENT_SHIFT_F32))) &
// Double exponent field minus 0x380 (896 = 1023 - 127), i.e. re-biased to the float bias.
665 s32 exponent =
static_cast<s32
>((bits.u & EXPONENT_MASK_F64) >> EXPONENT_SHIFT_F64) - 0x380;
// Sign bit moved from bit 63 of the double to bit 31.
666 u32 sign =
static_cast<u32
>(bits.u >> 32) & SIGN_MASK_F32;
// NOTE(review): `!SIGN_MASK_F64` is a logical NOT. The mask is nonzero, so it evaluates to
// false (0); `bits.u & 0` is always 0 and `nonzero` is therefore always false. The bitwise
// complement `~SIGN_MASK_F64` (all bits except the sign) looks intended — confirm and fix.
669 bool nonzero = (bits.u & !SIGN_MASK_F64) != 0;
// Bit pattern sign | 0x7f000000 | 0x007fffff: the largest finite float magnitude, signed.
673 c32 cresult(sign | (EXPONENT_MASK_F32 - (1 << EXPONENT_SHIFT_F32)) | MANTISSA_MASK_F32);
676 return std::copysignf(std::numeric_limits<f32>::infinity(), val);
// Inputs whose exponent field is at or above the threshold.
680 if ((bits.u & EXPONENT_MASK_F64) >= LARGEST_FLOAT.u) {
// Zero mantissa, or exponent field not all ones: the result is a signed zero.
681 if (mantissa == 0 || (bits.u & EXPONENT_MASK_F64) != EXPONENT_MASK_F64) {
683 return std::copysignf(0.0f, val);
684 }
// Otherwise the input is a NaN encoding; this branch tests its quiet bit.
else if ((bits.u & QUIET_BIT_F64) != 0) {
689 return std::numeric_limits<f32>::quiet_NaN();
// Top 5 mantissa bits select the table entry; the result exponent mirrors the input's.
694 u32 key = mantissa >> 18;
695 s32 new_exp = 253 - exponent;
696 const auto &entry = FRES_TABLE[key];
// Next 10 mantissa bits scale the entry's (negative) per-step decrement.
// NOTE(review): the left-hand side of this statement (the `pre_shift` declaration) is missing.
700 static_cast<u32
>(entry.base + entry.dec * (
static_cast<s32
>((mantissa >> 8) & 0x3ff)));
701 u32 new_mantissa = pre_shift >> 1;
// Reassemble sign, exponent and mantissa into a float bit pattern.
708 u32 temp = sign |
static_cast<u32
>(new_exp) << EXPONENT_SHIFT_F32 | new_mantissa;
// Reciprocal square root of x: starts from the frsqrte() estimate and computes refinement
// terms in double precision, narrowing each to f32.
// NOTE(review): the declaration head of the third temporary (original line 723) and the
// return statement are missing from this listing. With tmp0 = est^2, tmp1 = est / 2 and the
// third term 3 - tmp0 * x, the presumable result is one Newton-Raphson step
// tmp1 * (3 - est^2 * x) — confirm against the full source.
716[[nodiscard]]
static inline constexpr f32
frsqrt(f32 x) {
// Initial estimate.
718 f64 est = frsqrte(x);
// est squared, with one factor precision-reduced by force25Bit().
721 f32 tmp0 =
static_cast<f32
>(est *
force25Bit(est));
// Half of the estimate.
722 f32 tmp1 =
static_cast<f32
>(est *
static_cast<f64
>(0.5f));
// 3 - tmp0 * x, evaluated in double and narrowed to f32.
724 static_cast<f32
>(
static_cast<f64
>(3.0f) -
static_cast<f64
>(tmp0) *
static_cast<f64
>(x));
729[[nodiscard]]
static inline constexpr f32 sqrt(f32 x) {
730 return x > 0.0f ? x *
frsqrt(x) : 0.0f;
// Finds roots of a*x^2 + b*x + c, writing them to root1/root2 and returning a u32.
// NOTE(review): most of the control flow is missing from this listing (original lines
// 736-753, 762-765 and everything after 766), including every return statement and any
// handling of a == 0 or b == 0. Presumably the return value is the number of roots — confirm.
734static inline constexpr u32 FindRootsQuadratic(f32 a, f32 b, f32 c, f32 &root1, f32 &root2) {
// Tolerance applied to the normalized discriminant below.
735 constexpr f32 EPSILON = 0.0002f;
// With h = b / (2a): normalizedC = c / (a * h^2) = 4ac / b^2, so the normalized discriminant
// 1 - normalizedC equals (b^2 - 4ac) / b^2.
754 f32 halfBOverA = b / (2.0f * a);
755 f32 normalizedC = c / (halfBOverA * (a * halfBOverA));
756 f32 normalizedDiscriminant = 1.0f - normalizedC;
// Discriminant clearly positive.
758 if (normalizedDiscriminant > EPSILON) {
// d * frsqrt(d) yields sqrt(d) for positive d.
759 f32 sqrtNormalizedDiscriminant = normalizedDiscriminant *
frsqrt(normalizedDiscriminant);
// root1 = h * (-1 - sqrt(d)); root2 = h * normalizedC / (-1 - sqrt(d)), which equals
// c / (a * root1), so the second root is obtained from the product of roots.
760 root2 = (halfBOverA * normalizedC) / (-1.0f - sqrtNormalizedDiscriminant);
761 root1 = halfBOverA * (-1.0f - sqrtNormalizedDiscriminant);
// Discriminant within EPSILON of zero.
// NOTE(review): the body of this branch is not visible.
766 if (normalizedDiscriminant >= -EPSILON) {
// Returns -(x * inv^2 - 2 * inv) = 2 * inv - x * inv^2, evaluated through fms().
// NOTE(review): `inv` is not declared in the visible lines (original line 776 is missing).
// The expression has the form of one Newton-Raphson step for 1/x, so `inv` is presumably an
// initial reciprocal estimate of x (e.g. from fres) — confirm against the full source.
775[[nodiscard]]
static inline constexpr f32
finv(f32 x) {
777 f32 invDouble = inv + inv;
778 f32 invSquare = inv * inv;
// invSquare is the factor fms() precision-reduces via force25Bit().
779 return -
fms(x, invSquare, invDouble);
// Blends endpoint values p0/p1 and tangents m0/m1 at parameter t using weights h1, h2, h3.
// NOTE(review): `t2` and `h1` are not declared in the visible lines (original lines 798, 801
// and 803-804 are missing). If t2 is t * t, then h3 = t^3 - t^2 and h2 = t^3 - 2t^2 + t, the
// tangent weights of a cubic Hermite spline — confirm against the full source.
797[[nodiscard]]
static inline constexpr f32
Hermite(f32 p0, f32 m0, f32 p1, f32 m1, f32 t) {
799 f32 t2_less_t = t2 - t;
800 f32 h3 = t2_less_t * t;
802 f32 h2 = h3 - t2_less_t;
// p0 - h1 * p0 + h1 * p1 interpolates between the endpoints by h1; the tangent terms are
// added on top.
805 return h3 * m1 + (h2 * m0 + (p0 - h1 * p0 + h1 * p1));