use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(16))]
    pub struct f64x2 { pub(crate) sse: m128d }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct f64x2 { pub(crate) simd: v128 }

    impl Default for f64x2 {
      fn default() -> Self {
        Self::splat(0.0)
      }
    }

    impl PartialEq for f64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(f64x2_eq(self.simd, other.simd))
      }
    }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct f64x2 { pub(crate) neon: float64x2_t }

    impl Default for f64x2 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_f64(0.0)} }
      }
    }

    impl PartialEq for f64x2 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          let e = vceqq_f64(self.neon, other.neon);
          vgetq_lane_u64(e,0) == u64::MAX && vgetq_lane_u64(e,1) == u64::MAX
        }
      }
    }
  } else {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(16))]
    pub struct f64x2 { pub(crate) arr: [f64;2] }
  }
}

macro_rules! const_f64_as_f64x2 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f64x2 =
      unsafe { ConstUnionHack128bit { f64a2: [$f; 2] }.f64x2 };
  };
}

impl f64x2 {
  const_f64_as_f64x2!(ONE, 1.0);
  const_f64_as_f64x2!(ZERO, 0.0);
  const_f64_as_f64x2!(HALF, 0.5);
  const_f64_as_f64x2!(E, core::f64::consts::E);
  const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
  const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
  const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
  const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
  const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
  const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
  const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
  const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
  const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
  const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
  const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
  const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
  const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
  const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
  const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
  const_f64_as_f64x2!(PI, core::f64::consts::PI);
  const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
  const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
}

unsafe impl Zeroable for f64x2 {}
unsafe impl Pod for f64x2 {}

impl Add for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] + rhs.arr[0],
          self.arr[1] + rhs.arr[1],
        ]}
      }
    }
  }
}

impl Sub for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0] - rhs.arr[0],
          self.arr[1] - rhs.arr[1],
        ]}
      }
    }
  }
}

impl Mul for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmulq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0] * rhs.arr[0],
          self.arr[1] * rhs.arr[1],
        ]}
      }
    }
  }
}

impl Div for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: div_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_div(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vdivq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0] / rhs.arr[0],
          self.arr[1] / rhs.arr[1],
        ]}
      }
    }
  }
}

impl Add<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Div<f64> for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64) -> Self::Output {
    self.div(Self::splat(rhs))
  }
}

impl Add<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn add(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).add(rhs)
  }
}

impl Sub<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn sub(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).sub(rhs)
  }
}

impl Mul<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn mul(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).mul(rhs)
  }
}

impl Div<f64x2> for f64 {
  type Output = f64x2;
  #[inline]
  #[must_use]
  fn div(self, rhs: f64x2) -> Self::Output {
    f64x2::splat(self).div(rhs)
  }
}

impl BitAnd for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl BitOr for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl BitXor for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
      } else {
        Self { arr: [
          f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
          f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
        ]}
      }
    }
  }
}

impl CmpEq for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpGe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ge(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpGt for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpNe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ne(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
      } else {
        Self { arr: [
          if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpLe for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_le(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl CmpLt for f64x2 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) }}
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
}

impl f64x2 {
  #[inline]
  #[must_use]
  pub fn new(array: [f64; 2]) -> Self {
    Self::from(array)
  }
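  /// Lanewise blend using `self` as the mask (expected to be all-ones or
  /// all-zeros per lane, as produced by the comparison methods): lanes whose
  /// mask is set are taken from `t`, the rest from `f`.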
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vabsq_f64(self.neon) }}
      } else {
        let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
        self & non_sign_bits
      }
    }
  }

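  /// Lanewise maximum. Faster than `max`, but the result is unspecified when
  /// either lane is NaN.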
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: max_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: f64x2_pmax(self.simd, rhs.simd),
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmaxq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
          if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
        ]}
      }
    }
  }

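  /// Lanewise maximum that prefers the non-NaN operand when exactly one lane
  /// is NaN.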
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: v128_bitselect(
            rhs.simd,
            f64x2_pmax(self.simd, rhs.simd),
            f64x2_ne(self.simd, self.simd),
          )
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmaxnmq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].max(rhs.arr[0]),
          self.arr[1].max(rhs.arr[1]),
        ]}
      }
    }
  }

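  /// Lanewise minimum. Faster than `min`, but the result is unspecified when
  /// either lane is NaN.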
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: min_m128d(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: f64x2_pmin(self.simd, rhs.simd),
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vminq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
          if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
        ]}
      }
    }
  }

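  /// Lanewise minimum that prefers the non-NaN operand when exactly one lane
  /// is NaN.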
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
      } else if #[cfg(target_feature="simd128")] {
        Self {
          simd: v128_bitselect(
            rhs.simd,
            f64x2_pmin(self.simd, rhs.simd),
            f64x2_ne(self.simd, self.simd),
          )
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vminnmq_f64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].min(rhs.arr[0]),
          self.arr[1].min(rhs.arr[1]),
        ]}
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_ne(self.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
      } else {
        Self { arr: [
          if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
          if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
        ]}
      }
    }
  }
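  /// Lanewise mask of values that are neither infinite nor NaN (checked by
  /// testing whether the exponent field is all ones).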
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
    let u: u64x2 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
    cast(out)
  }
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    let shifted_inf = u64x2::from(0xFFE0000000000000);
    let u: u64x2 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).cmp_eq(shifted_inf);
    cast(out)
  }

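  /// Rounds each lane to the nearest integer-valued `f64`.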
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_nearest(self.simd) }
      } else {
        let sign_mask = f64x2::from(-0.0);
        let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
        let sign = self & sign_mask;
        let signed_magic = magic | sign;
        self + signed_magic - signed_magic
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i64x2 {
    let rounded: [f64; 2] = cast(self.round());
    cast([rounded[0] as i64, rounded[1] as i64])
  }
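  /// `(self * m) + a`, as a single fused operation when the `fma` target
  /// feature is enabled, otherwise as a separate multiply and add.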
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
      } else {
        (self * m) + a
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_sub_m128d(self.sse, m.sse, a.sse) }
      } else {
        (self * m) - a
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
      } else {
        a - (self * m)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="fma"))] {
        Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, a.sse) }
      } else {
        -(self * m) - a
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }

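  /// Computes `asin` and `acos` of each lane in one pass, sharing the
  /// polynomial evaluation between the two results.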
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x2::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    (asin, acos)
  }

  #[inline]
  pub fn acos(self) -> Self {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }

  #[inline]
  pub fn asin(self) -> Self {
    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);

    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);

    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);

    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);

    let xa = self.abs();

    let big = xa.cmp_ge(f64x2::splat(0.625));

    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);

    let x2 = x1 * x1;
    let x3 = x2 * x1;
    let x4 = x2 * x2;
    let x5 = x4 * x1;

    let do_big = big.any();
    let do_small = !big.all();

    let mut rx = f64x2::default();
    let mut sx = f64x2::default();
    let mut px = f64x2::default();
    let mut qx = f64x2::default();

    if do_big {
      rx = x3.mul_add(R3asin, x2 * R2asin)
        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
      sx =
        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
    }
    if do_small {
      px = x3.mul_add(P3asin, P0asin)
        + x4.mul_add(P4asin, x1 * P1asin)
        + x5.mul_add(P5asin, x2 * P2asin);
      qx = x4.mul_add(Q4asin, x5)
        + x3.mul_add(Q3asin, x1 * Q1asin)
        + x2.mul_add(Q2asin, Q0asin);
    };

    let vx = big.blend(rx, px);
    let wx = big.blend(sx, qx);

    let y1 = vx / wx * x1;

    let mut z1 = f64x2::default();
    let mut z2 = f64x2::default();
    if do_big {
      let xb = (x1 + x1).sqrt();
      z1 = xb.mul_add(y1, xb);
    }

    if do_small {
      z2 = xa.mul_add(y1, xa);
    }

    let z3 = f64x2::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z2);
    let asin = asin.flip_signs(self);

    asin
  }

  #[inline]
  pub fn atan(self) -> Self {
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let t = self.abs();

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = (self.sign_bit()).blend(-re, re);

    re
  }

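  /// Lanewise four-quadrant arctangent of `self / x` (with `self` as the y
  /// coordinate).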
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);

    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);

    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);

    let y = self;

    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    let notbig = t.cmp_le(T3PO8);
    let notsmal = t.cmp_ge(Self::splat(0.66));

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;
    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
    fac = notsmal & fac;

    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);

    let mut re = (px / qx).mul_add(z * zz, z);
    re += s + fac;

    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    re = (y.sign_bit()).blend(-re, re);

    re
  }

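  /// Computes the sine and cosine of each lane at the same time, using
  /// argument reduction by pi/2 followed by separate sin and cos polynomials.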
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
    const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
    const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
    const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
    const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
    const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);

    const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
    const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
    const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
    const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
    const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
    const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);

    const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
    const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
    const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);

    const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);

    let xa = self.abs();

    let y = (xa * TWO_OVER_PI).round();
    let q = y.round_int();

    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));

    let x2 = x * x;
    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
    s = (x * x2).mul_add(s, x);
    c =
      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));

    let swap = !((q & i64x2::from(1)).cmp_eq(i64x2::from(0)));

    let mut overflow: f64x2 = cast(q.cmp_gt(i64x2::from(0x80000000000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f64x2::from(0.0), s);
    c = overflow.blend(f64x2::from(1.0), c);

    let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
    let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
    let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
    cos1 ^= cast::<_, f64x2>(sign_cos);

    (sin1, cos1)
  }
  #[inline]
  #[must_use]
  pub fn sin(self) -> Self {
    let (s, _) = self.sin_cos();
    s
  }
  #[inline]
  #[must_use]
  pub fn cos(self) -> Self {
    let (_, c) = self.sin_cos();
    c
  }
  #[inline]
  #[must_use]
  pub fn tan(self) -> Self {
    let (s, c) = self.sin_cos();
    s / c
  }
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
    self * DEG_TO_RAD_RATIO
  }
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sqrt_m128d(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_sqrt(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vsqrtq_f64(self.neon) }}
      } else if #[cfg(feature="std")] {
        Self { arr: [
          self.arr[0].sqrt(),
          self.arr[1].sqrt(),
        ]}
      } else {
        Self { arr: [
          software_sqrt(self.arr[0]),
          software_sqrt(self.arr[1]),
        ]}
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        move_mask_m128d(self.sse)
      } else if #[cfg(target_feature="simd128")] {
        u64x2_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          let e = vreinterpretq_u64_f64(self.neon);
          (vgetq_lane_u64(e,0) >> 63 | ((vgetq_lane_u64(e,1) >> 62) & 0x2)) as i32
        }
      } else {
        (((self.arr[0].to_bits() as i64) < 0) as i32) << 0 |
        (((self.arr[1].to_bits() as i64) < 0) as i32) << 1
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="simd128")] {
        v128_any_true(self.simd)
      } else {
        self.move_mask() != 0
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="simd128")] {
        u64x2_all_true(self.simd)
      } else {
        self.move_mask() == 0b11
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

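  // Computes 2^n for lanes holding (small) integer values n: after adding
  // `bias + 2^52`, the biased exponent n+1023 sits in the mantissa bits, and
  // the shift by 52 moves it into the exponent field.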
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x2!(bias, 1023.0);
    let a = self + (bias + pow2_52);
    let c = cast::<_, i64x2>(a) << 52;
    cast::<_, f64x2>(c)
  }

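  /// Lanewise `e^self`, via range reduction by multiples of ln(2) and a
  /// polynomial approximation; lanes that are out of range or non-finite
  /// produce 0.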
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    const_f64_as_f64x2!(P2, 1.0 / 2.0);
    const_f64_as_f64x2!(P3, 1.0 / 6.0);
    const_f64_as_f64x2!(P4, 1. / 24.);
    const_f64_as_f64x2!(P5, 1. / 120.);
    const_f64_as_f64x2!(P6, 1. / 720.);
    const_f64_as_f64x2!(P7, 1. / 5040.);
    const_f64_as_f64x2!(P8, 1. / 40320.);
    const_f64_as_f64x2!(P9, 1. / 362880.);
    const_f64_as_f64x2!(P10, 1. / 3628800.);
    const_f64_as_f64x2!(P11, 1. / 39916800.);
    const_f64_as_f64x2!(P12, 1. / 479001600.);
    const_f64_as_f64x2!(P13, 1. / 6227020800.);
    const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
    const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
    let max_x = f64x2::from(708.39);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z =
      polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }

  #[inline]
  fn exponent(self) -> f64x2 {
    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
    const_f64_as_f64x2!(bias, 1023.0);
    let a = cast::<_, u64x2>(self);
    let b = a >> 52;
    let c = b | cast::<_, u64x2>(pow2_52);
    let d = cast::<_, f64x2>(c);
    let e = d - (pow2_52 + bias);
    e
  }

  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u64x2>(self);
    let t2 = cast::<_, u64x2>(
      (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
    );
    cast::<_, f64x2>(t2)
  }

  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i64x2>(self);
    let t = t & i64x2::splat(0x7FF0000000000000);
    i64x2::round_float(t.cmp_eq(i64x2::splat(0)))
  }

  #[inline]
  fn infinity() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
  }

  #[inline]
  fn nan_log() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
  }

  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
  }

  #[inline]
  fn sign_bit(self) -> Self {
    let t1 = cast::<_, i64x2>(self);
    let t2 = t1 >> 63;
    !cast::<_, f64x2>(t2).cmp_eq(f64x2::ZERO)
  }

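  /// Horizontal sum of the two lanes.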
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f64 {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        let a = add_horizontal_m128d(self.sse, self.sse);
        a.to_array()[0]
      } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
        let a: [f64;2] = cast(self);
        a.iter().sum()
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { vgetq_lane_f64(self.neon,0) + vgetq_lane_f64(self.neon,1) }
      } else {
        self.arr.iter().sum()
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
    const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
    const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
    const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
    const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
    const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);

    const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
    const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
    const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
    const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
    const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
    const_f64_as_f64x2!(LN2F_HI, 0.693359375);
    const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
    const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
    const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.cmp_gt(VM_SQRT2 * f64x2::HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let px = x2 * x * px;
    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
    let res = px / qx;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(f64x2::HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }

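  /// Lanewise `self` raised to the power `y`, with edge-case handling for
  /// zero, negative, and non-finite inputs.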
  #[inline]
  #[must_use]
  pub fn pow_f64x2(self, y: Self) -> Self {
    const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
    const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
    const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
    const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
    const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
    const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
    const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
    const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
    const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
    const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
    const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
    const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
    const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
    const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
    const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);

    const_f64_as_f64x2!(p2, 1.0 / 2.0);
    const_f64_as_f64x2!(p3, 1.0 / 6.0);
    const_f64_as_f64x2!(p4, 1.0 / 24.0);
    const_f64_as_f64x2!(p5, 1.0 / 120.0);
    const_f64_as_f64x2!(p6, 1.0 / 720.0);
    const_f64_as_f64x2!(p7, 1.0 / 5040.0);
    const_f64_as_f64x2!(p8, 1.0 / 40320.0);
    const_f64_as_f64x2!(p9, 1.0 / 362880.0);
    const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
    const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
    const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
    const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);

    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f64x2::SQRT_2 * f64x2::HALF);
    let x = (!mask).blend(x + x, x);
    let x = x - f64x2::ONE;
    let x2 = x * x;
    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
    let px = px * x * x2;
    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
    let lg1 = px / qx;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f64x2::ONE, ef);
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
    let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
    let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;

    let e2 = (lg * y * f64x2::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2d_hi);
    let v = e2.mul_neg_add(ln2d_lo, v);
    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);

    let x = v;
    let e3 = (x * f64x2::LOG2_E).round();
    let x = e3.mul_neg_add(f64x2::LN_2, x);
    let z =
      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
        + f64x2::ONE;
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i64x2>(ee.round_int());
    let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));

    let overflow = cast::<_, f64x2>(!ej.cmp_lt(i64x2::splat(0x07FF)))
      | ee.cmp_gt(f64x2::splat(3000.0));
    let underflow = cast::<_, f64x2>(!ej.cmp_gt(i64x2::splat(0x000)))
      | ee.cmp_lt(f64x2::splat(-3000.0));

    let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));

    let z = if (overflow | underflow).any() {
      let z = underflow.blend(f64x2::ZERO, z);
      overflow.blend(Self::infinity(), z)
    } else {
      z
    };

    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f64x2::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
      ),
      z,
    );

    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());
      let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();

      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
      x_sign.blend(z1, z)
    } else {
      z
    };

    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();

    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }

  #[inline]
  pub fn powf(self, y: f64) -> Self {
    Self::pow_f64x2(self, f64x2::splat(y))
  }

  #[inline]
  pub fn to_array(self) -> [f64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[f64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
    cast_mut(self)
  }

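  /// Converts the lower two `i32` lanes of `v` to `f64`, ignoring the upper
  /// two lanes.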
  #[inline]
  pub fn from_i32x4_lower2(v: i32x4) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: f64x2_convert_low_i32x4(v.simd)}
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
      } else {
        Self { arr: [
          v.as_array_ref()[0] as f64,
          v.as_array_ref()[1] as f64,
        ]}
      }
    }
  }
}

impl From<i32x4> for f64x2 {
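  /// Uses only the lower two `i32` lanes of the input; the upper two lanes
  /// are discarded.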
  #[inline]
  fn from(v: i32x4) -> Self {
    Self::from_i32x4_lower2(v)
  }
}

impl Not for f64x2 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: self.sse.not() }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_not(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) }}
      } else {
        Self { arr: [
          f64::from_bits(!self.arr[0].to_bits()),
          f64::from_bits(!self.arr[1].to_bits()),
        ]}
      }
    }
  }
}