wide/
f64x2_.rs

1use super::*;
2
3pick! {
4  if #[cfg(target_feature="sse2")] {
5    #[derive(Default, Clone, Copy, PartialEq)]
6    #[repr(C, align(16))]
7    pub struct f64x2 { pub(crate) sse: m128d }
8  } else if #[cfg(target_feature="simd128")] {
9    use core::arch::wasm32::*;
10
11    #[derive(Clone, Copy)]
12    #[repr(transparent)]
13    pub struct f64x2 { pub(crate) simd: v128 }
14
15    impl Default for f64x2 {
16      fn default() -> Self {
17        Self::splat(0.0)
18      }
19    }
20
21    impl PartialEq for f64x2 {
22      fn eq(&self, other: &Self) -> bool {
23        u64x2_all_true(f64x2_eq(self.simd, other.simd))
24      }
25    }
26  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
27    use core::arch::aarch64::*;
28    #[repr(C)]
29    #[derive(Copy, Clone)]
30    pub struct f64x2 { pub(crate) neon: float64x2_t }
31
32    impl Default for f64x2 {
33      #[inline]
34      #[must_use]
35      fn default() -> Self {
36        unsafe { Self { neon: vdupq_n_f64(0.0)} }
37      }
38    }
39
40    impl PartialEq for f64x2 {
41      #[inline]
42      #[must_use]
43      fn eq(&self, other: &Self) -> bool {
44        unsafe
45        { let e = vceqq_f64(self.neon, other.neon);
46          vgetq_lane_u64(e,0) == u64::MAX && vgetq_lane_u64(e,1) == u64::MAX
47        }
48      }
49
50    }
51  } else {
52    #[derive(Default, Clone, Copy, PartialEq)]
53    #[repr(C, align(16))]
54    pub struct f64x2 { pub(crate) arr: [f64;2] }
55  }
56}
57
58macro_rules! const_f64_as_f64x2 {
59  ($i:ident, $f:expr) => {
60    #[allow(non_upper_case_globals)]
61    pub const $i: f64x2 =
62      unsafe { ConstUnionHack128bit { f64a2: [$f; 2] }.f64x2 };
63  };
64}
65
66impl f64x2 {
67  const_f64_as_f64x2!(ONE, 1.0);
68  const_f64_as_f64x2!(ZERO, 0.0);
69  const_f64_as_f64x2!(HALF, 0.5);
70  const_f64_as_f64x2!(E, core::f64::consts::E);
71  const_f64_as_f64x2!(FRAC_1_PI, core::f64::consts::FRAC_1_PI);
72  const_f64_as_f64x2!(FRAC_2_PI, core::f64::consts::FRAC_2_PI);
73  const_f64_as_f64x2!(FRAC_2_SQRT_PI, core::f64::consts::FRAC_2_SQRT_PI);
74  const_f64_as_f64x2!(FRAC_1_SQRT_2, core::f64::consts::FRAC_1_SQRT_2);
75  const_f64_as_f64x2!(FRAC_PI_2, core::f64::consts::FRAC_PI_2);
76  const_f64_as_f64x2!(FRAC_PI_3, core::f64::consts::FRAC_PI_3);
77  const_f64_as_f64x2!(FRAC_PI_4, core::f64::consts::FRAC_PI_4);
78  const_f64_as_f64x2!(FRAC_PI_6, core::f64::consts::FRAC_PI_6);
79  const_f64_as_f64x2!(FRAC_PI_8, core::f64::consts::FRAC_PI_8);
80  const_f64_as_f64x2!(LN_2, core::f64::consts::LN_2);
81  const_f64_as_f64x2!(LN_10, core::f64::consts::LN_10);
82  const_f64_as_f64x2!(LOG2_E, core::f64::consts::LOG2_E);
83  const_f64_as_f64x2!(LOG10_E, core::f64::consts::LOG10_E);
84  const_f64_as_f64x2!(LOG10_2, core::f64::consts::LOG10_2);
85  const_f64_as_f64x2!(LOG2_10, core::f64::consts::LOG2_10);
86  const_f64_as_f64x2!(PI, core::f64::consts::PI);
87  const_f64_as_f64x2!(SQRT_2, core::f64::consts::SQRT_2);
88  const_f64_as_f64x2!(TAU, core::f64::consts::TAU);
89}
90
91unsafe impl Zeroable for f64x2 {}
92unsafe impl Pod for f64x2 {}
93
94impl Add for f64x2 {
95  type Output = Self;
96  #[inline]
97  #[must_use]
98  fn add(self, rhs: Self) -> Self::Output {
99    pick! {
100      if #[cfg(target_feature="sse2")] {
101        Self { sse: add_m128d(self.sse, rhs.sse) }
102      } else if #[cfg(target_feature="simd128")] {
103        Self { simd: f64x2_add(self.simd, rhs.simd) }
104      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
105        unsafe { Self { neon: vaddq_f64(self.neon, rhs.neon) } }
106      } else {
107        Self { arr: [
108          self.arr[0] + rhs.arr[0],
109          self.arr[1] + rhs.arr[1],
110        ]}
111      }
112    }
113  }
114}
115
116impl Sub for f64x2 {
117  type Output = Self;
118  #[inline]
119  #[must_use]
120  fn sub(self, rhs: Self) -> Self::Output {
121    pick! {
122      if #[cfg(target_feature="sse2")] {
123        Self { sse: sub_m128d(self.sse, rhs.sse) }
124      } else if #[cfg(target_feature="simd128")] {
125        Self { simd: f64x2_sub(self.simd, rhs.simd) }
126      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
127        unsafe { Self { neon: vsubq_f64(self.neon, rhs.neon) } }
128      } else {
129        Self { arr: [
130          self.arr[0] - rhs.arr[0],
131          self.arr[1] - rhs.arr[1],
132        ]}
133      }
134    }
135  }
136}
137
138impl Mul for f64x2 {
139  type Output = Self;
140  #[inline]
141  #[must_use]
142  fn mul(self, rhs: Self) -> Self::Output {
143    pick! {
144      if #[cfg(target_feature="sse2")] {
145        Self { sse: mul_m128d(self.sse, rhs.sse) }
146      } else if #[cfg(target_feature="simd128")] {
147        Self { simd: f64x2_mul(self.simd, rhs.simd) }
148      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
149        unsafe {Self { neon: vmulq_f64(self.neon, rhs.neon) }}
150      } else {
151        Self { arr: [
152          self.arr[0] * rhs.arr[0],
153          self.arr[1] * rhs.arr[1],
154        ]}
155      }
156    }
157  }
158}
159
160impl Div for f64x2 {
161  type Output = Self;
162  #[inline]
163  #[must_use]
164  fn div(self, rhs: Self) -> Self::Output {
165    pick! {
166      if #[cfg(target_feature="sse2")] {
167        Self { sse: div_m128d(self.sse, rhs.sse) }
168      } else if #[cfg(target_feature="simd128")] {
169        Self { simd: f64x2_div(self.simd, rhs.simd) }
170      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
171        unsafe {Self { neon: vdivq_f64(self.neon, rhs.neon) }}
172      } else {
173        Self { arr: [
174          self.arr[0] / rhs.arr[0],
175          self.arr[1] / rhs.arr[1],
176        ]}
177      }
178    }
179  }
180}
181
182impl Add<f64> for f64x2 {
183  type Output = Self;
184  #[inline]
185  #[must_use]
186  fn add(self, rhs: f64) -> Self::Output {
187    self.add(Self::splat(rhs))
188  }
189}
190
191impl Sub<f64> for f64x2 {
192  type Output = Self;
193  #[inline]
194  #[must_use]
195  fn sub(self, rhs: f64) -> Self::Output {
196    self.sub(Self::splat(rhs))
197  }
198}
199
200impl Mul<f64> for f64x2 {
201  type Output = Self;
202  #[inline]
203  #[must_use]
204  fn mul(self, rhs: f64) -> Self::Output {
205    self.mul(Self::splat(rhs))
206  }
207}
208
209impl Div<f64> for f64x2 {
210  type Output = Self;
211  #[inline]
212  #[must_use]
213  fn div(self, rhs: f64) -> Self::Output {
214    self.div(Self::splat(rhs))
215  }
216}
217
218impl Add<f64x2> for f64 {
219  type Output = f64x2;
220  #[inline]
221  #[must_use]
222  fn add(self, rhs: f64x2) -> Self::Output {
223    f64x2::splat(self).add(rhs)
224  }
225}
226
227impl Sub<f64x2> for f64 {
228  type Output = f64x2;
229  #[inline]
230  #[must_use]
231  fn sub(self, rhs: f64x2) -> Self::Output {
232    f64x2::splat(self).sub(rhs)
233  }
234}
235
236impl Mul<f64x2> for f64 {
237  type Output = f64x2;
238  #[inline]
239  #[must_use]
240  fn mul(self, rhs: f64x2) -> Self::Output {
241    f64x2::splat(self).mul(rhs)
242  }
243}
244
245impl Div<f64x2> for f64 {
246  type Output = f64x2;
247  #[inline]
248  #[must_use]
249  fn div(self, rhs: f64x2) -> Self::Output {
250    f64x2::splat(self).div(rhs)
251  }
252}
253
254impl BitAnd for f64x2 {
255  type Output = Self;
256  #[inline]
257  #[must_use]
258  fn bitand(self, rhs: Self) -> Self::Output {
259    pick! {
260      if #[cfg(target_feature="sse2")] {
261        Self { sse: bitand_m128d(self.sse, rhs.sse) }
262      } else if #[cfg(target_feature="simd128")] {
263        Self { simd: v128_and(self.simd, rhs.simd) }
264      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
265        unsafe {Self { neon: vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
266      } else {
267        Self { arr: [
268          f64::from_bits(self.arr[0].to_bits() & rhs.arr[0].to_bits()),
269          f64::from_bits(self.arr[1].to_bits() & rhs.arr[1].to_bits()),
270        ]}
271      }
272    }
273  }
274}
275
276impl BitOr for f64x2 {
277  type Output = Self;
278  #[inline]
279  #[must_use]
280  fn bitor(self, rhs: Self) -> Self::Output {
281    pick! {
282      if #[cfg(target_feature="sse2")] {
283        Self { sse: bitor_m128d(self.sse, rhs.sse) }
284      } else if #[cfg(target_feature="simd128")] {
285        Self { simd: v128_or(self.simd, rhs.simd) }
286      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
287        unsafe {Self { neon: vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
288      } else {
289        Self { arr: [
290          f64::from_bits(self.arr[0].to_bits() | rhs.arr[0].to_bits()),
291          f64::from_bits(self.arr[1].to_bits() | rhs.arr[1].to_bits()),
292        ]}
293      }
294    }
295  }
296}
297
298impl BitXor for f64x2 {
299  type Output = Self;
300  #[inline]
301  #[must_use]
302  fn bitxor(self, rhs: Self) -> Self::Output {
303    pick! {
304      if #[cfg(target_feature="sse2")] {
305        Self { sse: bitxor_m128d(self.sse, rhs.sse) }
306      } else if #[cfg(target_feature="simd128")] {
307        Self { simd: v128_xor(self.simd, rhs.simd) }
308      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
309        unsafe {Self { neon: vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(self.neon), vreinterpretq_u64_f64(rhs.neon))) }}
310      } else {
311        Self { arr: [
312          f64::from_bits(self.arr[0].to_bits() ^ rhs.arr[0].to_bits()),
313          f64::from_bits(self.arr[1].to_bits() ^ rhs.arr[1].to_bits()),
314        ]}
315      }
316    }
317  }
318}
319
320impl CmpEq for f64x2 {
321  type Output = Self;
322  #[inline]
323  #[must_use]
324  fn cmp_eq(self, rhs: Self) -> Self::Output {
325    pick! {
326      if #[cfg(target_feature="sse2")] {
327        Self { sse: cmp_eq_mask_m128d(self.sse, rhs.sse) }
328      } else if #[cfg(target_feature="simd128")] {
329        Self { simd: f64x2_eq(self.simd, rhs.simd) }
330      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
331        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }}
332      } else {
333        Self { arr: [
334          if self.arr[0] == rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
335          if self.arr[1] == rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
336        ]}
337      }
338    }
339  }
340}
341
342impl CmpGe for f64x2 {
343  type Output = Self;
344  #[inline]
345  #[must_use]
346  fn cmp_ge(self, rhs: Self) -> Self::Output {
347    pick! {
348      if #[cfg(target_feature="sse2")] {
349        Self { sse: cmp_ge_mask_m128d(self.sse, rhs.sse) }
350      } else if #[cfg(target_feature="simd128")] {
351        Self { simd: f64x2_ge(self.simd, rhs.simd) }
352      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
353        unsafe {Self { neon: vreinterpretq_f64_u64(vcgeq_f64(self.neon, rhs.neon)) }}
354      } else {
355        Self { arr: [
356          if self.arr[0] >= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
357          if self.arr[1] >= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
358        ]}
359      }
360    }
361  }
362}
363
364impl CmpGt for f64x2 {
365  type Output = Self;
366  #[inline]
367  #[must_use]
368  fn cmp_gt(self, rhs: Self) -> Self::Output {
369    pick! {
370      if #[cfg(target_feature="avx")] {
371        Self { sse: cmp_op_mask_m128d::<{cmp_op!(GreaterThanOrdered)}>(self.sse, rhs.sse) }
372      } else if #[cfg(target_feature="sse2")] {
373        Self { sse: cmp_gt_mask_m128d(self.sse, rhs.sse) }
374      } else if #[cfg(target_feature="simd128")] {
375        Self { simd: f64x2_gt(self.simd, rhs.simd) }
376      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
377        unsafe {Self { neon: vreinterpretq_f64_u64(vcgtq_f64(self.neon, rhs.neon)) }}
378      } else {
379        Self { arr: [
380          if self.arr[0] > rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
381          if self.arr[1] > rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
382        ]}
383      }
384    }
385  }
386}
387
388impl CmpNe for f64x2 {
389  type Output = Self;
390  #[inline]
391  #[must_use]
392  fn cmp_ne(self, rhs: Self) -> Self::Output {
393    pick! {
394      if #[cfg(target_feature="sse2")] {
395        Self { sse: cmp_neq_mask_m128d(self.sse, rhs.sse) }
396      } else if #[cfg(target_feature="simd128")] {
397        Self { simd: f64x2_ne(self.simd, rhs.simd) }
398      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
399        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, rhs.neon)) }.not() }
400      } else {
401        Self { arr: [
402          if self.arr[0] != rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
403          if self.arr[1] != rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
404        ]}
405      }
406    }
407  }
408}
409
410impl CmpLe for f64x2 {
411  type Output = Self;
412  #[inline]
413  #[must_use]
414  fn cmp_le(self, rhs: Self) -> Self::Output {
415    pick! {
416      if #[cfg(target_feature="sse2")] {
417        Self { sse: cmp_le_mask_m128d(self.sse, rhs.sse) }
418      } else if #[cfg(target_feature="simd128")] {
419        Self { simd: f64x2_le(self.simd, rhs.simd) }
420      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
421        unsafe {Self { neon: vreinterpretq_f64_u64(vcleq_f64(self.neon, rhs.neon)) }}
422      } else {
423        Self { arr: [
424          if self.arr[0] <= rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
425          if self.arr[1] <= rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
426        ]}
427      }
428    }
429  }
430}
431
432impl CmpLt for f64x2 {
433  type Output = Self;
434  #[inline]
435  #[must_use]
436  fn cmp_lt(self, rhs: Self) -> Self::Output {
437    pick! {
438      if #[cfg(target_feature="sse2")] {
439        Self { sse: cmp_lt_mask_m128d(self.sse, rhs.sse) }
440      } else if #[cfg(target_feature="simd128")] {
441        Self { simd: f64x2_lt(self.simd, rhs.simd) }
442      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
443        unsafe {Self { neon: vreinterpretq_f64_u64(vcltq_f64(self.neon, rhs.neon)) }}
444      } else {
445        Self { arr: [
446          if self.arr[0] < rhs.arr[0] { f64::from_bits(u64::MAX) } else { 0.0 },
447          if self.arr[1] < rhs.arr[1] { f64::from_bits(u64::MAX) } else { 0.0 },
448        ]}
449      }
450    }
451  }
452}
453
454impl f64x2 {
455  #[inline]
456  #[must_use]
457  pub fn new(array: [f64; 2]) -> Self {
458    Self::from(array)
459  }
460  #[inline]
461  #[must_use]
462  pub fn blend(self, t: Self, f: Self) -> Self {
463    pick! {
464      if #[cfg(target_feature="sse4.1")] {
465        Self { sse: blend_varying_m128d(f.sse, t.sse, self.sse) }
466      } else if #[cfg(target_feature="simd128")] {
467        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
468      } else {
469        generic_bit_blend(self, t, f)
470      }
471    }
472  }
473  #[inline]
474  #[must_use]
475  pub fn abs(self) -> Self {
476    pick! {
477      if #[cfg(target_feature="simd128")] {
478        Self { simd: f64x2_abs(self.simd) }
479      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
480        unsafe {Self { neon: vabsq_f64(self.neon) }}
481      } else {
482        let non_sign_bits = f64x2::from(f64::from_bits(i64::MAX as u64));
483        self & non_sign_bits
484      }
485    }
486  }
487
488  /// Calculates the lanewise maximum of both vectors. This is a faster
489  /// implementation than `max`, but it doesn't specify any behavior if NaNs are
490  /// involved.
491  #[inline]
492  #[must_use]
493  pub fn fast_max(self, rhs: Self) -> Self {
494    pick! {
495      if #[cfg(target_feature="sse2")] {
496        Self { sse: max_m128d(self.sse, rhs.sse) }
497      } else if #[cfg(target_feature="simd128")] {
498        Self {
499          simd: f64x2_pmax(self.simd, rhs.simd),
500        }
501      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
502        unsafe {Self { neon: vmaxq_f64(self.neon, rhs.neon) }}
503      } else {
504        Self { arr: [
505          if self.arr[0] < rhs.arr[0] { rhs.arr[0] } else { self.arr[0] },
506          if self.arr[1] < rhs.arr[1] { rhs.arr[1] } else { self.arr[1] },
507        ]}
508      }
509    }
510  }
511
512  /// Calculates the lanewise maximum of both vectors. If either lane is NaN,
513  /// the other lane gets chosen. Use `fast_max` for a faster implementation
514  /// that doesn't handle NaNs.
515  #[inline]
516  #[must_use]
517  pub fn max(self, rhs: Self) -> Self {
518    pick! {
519      if #[cfg(target_feature="sse2")] {
520        // max_m128d seems to do rhs < self ? self : rhs. So if there's any NaN
521        // involved, it chooses rhs, so we need to specifically check rhs for
522        // NaN.
523        rhs.is_nan().blend(self, Self { sse: max_m128d(self.sse, rhs.sse) })
524      } else if #[cfg(target_feature="simd128")] {
525        // WASM has two max intrinsics:
526        // - max: This propagates NaN, that's the opposite of what we need.
527        // - pmax: This is defined as self < rhs ? rhs : self, which basically
528        //   chooses self if either is NaN.
529        //
530        // pmax is what we want, but we need to specifically check self for NaN.
531        Self {
532          simd: v128_bitselect(
533            rhs.simd,
534            f64x2_pmax(self.simd, rhs.simd),
535            f64x2_ne(self.simd, self.simd), // NaN check
536          )
537        }
538      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
539        unsafe {Self { neon: vmaxnmq_f64(self.neon, rhs.neon) }}
540            } else {
541        Self { arr: [
542          self.arr[0].max(rhs.arr[0]),
543          self.arr[1].max(rhs.arr[1]),
544        ]}
545      }
546    }
547  }
548
549  /// Calculates the lanewise minimum of both vectors. This is a faster
550  /// implementation than `min`, but it doesn't specify any behavior if NaNs are
551  /// involved.
552  #[inline]
553  #[must_use]
554  pub fn fast_min(self, rhs: Self) -> Self {
555    pick! {
556      if #[cfg(target_feature="sse2")] {
557        Self { sse: min_m128d(self.sse, rhs.sse) }
558      } else if #[cfg(target_feature="simd128")] {
559        Self {
560          simd: f64x2_pmin(self.simd, rhs.simd),
561        }
562      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
563        unsafe {Self { neon: vminq_f64(self.neon, rhs.neon) }}
564      } else {
565        Self { arr: [
566          if self.arr[0] < rhs.arr[0] { self.arr[0] } else { rhs.arr[0] },
567          if self.arr[1] < rhs.arr[1] { self.arr[1] } else { rhs.arr[1] },
568        ]}
569      }
570    }
571  }
572
573  /// Calculates the lanewise minimum of both vectors. If either lane is NaN,
574  /// the other lane gets chosen. Use `fast_min` for a faster implementation
575  /// that doesn't handle NaNs.
576  #[inline]
577  #[must_use]
578  pub fn min(self, rhs: Self) -> Self {
579    pick! {
580      if #[cfg(target_feature="sse2")] {
581        // min_m128d seems to do rhs < self ? rhs : self. So if there's any NaN
582        // involved, it chooses rhs, so we need to specifically check rhs for
583        // NaN.
584        rhs.is_nan().blend(self, Self { sse: min_m128d(self.sse, rhs.sse) })
585      } else if #[cfg(target_feature="simd128")] {
586        // WASM has two min intrinsics:
587        // - min: This propagates NaN, that's the opposite of what we need.
588        // - pmin: This is defined as rhs < self ? rhs : self, which basically
589        //   chooses self if either is NaN.
590        //
591        // pmin is what we want, but we need to specifically check self for NaN.
592        Self {
593          simd: v128_bitselect(
594            rhs.simd,
595            f64x2_pmin(self.simd, rhs.simd),
596            f64x2_ne(self.simd, self.simd), // NaN check
597          )
598        }
599      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
600        unsafe {Self { neon: vminnmq_f64(self.neon, rhs.neon) }}
601      } else {
602        Self { arr: [
603          self.arr[0].min(rhs.arr[0]),
604          self.arr[1].min(rhs.arr[1]),
605        ]}
606      }
607    }
608  }
609
610  #[inline]
611  #[must_use]
612  pub fn is_nan(self) -> Self {
613    pick! {
614      if #[cfg(target_feature="sse2")] {
615        Self { sse: cmp_unord_mask_m128d(self.sse, self.sse) }
616      } else if #[cfg(target_feature="simd128")] {
617        Self { simd: f64x2_ne(self.simd, self.simd) }
618      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
619        unsafe {Self { neon: vreinterpretq_f64_u64(vceqq_f64(self.neon, self.neon)) }.not() }
620      } else {
621        Self { arr: [
622          if self.arr[0].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
623          if self.arr[1].is_nan() { f64::from_bits(u64::MAX) } else { 0.0 },
624        ]}
625      }
626    }
627  }
628  #[inline]
629  #[must_use]
630  pub fn is_finite(self) -> Self {
631    let shifted_exp_mask = u64x2::from(0xFFE0000000000000);
632    let u: u64x2 = cast(self);
633    let shift_u = u << 1_u64;
634    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
635    cast(out)
636  }
637  #[inline]
638  #[must_use]
639  pub fn is_inf(self) -> Self {
640    let shifted_inf = u64x2::from(0xFFE0000000000000);
641    let u: u64x2 = cast(self);
642    let shift_u = u << 1_u64;
643    let out = (shift_u).cmp_eq(shifted_inf);
644    cast(out)
645  }
646
647  #[inline]
648  #[must_use]
649  pub fn round(self) -> Self {
650    pick! {
651      if #[cfg(target_feature="sse4.1")] {
652        Self { sse: round_m128d::<{round_op!(Nearest)}>(self.sse) }
653      } else if #[cfg(target_feature="simd128")] {
654        Self { simd: f64x2_nearest(self.simd) }
655      } else {
656        let sign_mask = f64x2::from(-0.0);
657        let magic = f64x2::from(f64::from_bits(0x43300000_00000000));
658        let sign = self & sign_mask;
659        let signed_magic = magic | sign;
660        self + signed_magic - signed_magic
661      }
662    }
663  }
664  #[inline]
665  #[must_use]
666  pub fn round_int(self) -> i64x2 {
667    let rounded: [f64; 2] = cast(self.round());
668    cast([rounded[0] as i64, rounded[1] as i64])
669  }
670  #[inline]
671  #[must_use]
672  pub fn mul_add(self, m: Self, a: Self) -> Self {
673    pick! {
674      if #[cfg(all(target_feature="fma"))] {
675        Self { sse: fused_mul_add_m128d(self.sse, m.sse, a.sse) }
676      } else {
677        (self * m) + a
678      }
679    }
680  }
681
682  #[inline]
683  #[must_use]
684  pub fn mul_sub(self, m: Self, a: Self) -> Self {
685    pick! {
686      if #[cfg(all(target_feature="fma"))] {
687        Self { sse: fused_mul_sub_m128d(self.sse, m.sse, a.sse) }
688      } else {
689        (self * m) - a
690      }
691    }
692  }
693
694  #[inline]
695  #[must_use]
696  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
697    pick! {
698        if #[cfg(all(target_feature="fma"))] {
699          Self { sse: fused_mul_neg_add_m128d(self.sse, m.sse, a.sse) }
700        } else {
701          a - (self * m)
702        }
703    }
704  }
705
706  #[inline]
707  #[must_use]
708  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
709    pick! {
710        if #[cfg(all(target_feature="fma"))] {
711          Self { sse: fused_mul_neg_sub_m128d(self.sse, m.sse, a.sse) }
712        } else {
713          -(self * m) - a
714        }
715    }
716  }
717
718  #[inline]
719  #[must_use]
720  pub fn flip_signs(self, signs: Self) -> Self {
721    self ^ (signs & Self::from(-0.0))
722  }
723
724  #[inline]
725  #[must_use]
726  pub fn copysign(self, sign: Self) -> Self {
727    let magnitude_mask = Self::from(f64::from_bits(u64::MAX >> 1));
728    (self & magnitude_mask) | (sign & Self::from(-0.0))
729  }
730
731  #[inline]
732  pub fn asin_acos(self) -> (Self, Self) {
733    // Based on the Agner Fog "vector class library":
734    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
735    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
736    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
737    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
738    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
739    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
740
741    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
742    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
743    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
744    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
745
746    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
747    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
748    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
749    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
750    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
751    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
752
753    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
754    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
755    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
756    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
757    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
758
759    let xa = self.abs();
760
761    let big = xa.cmp_ge(f64x2::splat(0.625));
762
763    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
764
765    let x2 = x1 * x1;
766    let x3 = x2 * x1;
767    let x4 = x2 * x2;
768    let x5 = x4 * x1;
769
770    let do_big = big.any();
771    let do_small = !big.all();
772
773    let mut rx = f64x2::default();
774    let mut sx = f64x2::default();
775    let mut px = f64x2::default();
776    let mut qx = f64x2::default();
777
778    if do_big {
779      rx = x3.mul_add(R3asin, x2 * R2asin)
780        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
781      sx =
782        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
783    }
784    if do_small {
785      px = x3.mul_add(P3asin, P0asin)
786        + x4.mul_add(P4asin, x1 * P1asin)
787        + x5.mul_add(P5asin, x2 * P2asin);
788      qx = x4.mul_add(Q4asin, x5)
789        + x3.mul_add(Q3asin, x1 * Q1asin)
790        + x2.mul_add(Q2asin, Q0asin);
791    };
792
793    let vx = big.blend(rx, px);
794    let wx = big.blend(sx, qx);
795
796    let y1 = vx / wx * x1;
797
798    let mut z1 = f64x2::default();
799    let mut z2 = f64x2::default();
800    if do_big {
801      let xb = (x1 + x1).sqrt();
802      z1 = xb.mul_add(y1, xb);
803    }
804
805    if do_small {
806      z2 = xa.mul_add(y1, xa);
807    }
808
809    // asin
810    let z3 = f64x2::FRAC_PI_2 - z1;
811    let asin = big.blend(z3, z2);
812    let asin = asin.flip_signs(self);
813
814    // acos
815    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
816    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
817    let acos = big.blend(z3, z4);
818
819    (asin, acos)
820  }
821
822  #[inline]
823  pub fn acos(self) -> Self {
824    // Based on the Agner Fog "vector class library":
825    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
826    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
827    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
828    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
829    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
830    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
831
832    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
833    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
834    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
835    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
836
837    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
838    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
839    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
840    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
841    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
842    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
843
844    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
845    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
846    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
847    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
848    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
849
850    let xa = self.abs();
851
852    let big = xa.cmp_ge(f64x2::splat(0.625));
853
854    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
855
856    let x2 = x1 * x1;
857    let x3 = x2 * x1;
858    let x4 = x2 * x2;
859    let x5 = x4 * x1;
860
861    let do_big = big.any();
862    let do_small = !big.all();
863
864    let mut rx = f64x2::default();
865    let mut sx = f64x2::default();
866    let mut px = f64x2::default();
867    let mut qx = f64x2::default();
868
869    if do_big {
870      rx = x3.mul_add(R3asin, x2 * R2asin)
871        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
872      sx =
873        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
874    }
875    if do_small {
876      px = x3.mul_add(P3asin, P0asin)
877        + x4.mul_add(P4asin, x1 * P1asin)
878        + x5.mul_add(P5asin, x2 * P2asin);
879      qx = x4.mul_add(Q4asin, x5)
880        + x3.mul_add(Q3asin, x1 * Q1asin)
881        + x2.mul_add(Q2asin, Q0asin);
882    };
883
884    let vx = big.blend(rx, px);
885    let wx = big.blend(sx, qx);
886
887    let y1 = vx / wx * x1;
888
889    let mut z1 = f64x2::default();
890    let mut z2 = f64x2::default();
891    if do_big {
892      let xb = (x1 + x1).sqrt();
893      z1 = xb.mul_add(y1, xb);
894    }
895
896    if do_small {
897      z2 = xa.mul_add(y1, xa);
898    }
899
900    // acos
901    let z3 = self.cmp_lt(f64x2::ZERO).blend(f64x2::PI - z1, z1);
902    let z4 = f64x2::FRAC_PI_2 - z2.flip_signs(self);
903    let acos = big.blend(z3, z4);
904
905    acos
906  }
907
908  #[inline]
909  pub fn asin(self) -> Self {
910    // Based on the Agner Fog "vector class library":
911    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
912    const_f64_as_f64x2!(R4asin, 2.967721961301243206100E-3);
913    const_f64_as_f64x2!(R3asin, -5.634242780008963776856E-1);
914    const_f64_as_f64x2!(R2asin, 6.968710824104713396794E0);
915    const_f64_as_f64x2!(R1asin, -2.556901049652824852289E1);
916    const_f64_as_f64x2!(R0asin, 2.853665548261061424989E1);
917
918    const_f64_as_f64x2!(S3asin, -2.194779531642920639778E1);
919    const_f64_as_f64x2!(S2asin, 1.470656354026814941758E2);
920    const_f64_as_f64x2!(S1asin, -3.838770957603691357202E2);
921    const_f64_as_f64x2!(S0asin, 3.424398657913078477438E2);
922
923    const_f64_as_f64x2!(P5asin, 4.253011369004428248960E-3);
924    const_f64_as_f64x2!(P4asin, -6.019598008014123785661E-1);
925    const_f64_as_f64x2!(P3asin, 5.444622390564711410273E0);
926    const_f64_as_f64x2!(P2asin, -1.626247967210700244449E1);
927    const_f64_as_f64x2!(P1asin, 1.956261983317594739197E1);
928    const_f64_as_f64x2!(P0asin, -8.198089802484824371615E0);
929
930    const_f64_as_f64x2!(Q4asin, -1.474091372988853791896E1);
931    const_f64_as_f64x2!(Q3asin, 7.049610280856842141659E1);
932    const_f64_as_f64x2!(Q2asin, -1.471791292232726029859E2);
933    const_f64_as_f64x2!(Q1asin, 1.395105614657485689735E2);
934    const_f64_as_f64x2!(Q0asin, -4.918853881490881290097E1);
935
936    let xa = self.abs();
937
938    let big = xa.cmp_ge(f64x2::splat(0.625));
939
940    let x1 = big.blend(f64x2::splat(1.0) - xa, xa * xa);
941
942    let x2 = x1 * x1;
943    let x3 = x2 * x1;
944    let x4 = x2 * x2;
945    let x5 = x4 * x1;
946
947    let do_big = big.any();
948    let do_small = !big.all();
949
950    let mut rx = f64x2::default();
951    let mut sx = f64x2::default();
952    let mut px = f64x2::default();
953    let mut qx = f64x2::default();
954
955    if do_big {
956      rx = x3.mul_add(R3asin, x2 * R2asin)
957        + x4.mul_add(R4asin, x1.mul_add(R1asin, R0asin));
958      sx =
959        x3.mul_add(S3asin, x4) + x2.mul_add(S2asin, x1.mul_add(S1asin, S0asin));
960    }
961    if do_small {
962      px = x3.mul_add(P3asin, P0asin)
963        + x4.mul_add(P4asin, x1 * P1asin)
964        + x5.mul_add(P5asin, x2 * P2asin);
965      qx = x4.mul_add(Q4asin, x5)
966        + x3.mul_add(Q3asin, x1 * Q1asin)
967        + x2.mul_add(Q2asin, Q0asin);
968    };
969
970    let vx = big.blend(rx, px);
971    let wx = big.blend(sx, qx);
972
973    let y1 = vx / wx * x1;
974
975    let mut z1 = f64x2::default();
976    let mut z2 = f64x2::default();
977    if do_big {
978      let xb = (x1 + x1).sqrt();
979      z1 = xb.mul_add(y1, xb);
980    }
981
982    if do_small {
983      z2 = xa.mul_add(y1, xa);
984    }
985
986    // asin
987    let z3 = f64x2::FRAC_PI_2 - z1;
988    let asin = big.blend(z3, z2);
989    let asin = asin.flip_signs(self);
990
991    asin
992  }
993
994  #[inline]
995  pub fn atan(self) -> Self {
996    // Based on the Agner Fog "vector class library":
997    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
998    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
999    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1000    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1001
1002    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1003    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1004    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1005    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1006    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1007
1008    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1009    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1010    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1011    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1012    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1013
1014    let t = self.abs();
1015
1016    // small:  t < 0.66
1017    // medium: t <= t <= 2.4142 (1+sqrt(2))
1018    // big:    t > 2.4142
1019    let notbig = t.cmp_le(T3PO8);
1020    let notsmal = t.cmp_ge(Self::splat(0.66));
1021
1022    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1023    s = notsmal & s;
1024    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1025    fac = notsmal & fac;
1026
1027    // small:  z = t / 1.0;
1028    // medium: z = (t-1.0) / (t+1.0);
1029    // big:    z = -1.0 / t;
1030    let mut a = notbig & t;
1031    a = notsmal.blend(a - Self::ONE, a);
1032    let mut b = notbig & Self::ONE;
1033    b = notsmal.blend(b + t, b);
1034    let z = a / b;
1035
1036    let zz = z * z;
1037
1038    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1039    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1040
1041    let mut re = (px / qx).mul_add(z * zz, z);
1042    re += s + fac;
1043
1044    // get sign bit
1045    re = (self.sign_bit()).blend(-re, re);
1046
1047    re
1048  }
1049
1050  #[inline]
1051  pub fn atan2(self, x: Self) -> Self {
1052    // Based on the Agner Fog "vector class library":
1053    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
1054    const_f64_as_f64x2!(MORE_BITS, 6.123233995736765886130E-17);
1055    const_f64_as_f64x2!(MORE_BITS_O2, 6.123233995736765886130E-17 * 0.5);
1056    const_f64_as_f64x2!(T3PO8, core::f64::consts::SQRT_2 + 1.0);
1057
1058    const_f64_as_f64x2!(P4atan, -8.750608600031904122785E-1);
1059    const_f64_as_f64x2!(P3atan, -1.615753718733365076637E1);
1060    const_f64_as_f64x2!(P2atan, -7.500855792314704667340E1);
1061    const_f64_as_f64x2!(P1atan, -1.228866684490136173410E2);
1062    const_f64_as_f64x2!(P0atan, -6.485021904942025371773E1);
1063
1064    const_f64_as_f64x2!(Q4atan, 2.485846490142306297962E1);
1065    const_f64_as_f64x2!(Q3atan, 1.650270098316988542046E2);
1066    const_f64_as_f64x2!(Q2atan, 4.328810604912902668951E2);
1067    const_f64_as_f64x2!(Q1atan, 4.853903996359136964868E2);
1068    const_f64_as_f64x2!(Q0atan, 1.945506571482613964425E2);
1069
1070    let y = self;
1071
1072    // move in first octant
1073    let x1 = x.abs();
1074    let y1 = y.abs();
1075    let swapxy = y1.cmp_gt(x1);
1076    // swap x and y if y1 > x1
1077    let mut x2 = swapxy.blend(y1, x1);
1078    let mut y2 = swapxy.blend(x1, y1);
1079
1080    // check for special case: x and y are both +/- INF
1081    let both_infinite = x.is_inf() & y.is_inf();
1082    if both_infinite.any() {
1083      let minus_one = -Self::ONE;
1084      x2 = both_infinite.blend(x2 & minus_one, x2);
1085      y2 = both_infinite.blend(y2 & minus_one, y2);
1086    }
1087
1088    // x = y = 0 gives NAN here
1089    let t = y2 / x2;
1090
1091    // small:  t < 0.66
1092    // medium: t <= t <= 2.4142 (1+sqrt(2))
1093    // big:    t > 2.4142
1094    let notbig = t.cmp_le(T3PO8);
1095    let notsmal = t.cmp_ge(Self::splat(0.66));
1096
1097    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
1098    s = notsmal & s;
1099    let mut fac = notbig.blend(MORE_BITS_O2, MORE_BITS);
1100    fac = notsmal & fac;
1101
1102    // small:  z = t / 1.0;
1103    // medium: z = (t-1.0) / (t+1.0);
1104    // big:    z = -1.0 / t;
1105    let mut a = notbig & t;
1106    a = notsmal.blend(a - Self::ONE, a);
1107    let mut b = notbig & Self::ONE;
1108    b = notsmal.blend(b + t, b);
1109    let z = a / b;
1110
1111    let zz = z * z;
1112
1113    let px = polynomial_4!(zz, P0atan, P1atan, P2atan, P3atan, P4atan);
1114    let qx = polynomial_5n!(zz, Q0atan, Q1atan, Q2atan, Q3atan, Q4atan);
1115
1116    let mut re = (px / qx).mul_add(z * zz, z);
1117    re += s + fac;
1118
1119    // move back in place
1120    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
1121    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
1122    re = (x.sign_bit()).blend(Self::PI - re, re);
1123
1124    // get sign bit
1125    re = (y.sign_bit()).blend(-re, re);
1126
1127    re
1128  }
1129
1130  #[inline]
1131  #[must_use]
1132  pub fn sin_cos(self) -> (Self, Self) {
1133    // Based on the Agner Fog "vector class library":
1134    // https://github.com/vectorclass/version2/blob/master/vectormath_trig.h
1135
1136    const_f64_as_f64x2!(P0sin, -1.66666666666666307295E-1);
1137    const_f64_as_f64x2!(P1sin, 8.33333333332211858878E-3);
1138    const_f64_as_f64x2!(P2sin, -1.98412698295895385996E-4);
1139    const_f64_as_f64x2!(P3sin, 2.75573136213857245213E-6);
1140    const_f64_as_f64x2!(P4sin, -2.50507477628578072866E-8);
1141    const_f64_as_f64x2!(P5sin, 1.58962301576546568060E-10);
1142
1143    const_f64_as_f64x2!(P0cos, 4.16666666666665929218E-2);
1144    const_f64_as_f64x2!(P1cos, -1.38888888888730564116E-3);
1145    const_f64_as_f64x2!(P2cos, 2.48015872888517045348E-5);
1146    const_f64_as_f64x2!(P3cos, -2.75573141792967388112E-7);
1147    const_f64_as_f64x2!(P4cos, 2.08757008419747316778E-9);
1148    const_f64_as_f64x2!(P5cos, -1.13585365213876817300E-11);
1149
1150    const_f64_as_f64x2!(DP1, 7.853981554508209228515625E-1 * 2.);
1151    const_f64_as_f64x2!(DP2, 7.94662735614792836714E-9 * 2.);
1152    const_f64_as_f64x2!(DP3, 3.06161699786838294307E-17 * 2.);
1153
1154    const_f64_as_f64x2!(TWO_OVER_PI, 2.0 / core::f64::consts::PI);
1155
1156    let xa = self.abs();
1157
1158    let y = (xa * TWO_OVER_PI).round();
1159    let q = y.round_int();
1160
1161    let x = y.mul_neg_add(DP3, y.mul_neg_add(DP2, y.mul_neg_add(DP1, xa)));
1162
1163    let x2 = x * x;
1164    let mut s = polynomial_5!(x2, P0sin, P1sin, P2sin, P3sin, P4sin, P5sin);
1165    let mut c = polynomial_5!(x2, P0cos, P1cos, P2cos, P3cos, P4cos, P5cos);
1166    s = (x * x2).mul_add(s, x);
1167    c =
1168      (x2 * x2).mul_add(c, x2.mul_neg_add(f64x2::from(0.5), f64x2::from(1.0)));
1169
1170    let swap = !((q & i64x2::from(1)).cmp_eq(i64x2::from(0)));
1171
1172    let mut overflow: f64x2 = cast(q.cmp_gt(i64x2::from(0x80000000000000)));
1173    overflow &= xa.is_finite();
1174    s = overflow.blend(f64x2::from(0.0), s);
1175    c = overflow.blend(f64x2::from(1.0), c);
1176
1177    // calc sin
1178    let mut sin1 = cast::<_, f64x2>(swap).blend(c, s);
1179    let sign_sin: i64x2 = (q << 62) ^ cast::<_, i64x2>(self);
1180    sin1 = sin1.flip_signs(cast(sign_sin));
1181
1182    // calc cos
1183    let mut cos1 = cast::<_, f64x2>(swap).blend(s, c);
1184    let sign_cos: i64x2 = ((q + i64x2::from(1)) & i64x2::from(2)) << 62;
1185    cos1 ^= cast::<_, f64x2>(sign_cos);
1186
1187    (sin1, cos1)
1188  }
1189  #[inline]
1190  #[must_use]
1191  pub fn sin(self) -> Self {
1192    let (s, _) = self.sin_cos();
1193    s
1194  }
1195  #[inline]
1196  #[must_use]
1197  pub fn cos(self) -> Self {
1198    let (_, c) = self.sin_cos();
1199    c
1200  }
1201  #[inline]
1202  #[must_use]
1203  pub fn tan(self) -> Self {
1204    let (s, c) = self.sin_cos();
1205    s / c
1206  }
1207  #[inline]
1208  #[must_use]
1209  pub fn to_degrees(self) -> Self {
1210    const_f64_as_f64x2!(RAD_TO_DEG_RATIO, 180.0_f64 / core::f64::consts::PI);
1211    self * RAD_TO_DEG_RATIO
1212  }
1213  #[inline]
1214  #[must_use]
1215  pub fn to_radians(self) -> Self {
1216    const_f64_as_f64x2!(DEG_TO_RAD_RATIO, core::f64::consts::PI / 180.0_f64);
1217    self * DEG_TO_RAD_RATIO
1218  }
1219  #[inline]
1220  #[must_use]
1221  pub fn sqrt(self) -> Self {
1222    pick! {
1223      if #[cfg(target_feature="sse2")] {
1224        Self { sse: sqrt_m128d(self.sse) }
1225      } else if #[cfg(target_feature="simd128")] {
1226        Self { simd: f64x2_sqrt(self.simd) }
1227      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1228        unsafe {Self { neon: vsqrtq_f64(self.neon) }}
1229      } else if #[cfg(feature="std")] {
1230        Self { arr: [
1231          self.arr[0].sqrt(),
1232          self.arr[1].sqrt(),
1233        ]}
1234      } else {
1235        Self { arr: [
1236          software_sqrt(self.arr[0]),
1237          software_sqrt(self.arr[1]),
1238        ]}
1239      }
1240    }
1241  }
1242  #[inline]
1243  #[must_use]
1244  pub fn move_mask(self) -> i32 {
1245    pick! {
1246      if #[cfg(target_feature="sse2")] {
1247        move_mask_m128d(self.sse)
1248      } else if #[cfg(target_feature="simd128")] {
1249        u64x2_bitmask(self.simd) as i32
1250      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1251        unsafe
1252        {
1253          let e = vreinterpretq_u64_f64(self.neon);
1254
1255          (vgetq_lane_u64(e,0) >> 63 | ((vgetq_lane_u64(e,1) >> 62) & 0x2)) as i32
1256        }
1257      } else {
1258        (((self.arr[0].to_bits() as i64) < 0) as i32) << 0 |
1259        (((self.arr[1].to_bits() as i64) < 0) as i32) << 1
1260      }
1261    }
1262  }
1263  #[inline]
1264  #[must_use]
1265  pub fn any(self) -> bool {
1266    pick! {
1267      if #[cfg(target_feature="simd128")] {
1268        v128_any_true(self.simd)
1269      } else {
1270        self.move_mask() != 0
1271      }
1272    }
1273  }
1274  #[inline]
1275  #[must_use]
1276  pub fn all(self) -> bool {
1277    pick! {
1278      if #[cfg(target_feature="simd128")] {
1279        u64x2_all_true(self.simd)
1280      } else {
1281        // two lanes
1282        self.move_mask() == 0b11
1283      }
1284    }
1285  }
1286  #[inline]
1287  #[must_use]
1288  pub fn none(self) -> bool {
1289    !self.any()
1290  }
1291
1292  #[inline]
1293  fn vm_pow2n(self) -> Self {
1294    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1295    const_f64_as_f64x2!(bias, 1023.0);
1296    let a = self + (bias + pow2_52);
1297    let c = cast::<_, i64x2>(a) << 52;
1298    cast::<_, f64x2>(c)
1299  }
1300
1301  /// Calculate the exponent of a packed `f64x2`
1302  #[inline]
1303  #[must_use]
1304  pub fn exp(self) -> Self {
1305    const_f64_as_f64x2!(P2, 1.0 / 2.0);
1306    const_f64_as_f64x2!(P3, 1.0 / 6.0);
1307    const_f64_as_f64x2!(P4, 1. / 24.);
1308    const_f64_as_f64x2!(P5, 1. / 120.);
1309    const_f64_as_f64x2!(P6, 1. / 720.);
1310    const_f64_as_f64x2!(P7, 1. / 5040.);
1311    const_f64_as_f64x2!(P8, 1. / 40320.);
1312    const_f64_as_f64x2!(P9, 1. / 362880.);
1313    const_f64_as_f64x2!(P10, 1. / 3628800.);
1314    const_f64_as_f64x2!(P11, 1. / 39916800.);
1315    const_f64_as_f64x2!(P12, 1. / 479001600.);
1316    const_f64_as_f64x2!(P13, 1. / 6227020800.);
1317    const_f64_as_f64x2!(LN2D_HI, 0.693145751953125);
1318    const_f64_as_f64x2!(LN2D_LO, 1.42860682030941723212E-6);
1319    let max_x = f64x2::from(708.39);
1320    let r = (self * Self::LOG2_E).round();
1321    let x = r.mul_neg_add(LN2D_HI, self);
1322    let x = r.mul_neg_add(LN2D_LO, x);
1323    let z =
1324      polynomial_13!(x, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13);
1325    let n2 = Self::vm_pow2n(r);
1326    let z = (z + Self::ONE) * n2;
1327    // check for overflow
1328    let in_range = self.abs().cmp_lt(max_x);
1329    let in_range = in_range & self.is_finite();
1330    in_range.blend(z, Self::ZERO)
1331  }
1332
1333  #[inline]
1334  fn exponent(self) -> f64x2 {
1335    const_f64_as_f64x2!(pow2_52, 4503599627370496.0);
1336    const_f64_as_f64x2!(bias, 1023.0);
1337    let a = cast::<_, u64x2>(self);
1338    let b = a >> 52;
1339    let c = b | cast::<_, u64x2>(pow2_52);
1340    let d = cast::<_, f64x2>(c);
1341    let e = d - (pow2_52 + bias);
1342    e
1343  }
1344
1345  #[inline]
1346  fn fraction_2(self) -> Self {
1347    let t1 = cast::<_, u64x2>(self);
1348    let t2 = cast::<_, u64x2>(
1349      (t1 & u64x2::from(0x000FFFFFFFFFFFFF)) | u64x2::from(0x3FE0000000000000),
1350    );
1351    cast::<_, f64x2>(t2)
1352  }
1353
1354  #[inline]
1355  fn is_zero_or_subnormal(self) -> Self {
1356    let t = cast::<_, i64x2>(self);
1357    let t = t & i64x2::splat(0x7FF0000000000000);
1358    i64x2::round_float(t.cmp_eq(i64x2::splat(0)))
1359  }
1360
1361  #[inline]
1362  fn infinity() -> Self {
1363    cast::<_, f64x2>(i64x2::splat(0x7FF0000000000000))
1364  }
1365
1366  #[inline]
1367  fn nan_log() -> Self {
1368    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1369  }
1370
1371  #[inline]
1372  fn nan_pow() -> Self {
1373    cast::<_, f64x2>(i64x2::splat(0x7FF8000000000000 | 0x101 << 29))
1374  }
1375
1376  #[inline]
1377  fn sign_bit(self) -> Self {
1378    let t1 = cast::<_, i64x2>(self);
1379    let t2 = t1 >> 63;
1380    !cast::<_, f64x2>(t2).cmp_eq(f64x2::ZERO)
1381  }
1382
1383  /// horizontal add of all the elements of the vector
1384  #[inline]
1385  #[must_use]
1386  pub fn reduce_add(self) -> f64 {
1387    pick! {
1388      if #[cfg(target_feature="ssse3")] {
1389        let a = add_horizontal_m128d(self.sse, self.sse);
1390        a.to_array()[0]
1391      } else if #[cfg(any(target_feature="sse2", target_feature="simd128"))] {
1392        let a: [f64;2] = cast(self);
1393        a.iter().sum()
1394      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1395        unsafe { vgetq_lane_f64(self.neon,0) + vgetq_lane_f64(self.neon,1) }
1396      } else {
1397        self.arr.iter().sum()
1398      }
1399    }
1400  }
1401
1402  #[inline]
1403  #[must_use]
1404  pub fn ln(self) -> Self {
1405    const_f64_as_f64x2!(P0, 7.70838733755885391666E0);
1406    const_f64_as_f64x2!(P1, 1.79368678507819816313E1);
1407    const_f64_as_f64x2!(P2, 1.44989225341610930846E1);
1408    const_f64_as_f64x2!(P3, 4.70579119878881725854E0);
1409    const_f64_as_f64x2!(P4, 4.97494994976747001425E-1);
1410    const_f64_as_f64x2!(P5, 1.01875663804580931796E-4);
1411
1412    const_f64_as_f64x2!(Q0, 2.31251620126765340583E1);
1413    const_f64_as_f64x2!(Q1, 7.11544750618563894466E1);
1414    const_f64_as_f64x2!(Q2, 8.29875266912776603211E1);
1415    const_f64_as_f64x2!(Q3, 4.52279145837532221105E1);
1416    const_f64_as_f64x2!(Q4, 1.12873587189167450590E1);
1417    const_f64_as_f64x2!(LN2F_HI, 0.693359375);
1418    const_f64_as_f64x2!(LN2F_LO, -2.12194440e-4);
1419    const_f64_as_f64x2!(VM_SQRT2, 1.414213562373095048801);
1420    const_f64_as_f64x2!(VM_SMALLEST_NORMAL, 1.17549435E-38);
1421
1422    let x1 = self;
1423    let x = Self::fraction_2(x1);
1424    let e = Self::exponent(x1);
1425    let mask = x.cmp_gt(VM_SQRT2 * f64x2::HALF);
1426    let x = (!mask).blend(x + x, x);
1427    let fe = mask.blend(e + Self::ONE, e);
1428    let x = x - Self::ONE;
1429    let px = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
1430    let x2 = x * x;
1431    let px = x2 * x * px;
1432    let qx = polynomial_5n!(x, Q0, Q1, Q2, Q3, Q4);
1433    let res = px / qx;
1434    let res = fe.mul_add(LN2F_LO, res);
1435    let res = res + x2.mul_neg_add(f64x2::HALF, x);
1436    let res = fe.mul_add(LN2F_HI, res);
1437    let overflow = !self.is_finite();
1438    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
1439    let mask = overflow | underflow;
1440    if !mask.any() {
1441      res
1442    } else {
1443      let is_zero = self.is_zero_or_subnormal();
1444      let res = underflow.blend(Self::nan_log(), res);
1445      let res = is_zero.blend(Self::infinity(), res);
1446      let res = overflow.blend(self, res);
1447      res
1448    }
1449  }
1450
1451  #[inline]
1452  #[must_use]
1453  pub fn log2(self) -> Self {
1454    Self::ln(self) * Self::LOG2_E
1455  }
1456  #[inline]
1457  #[must_use]
1458  pub fn log10(self) -> Self {
1459    Self::ln(self) * Self::LOG10_E
1460  }
1461
1462  #[inline]
1463  #[must_use]
1464  pub fn pow_f64x2(self, y: Self) -> Self {
1465    const_f64_as_f64x2!(ln2d_hi, 0.693145751953125);
1466    const_f64_as_f64x2!(ln2d_lo, 1.42860682030941723212E-6);
1467    const_f64_as_f64x2!(P0log, 2.0039553499201281259648E1);
1468    const_f64_as_f64x2!(P1log, 5.7112963590585538103336E1);
1469    const_f64_as_f64x2!(P2log, 6.0949667980987787057556E1);
1470    const_f64_as_f64x2!(P3log, 2.9911919328553073277375E1);
1471    const_f64_as_f64x2!(P4log, 6.5787325942061044846969E0);
1472    const_f64_as_f64x2!(P5log, 4.9854102823193375972212E-1);
1473    const_f64_as_f64x2!(P6log, 4.5270000862445199635215E-5);
1474    const_f64_as_f64x2!(Q0log, 6.0118660497603843919306E1);
1475    const_f64_as_f64x2!(Q1log, 2.1642788614495947685003E2);
1476    const_f64_as_f64x2!(Q2log, 3.0909872225312059774938E2);
1477    const_f64_as_f64x2!(Q3log, 2.2176239823732856465394E2);
1478    const_f64_as_f64x2!(Q4log, 8.3047565967967209469434E1);
1479    const_f64_as_f64x2!(Q5log, 1.5062909083469192043167E1);
1480
1481    // Taylor expansion constants
1482    const_f64_as_f64x2!(p2, 1.0 / 2.0); // coefficients for Taylor expansion of exp
1483    const_f64_as_f64x2!(p3, 1.0 / 6.0);
1484    const_f64_as_f64x2!(p4, 1.0 / 24.0);
1485    const_f64_as_f64x2!(p5, 1.0 / 120.0);
1486    const_f64_as_f64x2!(p6, 1.0 / 720.0);
1487    const_f64_as_f64x2!(p7, 1.0 / 5040.0);
1488    const_f64_as_f64x2!(p8, 1.0 / 40320.0);
1489    const_f64_as_f64x2!(p9, 1.0 / 362880.0);
1490    const_f64_as_f64x2!(p10, 1.0 / 3628800.0);
1491    const_f64_as_f64x2!(p11, 1.0 / 39916800.0);
1492    const_f64_as_f64x2!(p12, 1.0 / 479001600.0);
1493    const_f64_as_f64x2!(p13, 1.0 / 6227020800.0);
1494
1495    let x1 = self.abs();
1496    let x = x1.fraction_2();
1497    let mask = x.cmp_gt(f64x2::SQRT_2 * f64x2::HALF);
1498    let x = (!mask).blend(x + x, x);
1499    let x = x - f64x2::ONE;
1500    let x2 = x * x;
1501    let px = polynomial_6!(x, P0log, P1log, P2log, P3log, P4log, P5log, P6log);
1502    let px = px * x * x2;
1503    let qx = polynomial_6n!(x, Q0log, Q1log, Q2log, Q3log, Q4log, Q5log);
1504    let lg1 = px / qx;
1505
1506    let ef = x1.exponent();
1507    let ef = mask.blend(ef + f64x2::ONE, ef);
1508    let e1 = (ef * y).round();
1509    let yr = ef.mul_sub(y, e1);
1510
1511    let lg = f64x2::HALF.mul_neg_add(x2, x) + lg1;
1512    let x2err = (f64x2::HALF * x).mul_sub(x, f64x2::HALF * x2);
1513    let lg_err = f64x2::HALF.mul_add(x2, lg - x) - lg1;
1514
1515    let e2 = (lg * y * f64x2::LOG2_E).round();
1516    let v = lg.mul_sub(y, e2 * ln2d_hi);
1517    let v = e2.mul_neg_add(ln2d_lo, v);
1518    let v = v - (lg_err + x2err).mul_sub(y, yr * f64x2::LN_2);
1519
1520    let x = v;
1521    let e3 = (x * f64x2::LOG2_E).round();
1522    let x = e3.mul_neg_add(f64x2::LN_2, x);
1523    let z =
1524      polynomial_13m!(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
1525        + f64x2::ONE;
1526    let ee = e1 + e2 + e3;
1527    let ei = cast::<_, i64x2>(ee.round_int());
1528    let ej = cast::<_, i64x2>(ei + (cast::<_, i64x2>(z) >> 52));
1529
1530    let overflow = cast::<_, f64x2>(!ej.cmp_lt(i64x2::splat(0x07FF)))
1531      | ee.cmp_gt(f64x2::splat(3000.0));
1532    let underflow = cast::<_, f64x2>(!ej.cmp_gt(i64x2::splat(0x000)))
1533      | ee.cmp_lt(f64x2::splat(-3000.0));
1534
1535    // Add exponent by integer addition
1536    let z = cast::<_, f64x2>(cast::<_, i64x2>(z) + (ei << 52));
1537
1538    // Check for overflow/underflow
1539    let z = if (overflow | underflow).any() {
1540      let z = underflow.blend(f64x2::ZERO, z);
1541      overflow.blend(Self::infinity(), z)
1542    } else {
1543      z
1544    };
1545
1546    // Check for self == 0
1547    let x_zero = self.is_zero_or_subnormal();
1548    let z = x_zero.blend(
1549      y.cmp_lt(f64x2::ZERO).blend(
1550        Self::infinity(),
1551        y.cmp_eq(f64x2::ZERO).blend(f64x2::ONE, f64x2::ZERO),
1552      ),
1553      z,
1554    );
1555
1556    let x_sign = self.sign_bit();
1557    let z = if x_sign.any() {
1558      // Y into an integer
1559      let yi = y.cmp_eq(y.round());
1560      // Is y odd?
1561      let y_odd = cast::<_, i64x2>(y.round_int() << 63).round_float();
1562
1563      let z1 =
1564        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
1565      x_sign.blend(z1, z)
1566    } else {
1567      z
1568    };
1569
1570    let x_finite = self.is_finite();
1571    let y_finite = y.is_finite();
1572    let e_finite = ee.is_finite();
1573
1574    if (x_finite & y_finite & (e_finite | x_zero)).all() {
1575      return z;
1576    }
1577
1578    (self.is_nan() | y.is_nan()).blend(self + y, z)
1579  }
1580
1581  #[inline]
1582  pub fn powf(self, y: f64) -> Self {
1583    Self::pow_f64x2(self, f64x2::splat(y))
1584  }
1585
1586  #[inline]
1587  pub fn to_array(self) -> [f64; 2] {
1588    cast(self)
1589  }
1590
1591  #[inline]
1592  pub fn as_array_ref(&self) -> &[f64; 2] {
1593    cast_ref(self)
1594  }
1595
1596  #[inline]
1597  pub fn as_array_mut(&mut self) -> &mut [f64; 2] {
1598    cast_mut(self)
1599  }
1600
1601  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1602  /// higher two `i32` lanes)
1603  #[inline]
1604  pub fn from_i32x4_lower2(v: i32x4) -> Self {
1605    pick! {
1606      if #[cfg(target_feature="sse2")] {
1607        Self { sse: convert_to_m128d_from_lower2_i32_m128i(v.sse) }
1608      } else if #[cfg(target_feature="simd128")] {
1609        Self { simd: f64x2_convert_low_i32x4(v.simd)}
1610      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1611        Self { neon: unsafe { vcvtq_f64_s64(vmovl_s32(vget_low_s32(v.neon))) }}
1612      } else {
1613        Self { arr: [
1614            v.as_array_ref()[0] as f64,
1615            v.as_array_ref()[1] as f64,
1616        ]}
1617      }
1618    }
1619  }
1620}
1621
1622impl From<i32x4> for f64x2 {
1623  /// Converts the lower two `i32` lanes to two `f64` lanes (and dropping the
1624  /// higher two `i32` lanes)
1625  #[inline]
1626  fn from(v: i32x4) -> Self {
1627    Self::from_i32x4_lower2(v)
1628  }
1629}
1630
1631impl Not for f64x2 {
1632  type Output = Self;
1633  #[inline]
1634  fn not(self) -> Self {
1635    pick! {
1636      if #[cfg(target_feature="sse2")] {
1637        Self { sse: self.sse.not() }
1638      } else if #[cfg(target_feature="simd128")] {
1639        Self { simd: v128_not(self.simd) }
1640      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1641        unsafe {Self { neon: vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(self.neon))) }}
1642      } else {
1643        Self { arr: [
1644          f64::from_bits(!self.arr[0].to_bits()),
1645          f64::from_bits(!self.arr[1].to_bits()),
1646        ]}
1647      }
1648    }
1649  }
1650}
wide/f64x2_.rs

wide/
f64x2_.rs