wide/
lib.rs

1#![no_std]
2#![allow(non_camel_case_types)]
3#![warn(clippy::doc_markdown)]
4#![warn(clippy::missing_inline_in_public_items)]
5#![allow(clippy::eq_op)]
6#![allow(clippy::excessive_precision)]
7#![allow(clippy::let_and_return)]
8#![allow(clippy::unusual_byte_groupings)]
9#![allow(clippy::misrefactored_assign_op)]
10#![cfg_attr(test, allow(clippy::approx_constant))]
11
12//! A crate to help you go wide.
13//!
14//! This crate provides SIMD-compatible data types.
15//!
16//! When possible, explicit SIMD is used with all the math operations here. As a
//! fallback, the fact that all the elements of a fixed length array are doing
18//! the same thing will often make LLVM notice that it should use SIMD
19//! instructions to complete the task. In the worst case, the code just becomes
20//! totally scalar (though the math is still correct, at least).
21//!
22//! ## Crate Features
23//!
//! * `std`: This causes the crate to link to `std`.
25//!   * Currently this just improves the performance of `sqrt` when an explicit
26//!     SIMD `sqrt` isn't available.
27
28// Note(Lokathor): Due to standard library magic, the std-only methods for f32
29// and f64 will automatically be available simply by declaring this.
30#[cfg(feature = "std")]
31extern crate std;
32
33// TODO
34// Add/Sub/Mul/Div with constant
35// Shuffle left/right/by index
36
37use core::{
38  fmt::{
39    Binary, Debug, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
40  },
41  ops::*,
42};
43
44#[allow(unused_imports)]
45use safe_arch::*;
46
47use bytemuck::*;
48
49#[cfg(feature = "serde")]
50use serde::{ser::SerializeSeq, Deserialize, Serialize};
51
52#[macro_use]
53mod macros;
54
// Compile-time branch selection driven by `#[cfg(...)]` predicates:
//
//   pick! {
//     if #[cfg(a)] { ... } else if #[cfg(b)] { ... } else { ... }
//   }
//
// Branch bodies are captured as raw token trees. Every branch is emitted under
// a `#[cfg]` that requires its own predicates and rejects all predicates of the
// branches listed before it, so only the first matching branch is kept.
macro_rules! pick {
  // Entry point: an `if` / `else if` chain that finishes with a plain `else`.
  ($(if #[cfg($($cond:meta),*)] {
      $($then:tt)*
    })else+ else {
      $($otherwise:tt)*
    }) => {
    pick!{
      @__forests [ ] ;
      $( [ {$($cond),*} {$($then)*} ], )*
      [ { } {$($otherwise)*} ],
    }
  };
  // Entry point: a chain without a trailing `else`.
  (if #[cfg($($first_cond:meta),*)] {
      $($first_body:tt)*
    } $(else if #[cfg($($next_cond:meta),*)] {
      $($next_body:tt)*
    })*) => {
    pick!{
      @__forests [ ] ;
      [ {$($first_cond),*} {$($first_body)*} ],
      $( [ {$($next_cond),*} {$($next_body)*} ], )*
    }
  };
  // Internal: no branches left to emit.
  (@__forests [$($seen:meta,)*];) => {
    /* halt expansion */
  };
  // Internal: emit the head branch guarded by its own predicates plus the
  // negation of everything seen so far, then recurse on the tail with the
  // head's predicates appended to the "seen" list.
  (@__forests [$($seen:meta,)*]; [{$($pred:meta),*} {$($body:tt)*}], $($tail:tt)*) => {
    #[cfg(all( $($pred,)* not(any($($seen),*)) ))]
    pick!{ @__identity $($body)* }
    pick!{ @__forests [ $($seen,)* $($pred,)* ] ; $($tail)* }
  };
  // Internal: paste the tokens back unchanged so the `#[cfg]` above has a
  // single macro invocation to attach to.
  (@__identity $($body:tt)*) => {
    $($body)*
  };
}
90
91// TODO: make these generic over `mul_add`? Worth it?
92
/// Evaluates `c0 + c1*x + c2*x^2` with `mul_add` steps.
macro_rules! polynomial_2 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    // Linear part first, then fold the quadratic term on top of it.
    let linear = t.mul_add($c1, $c0);
    t_sq.mul_add($c2, linear)
  }};
}
100
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3` with `mul_add` steps.
macro_rules! polynomial_3 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    // (c3*x + c2) is scaled by x^2 and added onto (c1*x + c0).
    let upper = $c3.mul_add(t, $c2);
    let lower = $c1.mul_add(t, $c0);
    upper.mul_add(t_sq, lower)
  }};
}
108
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4`.
macro_rules! polynomial_4 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr ,$c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    // Cubic part via `mul_add`; the quartic term is a plain multiply-then-add.
    let upper = $c3.mul_add(t, $c2);
    let lower = $c1.mul_add(t, $c0);
    let cubic = upper.mul_add(t_sq, lower);
    cubic + $c4 * t_quad
  }};
}
117
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + c5*x^5`.
macro_rules! polynomial_5 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    // Three linear pairs, weighted by x^2, x^4 and 1 respectively.
    let mid = $c3.mul_add(t, $c2);
    let top = $c5.mul_add(t, $c4);
    let low = $c1.mul_add(t, $c0);
    mid.mul_add(t_sq, top.mul_add(t_quad, low))
  }};
}
128
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + x^5`, i.e. a degree-5
/// polynomial whose leading coefficient is one (there is no `c5` argument).
macro_rules! polynomial_5n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let mid = t.mul_add($c3, $c2);
    // (c4 + x) * x^4 supplies both the x^4 and the implicit x^5 term.
    let top = $c4 + t;
    let low = t.mul_add($c1, $c0);
    t_sq.mul_add(mid, t_quad.mul_add(top, low))
  }};
}
137
/// Evaluates `c0 + c1*x + ... + c6*x^6`.
macro_rules! polynomial_6 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr ,$c6:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    // `high` carries the coefficients of x^4..x^6 (before scaling by x^4),
    // `low` is the cubic made of c0..c3.
    let high = t_sq.mul_add($c6, t.mul_add($c5, $c4));
    let low = t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0));
    t_quad.mul_add(high, low)
  }};
}
149
/// Evaluates `c0 + c1*x + ... + c5*x^5 + x^6`, i.e. a degree-6 polynomial
/// whose leading coefficient is one (there is no `c6` argument).
macro_rules! polynomial_6n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    // (x^2 + c4) + c5*x, later scaled by x^4: the bare x^2 is the implicit
    // x^6 term.
    let high = t.mul_add($c5, t_sq + $c4);
    let low = t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0));
    t_quad.mul_add(high, low)
  }};
}
161
/// Evaluates `c0 + c1*x + ... + c8*x^8`.
macro_rules! polynomial_8 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr,  $c6:expr, $c7:expr, $c8:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let t_oct = t_quad * t_quad;
    // `mid` holds the coefficients of x^4..x^7 (before scaling by x^4);
    // `rest` is c8*x^8 plus the cubic made of c0..c3.
    let mid = t_sq.mul_add($c7.mul_add(t, $c6), t.mul_add($c5, $c4));
    let rest = t_oct.mul_add(
      $c8,
      t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0)),
    );
    t_quad.mul_add(mid, rest)
  }};
}
174
/// Evaluates `x + c2*x^2 + c3*x^3 + ... + c13*x^13`.
///
/// There are no `c0` / `c1` arguments: the constant term is zero and the
/// linear coefficient is one (the innermost `mul_add` adds `x` itself).
macro_rules! polynomial_13 {
  // calculates polynomial c13*x^13 + c12*x^12 + ... + c2*x^2 + x
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let x = $x;
    let x2 = x * x;
    let x4 = x2 * x2;
    let x8 = x4 * x4;
    x8.mul_add(
      // coefficients of x^8..x^13, before scaling by x^8
      x4.mul_add(
        x.mul_add($c13, $c12),
        x2.mul_add(x.mul_add($c11, $c10), x.mul_add($c9, $c8)),
      ),
      // terms x^1..x^7; the trailing `x` is the implicit `1*x`
      x4.mul_add(
        x2.mul_add(x.mul_add($c7, $c6), x.mul_add($c5, $c4)),
        x2.mul_add(x.mul_add($c3, $c2), x),
      ),
    )
  }};
}
194
/// Evaluates `x + c2*x^2 + c3*x^3 + ... + c13*x^13` (no `c0` / `c1`
/// arguments: zero constant term, linear coefficient of one).
macro_rules! polynomial_13m {
  // return  ((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8 + (((c6+c7*x)*x2 +
  // (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x));
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let t_oct = t_quad * t_quad;

    // Coefficients of x^8..x^13, before scaling by x^8.
    let high = t_quad.mul_add(
      t.mul_add($c13, $c12),
      t_sq.mul_add(t.mul_add($c11, $c10), t.mul_add($c9, $c8)),
    );
    // Terms x^1..x^7; the trailing `t` is the implicit `1*x`.
    let low = t_quad.mul_add(
      t_sq.mul_add(t.mul_add($c7, $c6), t.mul_add($c5, $c4)),
      t_sq.mul_add(t.mul_add($c3, $c2), t),
    );
    t_oct.mul_add(high, low)
  }};
}
216
217mod f32x8_;
218pub use f32x8_::*;
219
220mod f32x4_;
221pub use f32x4_::*;
222
223mod f64x4_;
224pub use f64x4_::*;
225
226mod f64x2_;
227pub use f64x2_::*;
228
229mod i8x16_;
230pub use i8x16_::*;
231
232mod i16x16_;
233pub use i16x16_::*;
234
235mod i8x32_;
236pub use i8x32_::*;
237
238mod i16x8_;
239pub use i16x8_::*;
240
241mod i32x4_;
242pub use i32x4_::*;
243
244mod i32x8_;
245pub use i32x8_::*;
246
247mod i64x2_;
248pub use i64x2_::*;
249
250mod i64x4_;
251pub use i64x4_::*;
252
253mod u8x16_;
254pub use u8x16_::*;
255
256mod u16x8_;
257pub use u16x8_::*;
258
259mod u16x16_;
260pub use u16x16_::*;
261
262mod u32x4_;
263pub use u32x4_::*;
264
265mod u32x8_;
266pub use u32x8_::*;
267
268mod u64x2_;
269pub use u64x2_::*;
270
271mod u64x4_;
272pub use u64x4_::*;
273
274#[allow(non_camel_case_types)]
275#[repr(C, align(16))]
276#[rustfmt::skip]
277union ConstUnionHack128bit {
278  f32a4: [f32; 4],
279  f64a2: [f64; 2],
280  i8a16: [i8; 16],
281  i16a8: [i16; 8],
282  i32a4: [i32; 4],
283  i64a2: [i64; 2],
284  u8a16: [u8; 16],
285  u16a8: [u16; 8],
286  u32a4: [u32; 4],
287  u64a2: [u64; 2],
288  f32x4: f32x4,
289  f64x2: f64x2,
290  i8x16: i8x16,
291  i16x8: i16x8,
292  i32x4: i32x4,
293  i64x2: i64x2,
294  u8x16: u8x16,
295  u16x8: u16x8,
296  u32x4: u32x4,
297  u64x2: u64x2,
298  u128:  u128,
299}
300
301#[allow(non_camel_case_types)]
302#[repr(C, align(16))]
303#[rustfmt::skip]
304union ConstUnionHack256bit {
305  f32a8:  [f32; 8],
306  f64a4:  [f64; 4],
307  i8a32:  [i8; 32],
308  i16a16: [i16; 16],
309  i32a8:  [i32; 8],
310  i64a4:  [i64; 4],
311  u8a32:  [u8; 32],
312  u16a16: [u16; 16],
313  u32a8:  [u32; 8],
314  u64a4:  [u64; 4],
315  u128x2: [u128; 2],
316  f32x8:  f32x8,
317  f64x4:  f64x4,
318  i8x32:  i8x32,
319  i16x16: i16x16,
320  i32x8:  i32x8,
321  i64x4:  i64x4,
322  // u8x32:  u8x32,
323  u16x16: u16x16,
324  u32x8:  u32x8,
325  u64x4:  u64x4,
326}
327
/// Bitwise select: each result bit comes from `y` where the matching `mask`
/// bit is set and from `n` where it is clear.
#[allow(dead_code)]
fn generic_bit_blend<T>(mask: T, y: T, n: T) -> T
where
  T: Copy + BitXor<Output = T> + BitAnd<Output = T>,
{
  // Bits on which `n` and `y` disagree; flipping the masked ones in `n`
  // turns exactly those positions into `y`'s bits.
  let differing = n ^ y;
  let to_flip = differing & mask;
  n ^ to_flip
}
335
/// For each `(Trait, method) => [types]` entry, implements `Trait<&Self>` for
/// every listed type by dereferencing the right-hand side and forwarding to
/// the type's existing by-value `Trait<Self>` impl (the types must be `Copy`).
macro_rules! bulk_impl_op_ref_self_for {
  ($(($op:ident, $method:ident) => [$($t:ty),+]),+ $(,)?) => {
    $( // one pass per trait entry
      $( // one impl per type in that entry's list
        impl $op<&Self> for $t {
          type Output = Self;
          #[inline]
          #[must_use]
          fn $method(self, rhs: &Self) -> Self::Output {
            let rhs_value: Self = *rhs;
            <Self as $op<Self>>::$method(self, rhs_value)
          }
        }
      )+
    )+
  };
}
353
354bulk_impl_op_ref_self_for! {
355  (Add, add) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
356  (Sub, sub) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
357  (Mul, mul) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4, u16x8, u16x16],
358  (Div, div) => [f32x8, f32x4, f64x4, f64x2],
359  (BitAnd, bitand) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16,u32x8, u32x4, u64x4, u64x2],
360  (BitOr, bitor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
361  (BitXor, bitxor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
362}
363
/// For each `(AssignTrait<Rhs>, method, method_assign) => [types]` entry,
/// implements `AssignTrait<Rhs>` for every listed type in terms of the
/// existing non-assigning `method` (the types must be `Copy`).
macro_rules! bulk_impl_op_assign_for {
  ($(($op:ident<$rhs:ty>, $method:ident, $method_assign:ident) => [$($t:ty),+]),+ $(,)?) => {
    $( // one pass per trait entry
      $( // one impl per type in that entry's list
        impl $op<$rhs> for $t {
          #[inline]
          fn $method_assign(&mut self, rhs: $rhs) {
            // Copy the current value out, combine, then store the result back.
            let current: Self = *self;
            *self = current.$method(rhs);
          }
        }
      )+
    )+
  };
}
379
380// Note: remember to update bulk_impl_op_ref_self_for first or this will give
381// weird errors!
382bulk_impl_op_assign_for! {
383  (AddAssign<Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
384  (AddAssign<&Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
385  (SubAssign<Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
386  (SubAssign<&Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x4, u64x2],
387  (MulAssign<Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4, u16x8, u16x16],
388  (MulAssign<&Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4, u16x8, u16x16],
389  (DivAssign<Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
390  (DivAssign<&Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
391  (BitAndAssign<Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
392  (BitAndAssign<&Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
393  (BitOrAssign<Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
394  (BitOrAssign<&Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
395  (BitXorAssign<Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
396  (BitXorAssign<&Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, u16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
397}
398
/// Implements `Neg` for each listed type (and for references to it) as
/// `Default::default() - value`.
///
/// NOTE(review): assuming `default()` is the all-zero value, this is
/// `0 - value`; for floating-point lanes that maps `+0.0` to `+0.0` rather
/// than `-0.0` — confirm that is acceptable for the float types passed in.
macro_rules! impl_simple_neg {
  ($($t:ty),+ $(,)?) => {
    $(
      impl Neg for $t {
        type Output = Self;
        #[inline]
        #[must_use]
        fn neg(self) -> Self::Output {
          let zero = Self::default();
          zero - self
        }
      }
      impl Neg for &'_ $t {
        type Output = $t;
        #[inline]
        #[must_use]
        fn neg(self) -> Self::Output {
          let value: $t = *self;
          <$t>::default() - value
        }
      }
    )+
  };
}
421
422impl_simple_neg! {
423  f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
424}
425
426macro_rules! impl_simple_not {
427  ($($t:ty),+ $(,)?) => {
428    $(
429      impl Not for $t {
430        type Output = Self;
431        #[inline]
432        #[must_use]
433        fn not(self) -> Self::Output {
434          self ^ cast::<u128, $t>(u128::MAX)
435        }
436      }
437      impl Not for &'_ $t {
438        type Output = $t;
439        #[inline]
440        #[must_use]
441        fn not(self) -> Self::Output {
442          *self ^ cast::<u128, $t>(u128::MAX)
443        }
444      }
445    )+
446  };
447}
448
449impl_simple_not! {
450  f32x4, i8x32, i8x16, i16x8, i16x16, i32x4, i64x2, u8x16, u16x8, u16x16, u32x4, u64x2,
451}
452
453macro_rules! impl_simple_sum {
454  ($($t:ty),+ $(,)?) => {
455    $(
456      impl<RHS> core::iter::Sum<RHS> for $t where $t: AddAssign<RHS> {
457        #[inline]
458        fn sum<I: Iterator<Item = RHS>>(iter: I) -> Self {
459          let mut total = Self::zeroed();
460          for val in iter {
461            total += val;
462          }
463          total
464        }
465      }
466    )+
467  };
468}
469
470impl_simple_sum! {
471  f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u16x16, u32x8, u32x4, u64x2, u64x4
472}
473
/// Implements `core::iter::Product<RHS>` for each listed type, for any `RHS`
/// the type can `*=`, starting from `Self::from(1.0)`.
macro_rules! impl_floating_product {
  ($($t:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $t where $t: MulAssign<RHS> {
        #[inline]
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1.0), |mut running, factor| {
            running *= factor;
            running
          })
        }
      }
    )+
  };
}
490
491impl_floating_product! {
492  f32x8, f32x4, f64x4, f64x2
493}
494
/// Implements `core::iter::Product<RHS>` for each listed type, for any `RHS`
/// the type can `*=`, starting from `Self::from(1)`.
macro_rules! impl_integer_product {
  ($($t:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $t where $t: MulAssign<RHS> {
        #[inline]
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1), |mut running, factor| {
            running *= factor;
            running
          })
        }
      }
    )+
  };
}
511
512impl_integer_product! {
513  i16x8, i32x4, i32x8,
514}
515
516/// impls `From<a> for b` by just calling `cast`
517macro_rules! impl_from_a_for_b_with_cast {
518  ($(($arr:ty, $simd:ty)),+  $(,)?) => {
519    $(impl From<$arr> for $simd {
520      #[inline]
521      #[must_use]
522      fn from(arr: $arr) -> Self {
523        cast(arr)
524      }
525    }
526    impl From<$simd> for $arr {
527      #[inline]
528      #[must_use]
529      fn from(simd: $simd) -> Self {
530        cast(simd)
531      }
532    })+
533  };
534}
535
536impl_from_a_for_b_with_cast! {
537  ([f32;8], f32x8),
538  ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
539  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
540  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
541}
542
543macro_rules! impl_from_single_value {
544  ($(([$elem:ty;$len:expr], $simd:ty)),+  $(,)?) => {
545    $(impl From<$elem> for $simd {
546      /// Splats the single value given across all lanes.
547      #[inline]
548      #[must_use]
549      fn from(elem: $elem) -> Self {
550        cast([elem; $len])
551      }
552    }
553    impl $simd {
554      #[inline]
555      #[must_use]
556      pub fn splat(elem: $elem) -> $simd {
557        cast([elem; $len])
558      }
559    })+
560  };
561}
562
563impl_from_single_value! {
564  ([f32;8], f32x8),
565  ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
566  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
567  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
568}
569
/// Input shape: `Trait => [(array, simd), ...], ...`
///
/// Implements each formatting trait for each SIMD type by casting the value
/// to the paired array type and formatting the lanes one by one with that
/// same trait, written as `(a, b, c, ...)`.
macro_rules! impl_formatter_for {
  ($($trait:ident => [$(($arr:ty, $simd:ty)),+]),+ $(,)?) => {
    $( // once per formatting trait
      $( // once per (array, simd) pair
        impl $trait for $simd {
          #[allow(clippy::missing_inline_in_public_items)]
          fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
            let lanes: $arr = cast(*self);
            f.write_str("(")?;
            let mut is_first = true;
            for lane in lanes.iter() {
              // Separator goes before every lane except the first.
              if !is_first {
                f.write_str(", ")?;
              }
              is_first = false;
              $trait::fmt(lane, f)?;
            }
            f.write_str(")")
          }
        }
      )+
    )+
  }
}
593
594impl_formatter_for! {
595  Binary => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
596  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
597  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
598  Debug => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
599  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
600  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
601  Display => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
602  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
603  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
604  LowerExp => [([f32;8], f32x8), ([f32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
605  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
606  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
607  LowerHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
608  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
609  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
610  Octal => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
611  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
612  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
613  UpperExp => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
614  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
615  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
616  UpperHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
617  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
618  ([u8;16], u8x16), ([u16;8], u16x8), ([u16;16], u16x16), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
619}
620
621// With const generics this could be simplified I hope
622macro_rules! from_array {
623  ($ty:ty,$dst:ty,$dst_wide:ident,32) => {
624    impl From<&[$ty]> for $dst_wide {
625      #[inline]
626      fn from(src: &[$ty]) -> $dst_wide {
627        match src.len() {
628          32 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst, src[31] as $dst,]),
629          31 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst,0 as $dst,]),
630          30 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst,0 as $dst,0 as $dst,]),
631          29 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
632          28 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
633          27 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
634          26 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
635          25 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
636          24 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
637          23 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
638          22 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
639          21 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
640          20 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
641          19 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
642          18 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
643          17 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
644          16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
645          15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
646          14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
647          13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
648          12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
649          11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
650          10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
651          9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
652          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
653          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
654          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
655          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
656          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
657          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
658          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
659          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
660          _ => panic!(
661            "Converting from an array larger than what can be stored in $dst_wide"
662          ),
663        }
664      }
665    }
666  };
667  ($ty:ty,$dst:ty,$dst_wide:ident,16) => {
668    impl From<&[$ty]> for $dst_wide {
669      #[inline]
670      fn from(src: &[$ty]) -> $dst_wide {
671        match src.len() {
672          16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,]),
673          15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,]),
674          14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,]),
675          13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
676          12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
677          11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
678          10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
679          9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
680          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
681          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
682          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
683          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
684          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
685          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
686          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
687          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
688          _ => panic!(
689            "Converting from an array larger than what can be stored in $dst_wide"
690          ),
691        }
692      }
693    }
694  };
695  ($ty:ty,$dst:ty,$dst_wide:ident,8) => {
696    impl From<&[$ty]> for $dst_wide {
697      #[inline]
698      fn from(src: &[$ty]) -> $dst_wide {
699        match src.len() {
700          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,]),
701          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,]),
702          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,]),
703          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
704          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
705          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
706          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
707          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
708          0 => $dst_wide::from([0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
709          _ => panic!(
710            "Converting from an array larger than what can be stored in $dst_wide"
711          ),
712        }
713      }
714    }
715  };
716  ($ty:ty,$dst:ty,$dst_wide:ident,4) => {
717    impl From<&[$ty]> for $dst_wide {
718      #[inline]
719      fn from(src: &[$ty]) -> $dst_wide {
720        match src.len() {
721          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,]),
722          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,]),
723          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,]),
724          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
725          _ => panic!(
726            "Converting from an array larger than what can be stored in $dst_wide"
727          ),
728        }
729      }
730    }
731  };
732}
733
734from_array!(i8, i8, i8x32, 32);
735from_array!(i8, i8, i8x16, 16);
736from_array!(i8, i32, i32x8, 8);
737from_array!(u8, u8, u8x16, 16);
738from_array!(i16, i16, i16x16, 16);
739from_array!(u16, u16, u16x16, 16);
740from_array!(i32, i32, i32x8, 8);
741from_array!(f32, f32, f32x8, 8);
742from_array!(f32, f32, f32x4, 4);
743from_array!(f64, f64, f64x4, 4);
744from_array!(u64, u64, u64x4, 4);
745from_array!(i64, i64, i64x4, 4);
746
/// Computes the square root of `x` using only integer operations on the bit
/// pattern of the `f64`, producing the result one bit at a time.
///
/// The value is handled as two 32-bit words: `ix0` (sign, exponent and the top
/// 20 mantissa bits) and `ix1` (the low 32 mantissa bits).
///
/// Special cases:
/// * NaN returns NaN.
/// * `+inf` returns `+inf`; `-inf` and every other negative non-zero input
///   return NaN.
/// * `+0.0` and `-0.0` are returned unchanged (sign preserved).
#[allow(unused)]
fn software_sqrt(x: f64) -> f64 {
  use core::num::Wrapping;
  type Wu32 = Wrapping<u32>;
  const fn w(u: u32) -> Wu32 {
    Wrapping(u)
  }
  let sign: Wu32 = w(0x80000000);

  // Extract the high and low words. `to_bits` is endian-independent, so no
  // per-target handling is required.
  let bits = x.to_bits();
  let mut ix0: i32 = (bits >> 32) as i32;
  let mut ix1: Wu32 = w(bits as u32);

  // inf and nan
  if x.is_nan() {
    return f64::NAN;
  }
  if (ix0 & 0x7ff00000) == 0x7ff00000 {
    // +inf * +inf + +inf = +inf, while -inf gives inf + -inf = NaN.
    return x * x + x;
  }

  // zero and negative inputs
  if ix0 <= 0 {
    if ((ix0 & (!sign).0 as i32) | (ix1.0 as i32)) == 0 {
      // +-0 is its own square root.
      return x;
    } else if ix0 < 0 {
      // 0.0 / 0.0 yields NaN for negative inputs.
      return (x - x) / (x - x);
    }
  }

  // normalize
  let mut m: i32 = ix0 >> 20;
  if m == 0 {
    // subnormal: pull mantissa bits up from the low word until the implicit
    // bit position (bit 20 of the high word) is occupied.
    while ix0 == 0 {
      m -= 21;
      ix0 |= (ix1 >> 11).0 as i32;
      ix1 <<= 21;
    }
    let mut i: i32 = 0;
    while (ix0 & 0x00100000) == 0 {
      ix0 <<= 1;
      i += 1;
    }
    m -= i - 1;
    // Move the top `i` bits of the low word into the `i` bits just vacated in
    // the high word. When `i == 0` nothing moved (and a shift by 32 would
    // overflow), so skip it.
    if i > 0 {
      ix0 |= (ix1.0 >> (32 - i)) as i32;
      ix1 <<= i as usize;
    }
  }
  // un-bias exponent
  m -= 1023;
  ix0 = (ix0 & 0x000fffff) | 0x00100000;
  if (m & 1) != 0 {
    // odd m, double the input to make it even
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
  }
  m >>= 1;

  // generate sqrt bit by bit
  ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
  ix1 += ix1;
  // [q, q1] accumulate sqrt(x); [s0, s1] track twice the partial root.
  let mut q: i32 = 0;
  let mut q1: Wu32 = w(0);
  let mut s0: i32 = 0;
  let mut s1: Wu32 = w(0);
  // our bit that moves from left to right through the high word
  let mut r: Wu32 = w(0x00200000);
  while r != w(0) {
    let t = s0 + (r.0 as i32);
    if t <= ix0 {
      s0 = t + (r.0 as i32);
      ix0 -= t;
      q += r.0 as i32;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
    r >>= 1;
  }
  // same procedure for the low word, with manual carry/borrow into the high
  // word
  r = sign;
  while r != w(0) {
    let t1 = s1 + r;
    let t = s0;
    if (t < ix0) || ((t == ix0) && (t1 <= ix1)) {
      s1 = t1 + r;
      if (t1 & sign) == sign && (s1 & sign) == w(0) {
        // s1 wrapped around: carry into s0
        s0 += 1;
      }
      ix0 -= t;
      if ix1 < t1 {
        // borrow from the high word
        ix0 -= 1;
      }
      ix1 -= t1;
      q1 += r;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
    r >>= 1;
  }

  // A non-zero remainder means the result is inexact; use floating add to
  // find out the rounding direction.
  if (ix0 | (ix1.0 as i32)) != 0 {
    let mut z: f64 = 1.0 - 1.0e-300;
    if z >= 1.0 {
      z = 1.0 + 1.0e-300;
      if q1 == w(0xffffffff) {
        q1 = w(0);
        q += 1;
      } else if z > 1.0 {
        if q1 == w(0xfffffffe) {
          q += 1;
        }
        q1 += w(2);
      } else {
        q1 += q1 & w(1);
      }
    }
  }

  // finish up: drop the extra low bit, re-bias the exponent, reassemble
  ix0 = (q >> 1) + 0x3fe00000;
  ix1 = q1 >> 1;
  if (q & 1) == 1 {
    ix1 |= sign;
  }
  ix0 += m << 20;

  f64::from_bits((u64::from(ix0 as u32) << 32) | u64::from(ix1.0))
}
914
/// Checks `software_sqrt` on the IEEE special values and a few perfect
/// squares.
#[test]
fn test_software_sqrt() {
  // NaN, infinities and negative inputs
  assert!(software_sqrt(f64::NAN).is_nan());
  assert_eq!(software_sqrt(f64::INFINITY), f64::INFINITY);
  assert!(software_sqrt(f64::NEG_INFINITY).is_nan());
  assert!(software_sqrt(-1.0).is_nan());

  // Zeros must keep their sign. `0.0 == -0.0` is true, so compare the bit
  // patterns instead of the values.
  assert_eq!(software_sqrt(0.0).to_bits(), 0.0_f64.to_bits());
  assert_eq!(software_sqrt(-0.0).to_bits(), (-0.0_f64).to_bits());

  // perfect squares come back exactly
  for &root in [2.0_f64, 3.0, 4.0, 5.0, 5000.0].iter() {
    assert_eq!(software_sqrt(root * root), root);
  }
}
929
/// "Equal to" comparison whose result type is chosen by the implementor.
///
/// Unlike `PartialEq`, the comparison consumes both operands and returns
/// `Self::Output` rather than `bool`.
pub trait CmpEq<Rhs = Self> {
  /// The type returned by [`CmpEq::cmp_eq`].
  type Output;
  /// Compares `self` with `rhs` for equality.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_eq(self, rhs: Rhs) -> Self::Output;
}
934
/// "Greater than" comparison whose result type is chosen by the implementor.
///
/// The comparison consumes both operands and returns `Self::Output` rather
/// than `bool`.
pub trait CmpGt<Rhs = Self> {
  /// The type returned by [`CmpGt::cmp_gt`].
  type Output;
  /// Tests whether `self` is greater than `rhs`.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_gt(self, rhs: Rhs) -> Self::Output;
}
939
/// "Greater than or equal to" comparison whose result type is chosen by the
/// implementor.
///
/// The comparison consumes both operands and returns `Self::Output` rather
/// than `bool`.
pub trait CmpGe<Rhs = Self> {
  /// The type returned by [`CmpGe::cmp_ge`].
  type Output;
  /// Tests whether `self` is greater than or equal to `rhs`.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_ge(self, rhs: Rhs) -> Self::Output;
}
944
/// "Not equal to" comparison whose result type is chosen by the implementor.
///
/// The comparison consumes both operands and returns `Self::Output` rather
/// than `bool`.
pub trait CmpNe<Rhs = Self> {
  /// The type returned by [`CmpNe::cmp_ne`].
  type Output;
  /// Tests whether `self` differs from `rhs`.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_ne(self, rhs: Rhs) -> Self::Output;
}
949
/// "Less than" comparison whose result type is chosen by the implementor.
///
/// The comparison consumes both operands and returns `Self::Output` rather
/// than `bool`.
pub trait CmpLt<Rhs = Self> {
  /// The type returned by [`CmpLt::cmp_lt`].
  type Output;
  /// Tests whether `self` is less than `rhs`.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_lt(self, rhs: Rhs) -> Self::Output;
}
954
/// "Less than or equal to" comparison whose result type is chosen by the
/// implementor.
///
/// The comparison consumes both operands and returns `Self::Output` rather
/// than `bool`.
pub trait CmpLe<Rhs = Self> {
  /// The type returned by [`CmpLe::cmp_le`].
  type Output;
  /// Tests whether `self` is less than or equal to `rhs`.
  // `#[must_use]` only takes effect on the trait declaration; on methods of a
  // trait implementation the attribute does nothing.
  #[must_use]
  fn cmp_le(self, rhs: Rhs) -> Self::Output;
}
959
/// Implements a binary operator trait with a scalar right-hand side for a
/// list of `(vector, scalar)` type pairs.
///
/// Each generated method broadcasts the scalar with `splat` and then forwards
/// to the same-named method taking a vector on the right-hand side.
macro_rules! bulk_impl_const_rhs_op {
  (($trait_name:ident, $trait_fn:ident) => [$(($vector:ty, $scalar:ty),)+]) => {
    $(
      impl $trait_name<$scalar> for $vector {
        type Output = Self;
        #[inline]
        #[must_use]
        fn $trait_fn(self, rhs: $scalar) -> Self::Output {
          // Fill every lane with the scalar, then reuse the vector/vector op.
          let broadcast = <$vector>::splat(rhs);
          self.$trait_fn(broadcast)
        }
      }
    )+
  };
}
974
975bulk_impl_const_rhs_op!((CmpEq, cmp_eq) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
976bulk_impl_const_rhs_op!((CmpLt, cmp_lt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
977bulk_impl_const_rhs_op!((CmpGt, cmp_gt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
978bulk_impl_const_rhs_op!((CmpNe, cmp_ne) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
979bulk_impl_const_rhs_op!((CmpLe, cmp_le) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
980bulk_impl_const_rhs_op!((CmpGe, cmp_ge) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
981
/// Implements serde's `Serialize` and `Deserialize` for the vector type `$i`
/// by way of its array representation `$t`. Both impls are only compiled when
/// the `serde` feature is enabled.
macro_rules! impl_serde {
  ($i:ident, $t:ty) => {
    #[cfg(feature = "serde")]
    impl Serialize for $i {
      /// Serializes the lanes as a fixed-size tuple.
      ///
      /// Deserialization goes through the array type, which serde reads as a
      /// tuple. Writing a tuple here (rather than a length-prefixed sequence)
      /// keeps the two directions symmetric in formats that are not
      /// self-describing.
      #[inline]
      fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
      where
        S: serde::Serializer,
      {
        let array = self.as_array_ref();
        let mut tuple = serializer.serialize_tuple(array.len())?;
        for e in array {
          // Fully qualified so that no extra trait import is required.
          serde::ser::SerializeTuple::serialize_element(&mut tuple, e)?;
        }
        serde::ser::SerializeTuple::end(tuple)
      }
    }

    #[cfg(feature = "serde")]
    impl<'de> Deserialize<'de> for $i {
      /// Deserializes the array type `$t` and converts it into the vector.
      #[inline]
      fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
      where
        D: serde::Deserializer<'de>,
      {
        Ok(<$t>::deserialize(deserializer)?.into())
      }
    }
  };
}
1012
1013impl_serde!(f32x8, [f32; 8]);
1014impl_serde!(f32x4, [f32; 4]);
1015impl_serde!(f64x4, [f64; 4]);
1016impl_serde!(f64x2, [f64; 2]);
1017impl_serde!(i8x16, [i8; 16]);
1018impl_serde!(i16x16, [i16; 16]);
1019impl_serde!(i8x32, [i8; 32]);
1020impl_serde!(i16x8, [i16; 8]);
1021impl_serde!(i32x4, [i32; 4]);
1022impl_serde!(i32x8, [i32; 8]);
1023impl_serde!(i64x2, [i64; 2]);
1024impl_serde!(i64x4, [i64; 4]);
1025impl_serde!(u8x16, [u8; 16]);
1026impl_serde!(u16x8, [u16; 8]);
1027impl_serde!(u16x16, [u16; 16]);
1028impl_serde!(u32x4, [u32; 4]);
1029impl_serde!(u32x8, [u32; 8]);
1030impl_serde!(u64x2, [u64; 2]);
1031impl_serde!(u64x4, [u64; 4]);