1use super::*;
2
// Storage for eight `f32` lanes. The crate's `pick!` macro selects the layout
// at compile time: a single 256-bit AVX register when AVX is available,
// otherwise two 128-bit `f32x4` halves. Both layouts are 32 bytes with
// 32-byte alignment, so `cast`-based reinterpretation works either way.
pick! {
  if #[cfg(target_feature="avx")] {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { avx: m256 }
  } else {
    // Fallback: delegate every operation to the two `f32x4` halves.
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { a : f32x4, b : f32x4 }
  }
}
14
/// Declares a `pub const` `f32x8` with all eight lanes set to `$f`.
///
/// Splatting through `ConstUnionHack256bit` keeps the initializer usable in
/// `const` position regardless of which `pick!` layout was chosen
/// (constructor fns are not `const` here).
macro_rules! const_f32_as_f32x8 {
  ($i:ident, $f:expr) => {
    #[allow(non_upper_case_globals)]
    pub const $i: f32x8 =
      unsafe { ConstUnionHack256bit { f32a8: [$f; 8] }.f32x8 };
  };
}
22
// Lane-splatted versions of the common `core::f32::consts` values, plus a few
// basics (ONE/HALF/ZERO) used by the math kernels below.
impl f32x8 {
  const_f32_as_f32x8!(ONE, 1.0);
  const_f32_as_f32x8!(HALF, 0.5);
  const_f32_as_f32x8!(ZERO, 0.0);
  const_f32_as_f32x8!(E, core::f32::consts::E);
  const_f32_as_f32x8!(FRAC_1_PI, core::f32::consts::FRAC_1_PI);
  const_f32_as_f32x8!(FRAC_2_PI, core::f32::consts::FRAC_2_PI);
  const_f32_as_f32x8!(FRAC_2_SQRT_PI, core::f32::consts::FRAC_2_SQRT_PI);
  const_f32_as_f32x8!(FRAC_1_SQRT_2, core::f32::consts::FRAC_1_SQRT_2);
  const_f32_as_f32x8!(FRAC_PI_2, core::f32::consts::FRAC_PI_2);
  const_f32_as_f32x8!(FRAC_PI_3, core::f32::consts::FRAC_PI_3);
  const_f32_as_f32x8!(FRAC_PI_4, core::f32::consts::FRAC_PI_4);
  const_f32_as_f32x8!(FRAC_PI_6, core::f32::consts::FRAC_PI_6);
  const_f32_as_f32x8!(FRAC_PI_8, core::f32::consts::FRAC_PI_8);
  const_f32_as_f32x8!(LN_2, core::f32::consts::LN_2);
  const_f32_as_f32x8!(LN_10, core::f32::consts::LN_10);
  const_f32_as_f32x8!(LOG2_E, core::f32::consts::LOG2_E);
  const_f32_as_f32x8!(LOG10_E, core::f32::consts::LOG10_E);
  const_f32_as_f32x8!(LOG10_2, core::f32::consts::LOG10_2);
  const_f32_as_f32x8!(LOG2_10, core::f32::consts::LOG2_10);
  const_f32_as_f32x8!(PI, core::f32::consts::PI);
  const_f32_as_f32x8!(SQRT_2, core::f32::consts::SQRT_2);
  const_f32_as_f32x8!(TAU, core::f32::consts::TAU);
}
47
// SAFETY: every bit pattern is a valid f32x8 (eight arbitrary f32 lanes),
// so an all-zero value is valid. The type is `repr(C, align(32))` in both
// `pick!` layouts — presumably with no padding (m256 / two f32x4 halves are
// each 32 bytes); verify against the layout definitions if those change.
unsafe impl Zeroable for f32x8 {}
unsafe impl Pod for f32x8 {}
50
impl Add for f32x8 {
  type Output = Self;
  /// Lane-wise addition.
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: add_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: add each 128-bit half separately.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
68
impl Sub for f32x8 {
  type Output = Self;
  /// Lane-wise subtraction.
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sub_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
86
impl Mul for f32x8 {
  type Output = Self;
  /// Lane-wise multiplication.
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: mul_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
104
impl Div for f32x8 {
  type Output = Self;
  /// Lane-wise division.
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: div_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.div(rhs.a),
          b : self.b.div(rhs.b),
        }
      }
    }
  }
}
122
123impl Add<f32> for f32x8 {
124 type Output = Self;
125 #[inline]
126 #[must_use]
127 fn add(self, rhs: f32) -> Self::Output {
128 self.add(Self::splat(rhs))
129 }
130}
131
132impl Sub<f32> for f32x8 {
133 type Output = Self;
134 #[inline]
135 #[must_use]
136 fn sub(self, rhs: f32) -> Self::Output {
137 self.sub(Self::splat(rhs))
138 }
139}
140
141impl Mul<f32> for f32x8 {
142 type Output = Self;
143 #[inline]
144 #[must_use]
145 fn mul(self, rhs: f32) -> Self::Output {
146 self.mul(Self::splat(rhs))
147 }
148}
149
150impl Div<f32> for f32x8 {
151 type Output = Self;
152 #[inline]
153 #[must_use]
154 fn div(self, rhs: f32) -> Self::Output {
155 self.div(Self::splat(rhs))
156 }
157}
158
159impl Add<f32x8> for f32 {
160 type Output = f32x8;
161 #[inline]
162 #[must_use]
163 fn add(self, rhs: f32x8) -> Self::Output {
164 f32x8::splat(self).add(rhs)
165 }
166}
167
168impl Sub<f32x8> for f32 {
169 type Output = f32x8;
170 #[inline]
171 #[must_use]
172 fn sub(self, rhs: f32x8) -> Self::Output {
173 f32x8::splat(self).sub(rhs)
174 }
175}
176
177impl Mul<f32x8> for f32 {
178 type Output = f32x8;
179 #[inline]
180 #[must_use]
181 fn mul(self, rhs: f32x8) -> Self::Output {
182 f32x8::splat(self).mul(rhs)
183 }
184}
185
186impl Div<f32x8> for f32 {
187 type Output = f32x8;
188 #[inline]
189 #[must_use]
190 fn div(self, rhs: f32x8) -> Self::Output {
191 f32x8::splat(self).div(rhs)
192 }
193}
194
impl BitAnd for f32x8 {
  type Output = Self;
  /// Bitwise AND of the raw lane bits (useful for mask operations).
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitand_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
212
impl BitOr for f32x8 {
  type Output = Self;
  /// Bitwise OR of the raw lane bits (useful for mask operations).
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}
230
impl BitXor for f32x8 {
  type Output = Self;
  /// Bitwise XOR of the raw lane bits (useful for sign flipping).
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitxor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
248
impl CmpEq for f32x8 {
  type Output = Self;
  /// Lane-wise `==`, producing an all-1s/all-0s bit mask per lane.
  // "Ordered" comparison: a NaN in either lane yields false (all-0s).
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}
266
impl CmpGe for f32x8 {
  type Output = Self;
  /// Lane-wise `>=`, producing an all-1s/all-0s bit mask per lane.
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ge(rhs.a),
          b : self.b.cmp_ge(rhs.b),
        }
      }
    }
  }
}
284
impl CmpGt for f32x8 {
  type Output = Self;
  /// Lane-wise `>`, producing an all-1s/all-0s bit mask per lane.
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}
302
impl CmpNe for f32x8 {
  type Output = Self;
  /// Lane-wise `!=`, producing an all-1s/all-0s bit mask per lane.
  // Note: "NotEqualOrdered" — NaN lanes compare false here, unlike scalar `!=`.
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ne(rhs.a),
          b : self.b.cmp_ne(rhs.b),
        }
      }
    }
  }
}
320
impl CmpLe for f32x8 {
  type Output = Self;
  /// Lane-wise `<=`, producing an all-1s/all-0s bit mask per lane.
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_le(rhs.a),
          b : self.b.cmp_le(rhs.b),
        }
      }
    }
  }
}
338
impl CmpLt for f32x8 {
  type Output = Self;
  /// Lane-wise `<`, producing an all-1s/all-0s bit mask per lane.
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}
356
impl f32x8 {
  /// Constructs a vector from an array of eight lanes.
  #[inline]
  #[must_use]
  pub fn new(array: [f32; 8]) -> Self {
    Self::from(array)
  }

  /// Lane-wise select: lanes where `self` is all-1s take `t`, lanes where it
  /// is all-0s take `f`. `self` is expected to be a comparison-style mask.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: blend_varying_m256(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  /// Lane-wise absolute value (clears the sign bit).
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // AND with 0x7FFF_FFFF in every lane strips the sign bit.
        let non_sign_bits = f32x8::from(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }

  /// Lane-wise maximum without NaN handling: on AVX this is raw `max_m256`,
  /// whose result for a NaN operand follows the hardware rule (the second
  /// operand wins). Faster than [`max`](Self::max) but not NaN-safe.
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }

  /// Lane-wise maximum; where `rhs` is NaN the lane from `self` is kept.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Patch over the hardware max so a NaN in rhs doesn't poison the lane.
        rhs.is_nan().blend(self, Self { avx: max_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }

  /// Lane-wise minimum without NaN handling; see [`fast_max`](Self::fast_max).
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }

  /// Lane-wise minimum; where `rhs` is NaN the lane from `self` is kept.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: min_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

  /// Per-lane NaN test, returned as an all-1s/all-0s mask.
  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Unordered self-compare is true only for NaN lanes.
        Self { avx: cmp_op_mask_m256::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }

  /// Per-lane finiteness test (not NaN and not ±inf), as a mask.
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    // Shift the sign bit out; a lane is non-finite iff its exponent bits are
    // all 1s, which after `<< 1` sit in the top 8 bits.
    let shifted_exp_mask = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
    cast(out)
  }

  /// Per-lane infinity test (±inf only, not NaN), as a mask.
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    // After dropping the sign bit, ±inf is exactly exponent=all-1s,
    // mantissa=0, i.e. the shifted value equals 0xFF000000.
    let shifted_inf = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).cmp_eq(shifted_inf);
    cast(out)
  }

  /// Lane-wise round to nearest (AVX `Nearest` mode; ties follow the
  /// hardware rounding mode — presumably round-half-to-even, confirm
  /// against `round_m256` docs).
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }

  /// Round to nearest and convert to `i32` lanes, with no special handling of
  /// NaN or out-of-range values (those produce whatever the hardware
  /// conversion yields).
  #[inline]
  #[must_use]
  pub fn fast_round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        cast(convert_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_round_int(),
          self.b.fast_round_int()])
      }
    }
  }

  /// Round to nearest and convert to `i32` lanes, mapping NaN to 0 and
  /// values >= 2^31 to `i32::MAX`-style saturation via the xor trick below.
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // NaN lanes fail self-equality, so masking zeroes them before convert.
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        // Lanes >= 2^31 convert to 0x8000_0000; xoring with the all-1s
        // cmp_ge mask flips that to 0x7FFF_FFFF (i32::MAX).
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.round_int(),
          self.b.round_int(),
        ])
      }
    }
  }

  /// Truncate toward zero and convert to `i32` lanes, with no special
  /// handling of NaN or out-of-range values.
  #[inline]
  #[must_use]
  pub fn fast_trunc_int(self) -> i32x8 {
    pick! {
      // NOTE(review): `all(...)` with a single predicate is redundant;
      // plain `target_feature="avx"` (as the other methods use) would do.
      if #[cfg(all(target_feature="avx"))] {
        cast(convert_truncate_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_trunc_int(),
          self.b.fast_trunc_int(),
        ])
      }
    }
  }

  /// Truncate toward zero and convert to `i32` lanes, mapping NaN to 0 and
  /// clamping overflow like [`round_int`](Self::round_int).
  #[inline]
  #[must_use]
  pub fn trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.trunc_int(),
          self.b.trunc_int(),
        ])
      }
    }
  }

  /// Lane-wise `(self * m) + a`, as a single fused op when FMA is enabled
  /// (one rounding step), otherwise as two separately-rounded ops.
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }

  /// Lane-wise `(self * m) - a`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - a
      } else {
        Self {
          a : self.a.mul_sub(m.a, a.a),
          b : self.b.mul_sub(m.b, a.b),
        }
      }
    }
  }

  /// Lane-wise `a - (self * m)`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }

  /// Lane-wise `-(self * m) - a`, fused when FMA is available.
  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - a
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, a.a),
          b : self.b.mul_neg_sub(m.b, a.b),
        }
      }
    }
  }

  /// Flips the sign of each lane of `self` where the matching lane of
  /// `signs` is negative (xor with the sign bit of `signs`).
  #[inline]
  #[must_use]
  pub fn flip_signs(self, signs: Self) -> Self {
    self ^ (signs & Self::from(-0.0))
  }

  /// IEEE `copysign`: magnitude from `self`, sign bit from `sign`.
  #[inline]
  #[must_use]
  pub fn copysign(self, sign: Self) -> Self {
    let magnitude_mask = Self::from(f32::from_bits(u32::MAX >> 1));
    (self & magnitude_mask) | (sign & Self::from(-0.0))
  }

  /// Computes `(asin(self), acos(self))` in one pass, sharing the common
  /// polynomial evaluation. Same approximation as [`asin`](Self::asin) /
  /// [`acos`](Self::acos) (Cephes-style minimax polynomial).
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    // Minimax polynomial coefficients for asin on [0, 0.5].
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5 lanes use the identity asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)).
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // acos branch: reflect for negative inputs in the "big" path.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    // asin branch: sign is restored from the original input at the end.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    (asin, acos)
  }

  /// Lane-wise arcsine via a Cephes-style polynomial approximation.
  #[inline]
  #[must_use]
  pub fn asin(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)) for |x| >= 0.5.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    asin
  }

  /// Lane-wise arccosine via a Cephes-style polynomial approximation.
  #[inline]
  #[must_use]
  pub fn acos(self) -> Self {
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // Reflect across pi/2 for negative inputs in the "big" path.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }

  /// Lane-wise arctangent via argument reduction plus a degree-3
  /// polynomial in z^2 (Cephes-style coefficients).
  #[inline]
  pub fn atan(self) -> Self {
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let t = self.abs();

    // Reduce |t| into [tan(pi/8), tan(3pi/8)) using the two thresholds
    // sqrt(2)-1 and sqrt(2)+1; `s` is the angle offset added back later.
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);
    let notbig = t.cmp_le(Self::SQRT_2 + Self::ONE);

    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;

    // a/b implements the reduced argument: t, (t-1)/(t+1), or -1/t.
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // atan is odd: restore the input's sign.
    re = (self.sign_bit()).blend(-re, re);

    re
  }

  /// Lane-wise `atan2(self, x)` (y = self), quadrant-correct, using the same
  /// polynomial core as [`atan`](Self::atan).
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let y = self;

    // Work with |x|, |y|, putting the larger of the two in x2 so the ratio
    // t = y2/x2 stays in [0, 1]; the swap is undone below.
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // inf/inf would give NaN; mask both to finite values of the same sign
    // pattern so those lanes resolve to the conventional +-pi/4-style answers.
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    // Same small/large reduction as `atan`, but t is already <= 1.
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);

    let a = notsmal.blend(t - Self::ONE, t);
    let b = notsmal.blend(t + Self::ONE, Self::ONE);
    let s = notsmal & Self::FRAC_PI_4;
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Undo the swap, then fix up quadrant by the original signs of x and y.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    re = (y.sign_bit()).blend(-re, re);

    re
  }

  /// Computes `(sin(self), cos(self))` in one pass using Payne/Hanek-style
  /// three-part pi/2 reduction (DP1F..DP3F) and separate sin/cos polynomials.
  #[inline]
  #[must_use]
  pub fn sin_cos(self) -> (Self, Self) {
    // pi/2 split into three parts for extended-precision argument reduction.
    const_f32_as_f32x8!(DP1F, 0.78515625_f32 * 2.0);
    const_f32_as_f32x8!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
    const_f32_as_f32x8!(DP3F, 3.77489497744594108E-8_f32 * 2.0);

    const_f32_as_f32x8!(P0sinf, -1.6666654611E-1);
    const_f32_as_f32x8!(P1sinf, 8.3321608736E-3);
    const_f32_as_f32x8!(P2sinf, -1.9515295891E-4);

    const_f32_as_f32x8!(P0cosf, 4.166664568298827E-2);
    const_f32_as_f32x8!(P1cosf, -1.388731625493765E-3);
    const_f32_as_f32x8!(P2cosf, 2.443315711809948E-5);

    const_f32_as_f32x8!(TWO_OVER_PI, 2.0 / core::f32::consts::PI);

    let xa = self.abs();

    // Quadrant index q = round(|x| * 2/pi).
    let y = (xa * TWO_OVER_PI).round();
    let q: i32x8 = y.round_int();

    // x = |self| - q*pi/2 computed in three steps to limit rounding error.
    let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa)));

    let x2 = x * x;
    let mut s = polynomial_2!(x2, P0sinf, P1sinf, P2sinf) * (x * x2) + x;
    let mut c = polynomial_2!(x2, P0cosf, P1cosf, P2cosf) * (x2 * x2)
      + f32x8::from(0.5).mul_neg_add(x2, f32x8::from(1.0));

    // Odd quadrants swap the roles of sin and cos.
    let swap = !(q & i32x8::from(1)).cmp_eq(i32x8::from(0));

    // Huge (but finite) arguments have lost all precision: force sin=0, cos=1.
    let mut overflow: f32x8 = cast(q.cmp_gt(i32x8::from(0x2000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f32x8::from(0.0), s);
    c = overflow.blend(f32x8::from(1.0), c);

    // Sign of sin: quadrant bit 1 xored with the input's sign.
    let mut sin1 = cast::<_, f32x8>(swap).blend(c, s);
    let sign_sin: i32x8 = (q << 30) ^ cast::<_, i32x8>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // Sign of cos: set when (q+1) has bit 1, independent of input sign.
    let mut cos1 = cast::<_, f32x8>(swap).blend(s, c);
    let sign_cos: i32x8 = ((q + i32x8::from(1)) & i32x8::from(2)) << 30;
    cos1 ^= cast::<_, f32x8>(sign_cos);

    (sin1, cos1)
  }

  /// Lane-wise sine; see [`sin_cos`](Self::sin_cos).
  #[inline]
  #[must_use]
  pub fn sin(self) -> Self {
    let (s, _) = self.sin_cos();
    s
  }

  /// Lane-wise cosine; see [`sin_cos`](Self::sin_cos).
  #[inline]
  #[must_use]
  pub fn cos(self) -> Self {
    let (_, c) = self.sin_cos();
    c
  }

  /// Lane-wise tangent, computed as sin/cos.
  #[inline]
  #[must_use]
  pub fn tan(self) -> Self {
    let (s, c) = self.sin_cos();
    s / c
  }

  /// Converts each lane from radians to degrees.
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f32_as_f32x8!(RAD_TO_DEG_RATIO, 180.0_f32 / core::f32::consts::PI);
    self * RAD_TO_DEG_RATIO
  }

  /// Converts each lane from degrees to radians.
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f32_as_f32x8!(DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32);
    self * DEG_TO_RAD_RATIO
  }

  /// Lane-wise approximate reciprocal (AVX `rcp`-style, low precision —
  /// not the same result as `1.0 / self`).
  #[inline]
  #[must_use]
  pub fn recip(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip(),
          b : self.b.recip(),
        }
      }
    }
  }

  /// Lane-wise approximate reciprocal square root (AVX `rsqrt`-style,
  /// low precision).
  #[inline]
  #[must_use]
  pub fn recip_sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip_sqrt(),
          b : self.b.recip_sqrt(),
        }
      }
    }
  }

  /// Lane-wise square root (full precision).
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }

  /// Packs each lane's sign bit into the low 8 bits of an `i32`
  /// (lane 0 -> bit 0).
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx)
      } else {
        // Low half occupies bits 0..4, high half bits 4..8.
        (self.b.move_mask() << 4) | self.a.move_mask()
      }
    }
  }

  /// True if any lane's sign/mask bit is set.
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }

  /// True if all eight lanes' sign/mask bits are set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) == 0b11111111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }

  /// True if no lane's sign/mask bit is set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// Computes 2^n per lane for integral-valued `self`, by building the float
  /// bit pattern directly: add bias+2^23 so the integer lands in the mantissa,
  /// then shift it into the exponent field. Helper for `exp`.
  #[inline]
  fn vm_pow2n(self) -> Self {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = self + (bias + pow2_23);
    let c = cast::<_, i32x8>(a) << 23;
    cast::<_, f32x8>(c)
  }

  /// Lane-wise e^x: reduce by powers of 2 (r = round(x*log2(e))), evaluate a
  /// degree-5 Taylor-style polynomial on the remainder, then scale by 2^r.
  /// Out-of-range or non-finite inputs yield 0.
  #[inline]
  #[must_use]
  pub fn exp(self) -> Self {
    // Taylor coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(P0, 1.0 / 2.0);
    const_f32_as_f32x8!(P1, 1.0 / 6.0);
    const_f32_as_f32x8!(P2, 1. / 24.);
    const_f32_as_f32x8!(P3, 1. / 120.);
    const_f32_as_f32x8!(P4, 1. / 720.);
    const_f32_as_f32x8!(P5, 1. / 5040.);
    // ln(2) split hi/lo for extended-precision reduction.
    const_f32_as_f32x8!(LN2D_HI, 0.693359375);
    const_f32_as_f32x8!(LN2D_LO, -2.12194440e-4);
    let max_x = f32x8::from(87.3);
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    let z = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let z = z.mul_add(x2, x);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    // |x| >= 87.3 would overflow the f32 exponent; those lanes become 0.
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }

  /// Extracts each lane's unbiased exponent as a float (helper for ln/pow).
  #[inline]
  fn exponent(self) -> f32x8 {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = cast::<_, u32x8>(self);
    // Shift the exponent field into the mantissa of a 2^23-based float,
    // then subtract the offset to recover the unbiased exponent value.
    let b = a >> 23;
    let c = b | cast::<_, u32x8>(pow2_23);
    let d = cast::<_, f32x8>(c);
    let e = d - (pow2_23 + bias);
    e
  }

  /// Replaces each lane's exponent with 0 (bit pattern 0x3F000000), mapping
  /// the mantissa into [0.5, 1.0) (helper for ln/pow).
  #[inline]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u32x8>(self);
    let t2 = cast::<_, u32x8>(
      (t1 & u32x8::from(0x007FFFFF)) | u32x8::from(0x3F000000),
    );
    cast::<_, f32x8>(t2)
  }

  /// Mask of lanes whose exponent bits are all zero (i.e. ±0 or subnormal),
  /// returned as 1.0/0.0-style floats via `round_float`.
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i32x8>(self);
    let t = t & i32x8::splat(0x7F800000);
    i32x8::round_float(t.cmp_eq(i32x8::splat(0)))
  }

  /// All lanes +infinity.
  #[inline]
  fn infinity() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7F800000))
  }

  /// NaN with payload 0x101, returned for invalid log inputs.
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }

  /// NaN with payload 0x101, returned for invalid pow inputs.
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }

  /// Mask of lanes whose sign bit is set (includes -0.0 and negative NaN).
  #[inline]
  pub fn sign_bit(self) -> Self {
    let t1 = cast::<_, i32x8>(self);
    // Arithmetic shift smears the sign bit across the lane: 0 or -1.
    let t2 = t1 >> 31;
    !cast::<_, f32x8>(t2).cmp_eq(f32x8::ZERO)
  }

  /// Horizontal sum of all eight lanes.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f32 {
    pick! {
      if #[cfg(target_feature="avx")]{
        // Fold 256 -> 128 -> 64 -> 32 bits with pairwise adds.
        let hi_quad = extract_m128_from_m256::<1>(self.avx);
        let lo_quad = cast_to_m128_from_m256(self.avx);
        let sum_quad = add_m128(lo_quad,hi_quad);
        let lo_dual = sum_quad;
        let hi_dual = move_high_low_m128(sum_quad,sum_quad);
        let sum_dual = add_m128(lo_dual,hi_dual);
        let lo = sum_dual;
        let hi = shuffle_abi_f32_all_m128::<0b_01>(sum_dual, sum_dual);
        let sum = add_m128_s(lo, hi);
        get_f32_from_m128_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }

  /// Lane-wise natural logarithm: split into mantissa (via `fraction_2`) and
  /// exponent, evaluate a degree-8 polynomial on the mantissa, and recombine
  /// with ln(2) split hi/lo. Non-finite input is passed through; input below
  /// the smallest normal yields NaN; zero/subnormal yields +inf
  /// (NOTE(review): +inf rather than the conventional -inf for ln(0) —
  /// verify intent against the crate's scalar/f32x4 behavior).
  #[inline]
  #[must_use]
  pub fn ln(self) -> Self {
    const_f32_as_f32x8!(HALF, 0.5);
    const_f32_as_f32x8!(P0, 3.3333331174E-1);
    const_f32_as_f32x8!(P1, -2.4999993993E-1);
    const_f32_as_f32x8!(P2, 2.0000714765E-1);
    const_f32_as_f32x8!(P3, -1.6668057665E-1);
    const_f32_as_f32x8!(P4, 1.4249322787E-1);
    const_f32_as_f32x8!(P5, -1.2420140846E-1);
    const_f32_as_f32x8!(P6, 1.1676998740E-1);
    const_f32_as_f32x8!(P7, -1.1514610310E-1);
    const_f32_as_f32x8!(P8, 7.0376836292E-2);
    const_f32_as_f32x8!(LN2F_HI, 0.693359375);
    const_f32_as_f32x8!(LN2F_LO, -2.12194440e-4);
    const_f32_as_f32x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    // Keep the mantissa near 1: double it (and bump the exponent) when it is
    // below sqrt(2)/2.
    let mask = x.cmp_gt(Self::SQRT_2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let res = polynomial_8!(x, P0, P1, P2, P3, P4, P5, P6, P7, P8);
    let x2 = x * x;
    let res = x2 * x * res;
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      // Special-case fixups, applied in increasing priority.
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }

  /// Lane-wise base-2 logarithm (ln scaled by log2(e)).
  #[inline]
  #[must_use]
  pub fn log2(self) -> Self {
    Self::ln(self) * Self::LOG2_E
  }

  /// Lane-wise base-10 logarithm (ln scaled by log10(e)).
  #[inline]
  #[must_use]
  pub fn log10(self) -> Self {
    Self::ln(self) * Self::LOG10_E
  }

  /// Lane-wise `self^y` computed as exp2(y*log2(self)) with extended-precision
  /// bookkeeping (the `*_err` terms) to limit the error of the combined
  /// log-then-exp. Handles zero bases, negative bases with integral `y`
  /// (sign from oddness of `y`), overflow/underflow, and NaN propagation.
  #[inline]
  #[must_use]
  pub fn pow_f32x8(self, y: Self) -> Self {
    const_f32_as_f32x8!(ln2f_hi, 0.693359375);
    const_f32_as_f32x8!(ln2f_lo, -2.12194440e-4);
    // Log polynomial coefficients (same as `ln`).
    const_f32_as_f32x8!(P0logf, 3.3333331174E-1);
    const_f32_as_f32x8!(P1logf, -2.4999993993E-1);
    const_f32_as_f32x8!(P2logf, 2.0000714765E-1);
    const_f32_as_f32x8!(P3logf, -1.6668057665E-1);
    const_f32_as_f32x8!(P4logf, 1.4249322787E-1);
    const_f32_as_f32x8!(P5logf, -1.2420140846E-1);
    const_f32_as_f32x8!(P6logf, 1.1676998740E-1);
    const_f32_as_f32x8!(P7logf, -1.1514610310E-1);
    const_f32_as_f32x8!(P8logf, 7.0376836292E-2);

    // Exp polynomial coefficients 1/2! .. 1/7!.
    const_f32_as_f32x8!(p2expf, 1.0 / 2.0);
    const_f32_as_f32x8!(p3expf, 1.0 / 6.0);
    const_f32_as_f32x8!(p4expf, 1.0 / 24.0);
    const_f32_as_f32x8!(p5expf, 1.0 / 120.0);
    const_f32_as_f32x8!(p6expf, 1.0 / 720.0);
    const_f32_as_f32x8!(p7expf, 1.0 / 5040.0);

    // --- log(|self|) with error terms ---
    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f32x8::SQRT_2 * f32x8::HALF);
    let x = (!mask).blend(x + x, x);

    let x = x - f32x8::ONE;
    let x2 = x * x;
    let lg1 = polynomial_8!(
      x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf
    );
    let lg1 = lg1 * x2 * x;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f32x8::ONE, ef);
    // e1 = round(exponent * y); yr carries the rounding residue.
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f32x8::HALF.mul_neg_add(x2, x) + lg1;
    let x2_err = (f32x8::HALF * x).mul_sub(x, f32x8::HALF * x2);
    let lg_err = f32x8::HALF.mul_add(x2, lg - x) - lg1;

    // --- multiply by y and re-reduce ---
    let e2 = (lg * y * f32x8::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2f_hi);
    let v = e2.mul_neg_add(ln2f_lo, v);
    let v = v - (lg_err + x2_err).mul_sub(y, yr * f32x8::LN_2);

    // --- exp of the remainder ---
    let x = v;
    let e3 = (x * f32x8::LOG2_E).round();
    let x = e3.mul_neg_add(f32x8::LN_2, x);
    let x2 = x * x;
    let z = x2.mul_add(
      polynomial_5!(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf),
      x + f32x8::ONE,
    );

    // Combined exponent; ej includes z's own exponent for range checking.
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i32x8>(ee.round_int());
    let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23));

    let overflow = cast::<_, f32x8>(ej.cmp_gt(i32x8::splat(0x0FF)))
      | (ee.cmp_gt(f32x8::splat(300.0)));
    let underflow = cast::<_, f32x8>(ej.cmp_lt(i32x8::splat(0x000)))
      | (ee.cmp_lt(f32x8::splat(-300.0)));

    // Scale z by 2^ee via direct exponent-field addition.
    let z = cast::<_, f32x8>(cast::<_, i32x8>(z) + (ei << 23));
    let z = underflow.blend(f32x8::ZERO, z);
    let z = overflow.blend(Self::infinity(), z);

    // 0^y: +inf for y<0, 1 for y==0, 0 for y>0.
    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f32x8::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f32x8::ZERO).blend(f32x8::ONE, f32x8::ZERO),
      ),
      z,
    );

    // Negative base: valid only for integral y; odd y flips the sign,
    // non-integral y yields NaN.
    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());
      // Sign bit of the result when y is odd.
      let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float();
      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));
      x_sign.blend(z1, z)
    } else {
      z
    };

    // NaN propagation for non-finite operands.
    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();
    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }

  /// Lane-wise `self^y` with a scalar exponent; see
  /// [`pow_f32x8`](Self::pow_f32x8).
  #[inline]
  pub fn powf(self, y: f32) -> Self {
    Self::pow_f32x8(self, f32x8::splat(y))
  }

  /// Transposes an 8x8 matrix held as eight row vectors, returning the
  /// columns as rows.
  #[must_use]
  #[inline]
  pub fn transpose(data: [f32x8; 8]) -> [f32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Standard 8x8 AVX transpose: unpack pairs, shuffle 128-bit halves,
        // then permute the two 128-bit lanes across registers.
        let a0 = unpack_lo_m256(data[0].avx, data[1].avx);
        let a1 = unpack_hi_m256(data[0].avx, data[1].avx);
        let a2 = unpack_lo_m256(data[2].avx, data[3].avx);
        let a3 = unpack_hi_m256(data[2].avx, data[3].avx);
        let a4 = unpack_lo_m256(data[4].avx, data[5].avx);
        let a5 = unpack_hi_m256(data[4].avx, data[5].avx);
        let a6 = unpack_lo_m256(data[6].avx, data[7].avx);
        let a7 = unpack_hi_m256(data[6].avx, data[7].avx);

        // Equivalent of the _MM_SHUFFLE(z, y, x, w) selector macro.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        let b0 = shuffle_m256::<SHUFF_LO>(a0,a2);
        let b1 = shuffle_m256::<SHUFF_HI>(a0,a2);
        let b2 = shuffle_m256::<SHUFF_LO>(a1,a3);
        let b3 = shuffle_m256::<SHUFF_HI>(a1,a3);
        let b4 = shuffle_m256::<SHUFF_LO>(a4,a6);
        let b5 = shuffle_m256::<SHUFF_HI>(a4,a6);
        let b6 = shuffle_m256::<SHUFF_LO>(a5,a7);
        let b7 = shuffle_m256::<SHUFF_HI>(a5,a7);

        [
          f32x8 { avx: permute2z_m256::<0x20>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x20>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x20>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x20>(b3, b7) },
          f32x8 { avx: permute2z_m256::<0x31>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x31>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x31>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: gather lane `index` from each row.
        #[inline(always)]
        fn transpose_column(data: &[f32x8; 8], index: usize) -> f32x8 {
          f32x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

  /// Copies the lanes out as a plain array.
  #[inline]
  pub fn to_array(self) -> [f32; 8] {
    cast(self)
  }

  /// Borrows the lanes as a plain array.
  #[inline]
  pub fn as_array_ref(&self) -> &[f32; 8] {
    cast_ref(self)
  }

  /// Mutably borrows the lanes as a plain array.
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f32; 8] {
    cast_mut(self)
  }

  /// Converts `i32` lanes to `f32` lanes (uses the AVX2 intrinsic when
  /// available; note the gate is avx2, not avx, since the conversion needs
  /// the integer register form).
  #[inline]
  pub fn from_i32x8(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx: convert_to_m256_from_i32_m256i(v.avx2) }
      } else {
        Self::new([
          v.as_array_ref()[0] as f32,
          v.as_array_ref()[1] as f32,
          v.as_array_ref()[2] as f32,
          v.as_array_ref()[3] as f32,
          v.as_array_ref()[4] as f32,
          v.as_array_ref()[5] as f32,
          v.as_array_ref()[6] as f32,
          v.as_array_ref()[7] as f32,
        ])
      }
    }
  }
}
1431
impl Not for f32x8 {
  type Output = Self;
  /// Bitwise NOT of the raw lane bits (mask inversion).
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: self.avx.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}