48 lines
1.5 KiB
Rust
48 lines
1.5 KiB
Rust
use core::arch::wasm32::*;
|
|
|
|
pub const fn v128_from_f32x4(a: [f32; 4]) -> v128 {
|
|
f32x4(a[0], a[1], a[2], a[3])
|
|
}
|
|
|
|
/// Calculates the vector 3 dot product and returns answer in x lane of v128.
|
|
#[inline(always)]
|
|
pub(crate) fn dot3_in_x(lhs: v128, rhs: v128) -> v128 {
|
|
let x2_y2_z2_w2 = f32x4_mul(lhs, rhs);
|
|
let y2_0_0_0 = i32x4_shuffle::<1, 0, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2);
|
|
let z2_0_0_0 = i32x4_shuffle::<2, 0, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2);
|
|
let x2y2_0_0_0 = f32x4_add(x2_y2_z2_w2, y2_0_0_0);
|
|
f32x4_add(x2y2_0_0_0, z2_0_0_0)
|
|
}
|
|
|
|
/// Calculates the vector 4 dot product and returns answer in x lane of v128.
|
|
#[inline(always)]
|
|
pub(crate) fn dot4_in_x(lhs: v128, rhs: v128) -> v128 {
|
|
let x2_y2_z2_w2 = f32x4_mul(lhs, rhs);
|
|
let z2_w2_0_0 = i32x4_shuffle::<2, 3, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2);
|
|
let x2z2_y2w2_0_0 = f32x4_add(x2_y2_z2_w2, z2_w2_0_0);
|
|
let y2w2_0_0_0 = i32x4_shuffle::<1, 0, 0, 0>(x2z2_y2w2_0_0, x2z2_y2w2_0_0);
|
|
f32x4_add(x2z2_y2w2_0_0, y2w2_0_0_0)
|
|
}
|
|
|
|
#[inline]
|
|
pub(crate) fn dot3(lhs: v128, rhs: v128) -> f32 {
|
|
f32x4_extract_lane::<0>(dot3_in_x(lhs, rhs))
|
|
}
|
|
|
|
#[inline]
|
|
pub(crate) fn dot3_into_v128(lhs: v128, rhs: v128) -> v128 {
|
|
let dot_in_x = dot3_in_x(lhs, rhs);
|
|
i32x4_shuffle::<0, 0, 0, 0>(dot_in_x, dot_in_x)
|
|
}
|
|
|
|
#[inline]
|
|
pub(crate) fn dot4(lhs: v128, rhs: v128) -> f32 {
|
|
f32x4_extract_lane::<0>(dot4_in_x(lhs, rhs))
|
|
}
|
|
|
|
#[inline]
|
|
pub(crate) fn dot4_into_v128(lhs: v128, rhs: v128) -> v128 {
|
|
let dot_in_x = dot4_in_x(lhs, rhs);
|
|
i32x4_shuffle::<0, 0, 0, 0>(dot_in_x, dot_in_x)
|
|
}
|