// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT
|
|
|
|
#pragma once
|
|
|
|
#include <Jolt/Math/DVec3.h>
|
|
|
|
JPH_NAMESPACE_BEGIN
|
|
|
|
DMat44::DMat44(Vec4Arg inC1, Vec4Arg inC2, Vec4Arg inC3, DVec3Arg inC4) :
|
|
mCol { inC1, inC2, inC3 },
|
|
mCol3(inC4)
|
|
{
|
|
}
|
|
|
|
DMat44::DMat44(Type inC1, Type inC2, Type inC3, DTypeArg inC4) :
|
|
mCol { inC1, inC2, inC3 },
|
|
mCol3(inC4)
|
|
{
|
|
}
|
|
|
|
// Construct from a Mat44: the first three columns are copied as is and the
// translation of inM is converted to a DVec3 for mCol3.
DMat44::DMat44(Mat44Arg inM) :
	DMat44(inM, DVec3(inM.GetTranslation())) // Reuse the rotation + translation constructor
{
}
|
|
|
|
DMat44::DMat44(Mat44Arg inRot, DVec3Arg inT) :
|
|
mCol { inRot.GetColumn4(0), inRot.GetColumn4(1), inRot.GetColumn4(2) },
|
|
mCol3(inT)
|
|
{
|
|
}
|
|
|
|
// Returns a matrix whose three Vec4 columns and DVec3 translation are all zero.
DMat44 DMat44::sZero()
{
	const Vec4 zero_column = Vec4::sZero();
	return DMat44(zero_column, zero_column, zero_column, DVec3::sZero());
}
|
|
|
|
// Returns the identity transform: unit axes in the first three columns and a
// zero translation.
DMat44 DMat44::sIdentity()
{
	const Vec4 axis_x(1, 0, 0, 0);
	const Vec4 axis_y(0, 1, 0, 0);
	const Vec4 axis_z(0, 0, 1, 0);
	return DMat44(axis_x, axis_y, axis_z, DVec3::sZero());
}
|
|
|
|
// Builds the matrix with rotation conj(inR) and translation -(conj(inR) * inT).
// NOTE(review): the conjugate only equals the inverse rotation for a unit
// quaternion - presumably inR is expected to be normalized, confirm with callers.
DMat44 DMat44::sInverseRotationTranslation(QuatArg inR, DVec3Arg inT)
{
	// Start with the conjugated rotation and no translation
	DMat44 result(Mat44::sRotation(inR.Conjugated()), DVec3::sZero());

	// Rotate the translation with the 3x3 part and negate it
	const DVec3 rotated_translation = result.Multiply3x3(inT);
	result.SetTranslation(-rotated_translation);

	return result;
}
|
|
|
|
// Exact comparison: true when all three Vec4 columns and the translation
// compare equal with their own operator ==.
bool DMat44::operator == (DMat44Arg inM2) const
{
	// Bail out on the first column that differs
	for (int col = 0; col < 3; ++col)
		if (!(mCol[col] == inM2.mCol[col]))
			return false;

	return mCol3 == inM2.mCol3;
}
|
|
|
|
// Approximate comparison: every column must pass its own IsClose test against
// the matching column of inM2. The same tolerance inMaxDistSq is used for all
// columns; it is converted to double for the translation column.
bool DMat44::IsClose(DMat44Arg inM2, float inMaxDistSq) const
{
	return mCol[0].IsClose(inM2.mCol[0], inMaxDistSq)
		&& mCol[1].IsClose(inM2.mCol[1], inMaxDistSq)
		&& mCol[2].IsClose(inM2.mCol[2], inMaxDistSq)
		&& mCol3.IsClose(inM2.mCol3, double(inMaxDistSq));
}
|
|
|
|
// Transform a Vec3: returns mCol3 + (mCol[0] * x + mCol[1] * y + mCol[2] * z).
// The 3x3 product is accumulated in single precision and is only widened to
// double at the point where it is added to the translation column.
DVec3 DMat44::operator * (Vec3Arg inV) const
{
#if defined(JPH_USE_AVX) || defined(JPH_USE_SSE)
	// The single precision accumulation is identical for the AVX and SSE paths
	__m128 v = inV.mValue;
	__m128 rotated = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)));
	rotated = _mm_add_ps(rotated, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1))));
	rotated = _mm_add_ps(rotated, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2))));

#if defined(JPH_USE_AVX)
	// Widen all four floats at once and add the translation
	return DVec3::sFixW(_mm256_add_pd(mCol3.mValue, _mm256_cvtps_pd(rotated)));
#else
	// Widen two floats at a time. Lane 2 is broadcast before widening so both
	// doubles of the high half receive the same Z contribution.
	__m128 rotated_z = _mm_shuffle_ps(rotated, rotated, _MM_SHUFFLE(2, 2, 2, 2));
	__m128d lo = _mm_add_pd(mCol3.mValue.mLow, _mm_cvtps_pd(rotated));
	__m128d hi = _mm_add_pd(mCol3.mValue.mHigh, _mm_cvtps_pd(rotated_z));
	return DVec3({ lo, hi });
#endif // JPH_USE_AVX

#elif defined(JPH_USE_NEON)
	float32x4_t rotated = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(inV.mValue, 0));
	rotated = vmlaq_f32(rotated, mCol[1].mValue, vdupq_laneq_f32(inV.mValue, 1));
	rotated = vmlaq_f32(rotated, mCol[2].mValue, vdupq_laneq_f32(inV.mValue, 2));

	// Widen the low and high float pairs separately and add the translation
	float64x2_t lo = vaddq_f64(mCol3.mValue.val[0], vcvt_f64_f32(vget_low_f32(rotated)));
	float64x2_t hi = vaddq_f64(mCol3.mValue.val[1], vcvt_high_f64_f32(rotated));
	return DVec3::sFixW({ lo, hi });
#else
	// Scalar fallback: float dot product per row, then add in double precision
	const float x = inV.mF32[0];
	const float y = inV.mF32[1];
	const float z = inV.mF32[2];
	return DVec3(
		mCol3.mF64[0] + double(mCol[0].mF32[0] * x + mCol[1].mF32[0] * y + mCol[2].mF32[0] * z),
		mCol3.mF64[1] + double(mCol[0].mF32[1] * x + mCol[1].mF32[1] * y + mCol[2].mF32[1] * z),
		mCol3.mF64[2] + double(mCol[0].mF32[2] * x + mCol[1].mF32[2] * y + mCol[2].mF32[2] * z));
#endif
}
|
|
|
|
// Transform a DVec3: returns mCol3 + mCol[0] * x + mCol[1] * y + mCol[2] * z.
// Unlike the Vec3 overload every rotation column is widened to double first, so
// all multiplies and adds happen in double precision.
DVec3 DMat44::operator * (DVec3Arg inV) const
{
#if defined(JPH_USE_AVX)
	const __m256d splat_x = _mm256_set1_pd(inV.mF64[0]);
	const __m256d splat_y = _mm256_set1_pd(inV.mF64[1]);
	const __m256d splat_z = _mm256_set1_pd(inV.mF64[2]);

	// Start from the translation and add one scaled column at a time
	__m256d sum = _mm256_add_pd(mCol3.mValue, _mm256_mul_pd(_mm256_cvtps_pd(mCol[0].mValue), splat_x));
	sum = _mm256_add_pd(sum, _mm256_mul_pd(_mm256_cvtps_pd(mCol[1].mValue), splat_y));
	sum = _mm256_add_pd(sum, _mm256_mul_pd(_mm256_cvtps_pd(mCol[2].mValue), splat_z));
	return DVec3::sFixW(sum);
#elif defined(JPH_USE_SSE)
	const __m128d splat_x = _mm_set1_pd(inV.mF64[0]);
	const __m128d splat_y = _mm_set1_pd(inV.mF64[1]);
	const __m128d splat_z = _mm_set1_pd(inV.mF64[2]);
	const __m128 c0 = mCol[0].mValue;
	const __m128 c1 = mCol[1].mValue;
	const __m128 c2 = mCol[2].mValue;

	// Low half: the first two floats of every column
	__m128d lo = _mm_add_pd(mCol3.mValue.mLow, _mm_mul_pd(_mm_cvtps_pd(c0), splat_x));
	lo = _mm_add_pd(lo, _mm_mul_pd(_mm_cvtps_pd(c1), splat_y));
	lo = _mm_add_pd(lo, _mm_mul_pd(_mm_cvtps_pd(c2), splat_z));

	// High half: lane 2 of every column is broadcast before widening so both
	// doubles receive the same Z contribution
	const __m128 c0_z = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(2, 2, 2, 2));
	const __m128 c1_z = _mm_shuffle_ps(c1, c1, _MM_SHUFFLE(2, 2, 2, 2));
	const __m128 c2_z = _mm_shuffle_ps(c2, c2, _MM_SHUFFLE(2, 2, 2, 2));
	__m128d hi = _mm_add_pd(mCol3.mValue.mHigh, _mm_mul_pd(_mm_cvtps_pd(c0_z), splat_x));
	hi = _mm_add_pd(hi, _mm_mul_pd(_mm_cvtps_pd(c1_z), splat_y));
	hi = _mm_add_pd(hi, _mm_mul_pd(_mm_cvtps_pd(c2_z), splat_z));

	return DVec3({ lo, hi });
#elif defined(JPH_USE_NEON)
	const float64x2_t splat_x = vdupq_laneq_f64(inV.mValue.val[0], 0);
	const float64x2_t splat_y = vdupq_laneq_f64(inV.mValue.val[0], 1);
	const float64x2_t splat_z = vdupq_laneq_f64(inV.mValue.val[1], 0);
	const float32x4_t c0 = mCol[0].mValue;
	const float32x4_t c1 = mCol[1].mValue;
	const float32x4_t c2 = mCol[2].mValue;

	// Low half: the first two floats of every column
	float64x2_t lo = vaddq_f64(mCol3.mValue.val[0], vmulq_f64(vcvt_f64_f32(vget_low_f32(c0)), splat_x));
	lo = vaddq_f64(lo, vmulq_f64(vcvt_f64_f32(vget_low_f32(c1)), splat_y));
	lo = vaddq_f64(lo, vmulq_f64(vcvt_f64_f32(vget_low_f32(c2)), splat_z));

	// High half: the last two floats of every column
	float64x2_t hi = vaddq_f64(mCol3.mValue.val[1], vmulq_f64(vcvt_high_f64_f32(c0), splat_x));
	hi = vaddq_f64(hi, vmulq_f64(vcvt_high_f64_f32(c1), splat_y));
	hi = vaddq_f64(hi, vmulq_f64(vcvt_high_f64_f32(c2), splat_z));

	return DVec3::sFixW({ lo, hi });
#else
	// Scalar fallback, one row per output component
	const double x = inV.mF64[0];
	const double y = inV.mF64[1];
	const double z = inV.mF64[2];
	return DVec3(
		mCol3.mF64[0] + double(mCol[0].mF32[0]) * x + double(mCol[1].mF32[0]) * y + double(mCol[2].mF32[0]) * z,
		mCol3.mF64[1] + double(mCol[0].mF32[1]) * x + double(mCol[1].mF32[1]) * y + double(mCol[2].mF32[1]) * z,
		mCol3.mF64[2] + double(mCol[0].mF32[2]) * x + double(mCol[1].mF32[2]) * y + double(mCol[2].mF32[2]) * z);
#endif
}
|
|
|
|
// Multiply a DVec3 by the 3x3 part only: returns
// mCol[0] * x + mCol[1] * y + mCol[2] * z without adding the translation.
// The rotation columns are widened to double before they are multiplied.
DVec3 DMat44::Multiply3x3(DVec3Arg inV) const
{
#if defined(JPH_USE_AVX)
	const __m256d splat_x = _mm256_set1_pd(inV.mF64[0]);
	const __m256d splat_y = _mm256_set1_pd(inV.mF64[1]);
	const __m256d splat_z = _mm256_set1_pd(inV.mF64[2]);

	__m256d sum = _mm256_mul_pd(_mm256_cvtps_pd(mCol[0].mValue), splat_x);
	sum = _mm256_add_pd(sum, _mm256_mul_pd(_mm256_cvtps_pd(mCol[1].mValue), splat_y));
	sum = _mm256_add_pd(sum, _mm256_mul_pd(_mm256_cvtps_pd(mCol[2].mValue), splat_z));
	return DVec3::sFixW(sum);
#elif defined(JPH_USE_SSE)
	const __m128d splat_x = _mm_set1_pd(inV.mF64[0]);
	const __m128d splat_y = _mm_set1_pd(inV.mF64[1]);
	const __m128d splat_z = _mm_set1_pd(inV.mF64[2]);
	const __m128 c0 = mCol[0].mValue;
	const __m128 c1 = mCol[1].mValue;
	const __m128 c2 = mCol[2].mValue;

	// Low half: the first two floats of every column
	__m128d lo = _mm_mul_pd(_mm_cvtps_pd(c0), splat_x);
	lo = _mm_add_pd(lo, _mm_mul_pd(_mm_cvtps_pd(c1), splat_y));
	lo = _mm_add_pd(lo, _mm_mul_pd(_mm_cvtps_pd(c2), splat_z));

	// High half: lane 2 of every column is broadcast before widening so both
	// doubles hold the Z result
	const __m128 c0_z = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(2, 2, 2, 2));
	const __m128 c1_z = _mm_shuffle_ps(c1, c1, _MM_SHUFFLE(2, 2, 2, 2));
	const __m128 c2_z = _mm_shuffle_ps(c2, c2, _MM_SHUFFLE(2, 2, 2, 2));
	__m128d hi = _mm_mul_pd(_mm_cvtps_pd(c0_z), splat_x);
	hi = _mm_add_pd(hi, _mm_mul_pd(_mm_cvtps_pd(c1_z), splat_y));
	hi = _mm_add_pd(hi, _mm_mul_pd(_mm_cvtps_pd(c2_z), splat_z));

	return DVec3({ lo, hi });
#elif defined(JPH_USE_NEON)
	const float64x2_t splat_x = vdupq_laneq_f64(inV.mValue.val[0], 0);
	const float64x2_t splat_y = vdupq_laneq_f64(inV.mValue.val[0], 1);
	const float64x2_t splat_z = vdupq_laneq_f64(inV.mValue.val[1], 0);
	const float32x4_t c0 = mCol[0].mValue;
	const float32x4_t c1 = mCol[1].mValue;
	const float32x4_t c2 = mCol[2].mValue;

	// Low half: the first two floats of every column
	float64x2_t lo = vmulq_f64(vcvt_f64_f32(vget_low_f32(c0)), splat_x);
	lo = vaddq_f64(lo, vmulq_f64(vcvt_f64_f32(vget_low_f32(c1)), splat_y));
	lo = vaddq_f64(lo, vmulq_f64(vcvt_f64_f32(vget_low_f32(c2)), splat_z));

	// High half: the last two floats of every column
	float64x2_t hi = vmulq_f64(vcvt_high_f64_f32(c0), splat_x);
	hi = vaddq_f64(hi, vmulq_f64(vcvt_high_f64_f32(c1), splat_y));
	hi = vaddq_f64(hi, vmulq_f64(vcvt_high_f64_f32(c2), splat_z));

	return DVec3::sFixW({ lo, hi });
#else
	// Scalar fallback, one row per output component
	const double x = inV.mF64[0];
	const double y = inV.mF64[1];
	const double z = inV.mF64[2];
	return DVec3(
		double(mCol[0].mF32[0]) * x + double(mCol[1].mF32[0]) * y + double(mCol[2].mF32[0]) * z,
		double(mCol[0].mF32[1]) * x + double(mCol[1].mF32[1]) * y + double(mCol[2].mF32[1]) * z,
		double(mCol[0].mF32[2]) * x + double(mCol[1].mF32[2]) * y + double(mCol[2].mF32[2]) * z);
#endif
}
|
|
|
|
// Multiply this matrix with a Mat44 on the right hand side.
// Each of the first three result columns is the single precision combination
// mCol[0] * c.x + mCol[1] * c.y + mCol[2] * c.z of the matching column c of inM;
// the result translation is this matrix applied to the translation of inM.
DMat44 DMat44::operator * (Mat44Arg inM) const
{
	DMat44 product;

	// Rotation part, one output column per iteration
	for (int col = 0; col < 3; ++col)
	{
		const Vec4 rhs = inM.GetColumn4(col);

#if defined(JPH_USE_SSE)
		__m128 acc = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(0, 0, 0, 0)));
		acc = _mm_add_ps(acc, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(1, 1, 1, 1))));
		acc = _mm_add_ps(acc, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
		product.mCol[col].mValue = acc;
#elif defined(JPH_USE_NEON)
		Type acc = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(rhs.mValue, 0));
		acc = vmlaq_f32(acc, mCol[1].mValue, vdupq_laneq_f32(rhs.mValue, 1));
		acc = vmlaq_f32(acc, mCol[2].mValue, vdupq_laneq_f32(rhs.mValue, 2));
		product.mCol[col].mValue = acc;
#else
		product.mCol[col] = mCol[0] * rhs.mF32[0] + mCol[1] * rhs.mF32[1] + mCol[2] * rhs.mF32[2];
#endif
	}

	// Translation part, goes through the vector transform so this matrix'
	// translation is added
	product.mCol3 = *this * inM.GetTranslation();

	return product;
}
|
|
|
|
// Multiply this matrix with another DMat44 on the right hand side.
// Each of the first three result columns is the single precision combination
// mCol[0] * c.x + mCol[1] * c.y + mCol[2] * c.z of the matching column c of inM;
// the result translation is this matrix applied to the translation of inM.
DMat44 DMat44::operator * (DMat44Arg inM) const
{
	DMat44 product;

	// Rotation part, one output column per iteration. Every code path reads the
	// right hand column straight from inM.mCol so that all platforms access the
	// operand in the same way.
	for (int col = 0; col < 3; ++col)
	{
		const Vec4 rhs = inM.mCol[col];

#if defined(JPH_USE_SSE)
		__m128 acc = _mm_mul_ps(mCol[0].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(0, 0, 0, 0)));
		acc = _mm_add_ps(acc, _mm_mul_ps(mCol[1].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(1, 1, 1, 1))));
		acc = _mm_add_ps(acc, _mm_mul_ps(mCol[2].mValue, _mm_shuffle_ps(rhs.mValue, rhs.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
		product.mCol[col].mValue = acc;
#elif defined(JPH_USE_NEON)
		Type acc = vmulq_f32(mCol[0].mValue, vdupq_laneq_f32(rhs.mValue, 0));
		acc = vmlaq_f32(acc, mCol[1].mValue, vdupq_laneq_f32(rhs.mValue, 1));
		acc = vmlaq_f32(acc, mCol[2].mValue, vdupq_laneq_f32(rhs.mValue, 2));
		product.mCol[col].mValue = acc;
#else
		product.mCol[col] = mCol[0] * rhs.mF32[0] + mCol[1] * rhs.mF32[1] + mCol[2] * rhs.mF32[2];
#endif
	}

	// Translation part, goes through the vector transform so this matrix'
	// translation is added
	product.mCol3 = *this * inM.GetTranslation();

	return product;
}
|
|
|
|
// Replace the first three columns with those of inRotation. The translation
// column mCol3 is left untouched.
void DMat44::SetRotation(Mat44Arg inRotation)
{
	for (int col = 0; col < 3; ++col)
		mCol[col] = inRotation.GetColumn4(col);
}
|
|
|
|
// Returns a copy in which column 0, 1 and 2 are multiplied by the X, Y and Z
// component of inScale respectively. The translation is copied unchanged.
DMat44 DMat44::PreScaled(Vec3Arg inScale) const
{
	const Vec4 scaled_x = inScale.GetX() * mCol[0];
	const Vec4 scaled_y = inScale.GetY() * mCol[1];
	const Vec4 scaled_z = inScale.GetZ() * mCol[2];
	return DMat44(scaled_x, scaled_y, scaled_z, mCol3);
}
|
|
|
|
// Returns a copy in which every column, including the translation, is
// multiplied by the scale vector (inScale extended with a fourth component of 1).
// NOTE(review): the vector * vector products are presumably component wise -
// confirm against the Vec4 / DVec3 operators.
DMat44 DMat44::PostScaled(Vec3Arg inScale) const
{
	Vec4 scale(inScale, 1);

	const Vec4 scaled_c0 = scale * mCol[0];
	const Vec4 scaled_c1 = scale * mCol[1];
	const Vec4 scaled_c2 = scale * mCol[2];
	const DVec3 scaled_translation = DVec3(scale) * mCol3;

	return DMat44(scaled_c0, scaled_c1, scaled_c2, scaled_translation);
}
|
|
|
|
// Returns a copy with the same first three columns whose translation is the
// current translation plus Multiply3x3(inTranslation), i.e. the offset is run
// through the 3x3 part of this matrix before it is added.
DMat44 DMat44::PreTranslated(Vec3Arg inTranslation) const
{
	const DVec3 translation = GetTranslation() + Multiply3x3(inTranslation);
	return DMat44(mCol[0], mCol[1], mCol[2], translation);
}
|
|
|
|
// Double precision overload: returns a copy with the same first three columns
// whose translation is the current translation plus Multiply3x3(inTranslation),
// i.e. the offset is run through the 3x3 part of this matrix before it is added.
DMat44 DMat44::PreTranslated(DVec3Arg inTranslation) const
{
	const DVec3 translation = GetTranslation() + Multiply3x3(inTranslation);
	return DMat44(mCol[0], mCol[1], mCol[2], translation);
}
|
|
|
|
// Returns a copy with the same first three columns whose translation is the
// current translation plus inTranslation (the offset is added as is).
DMat44 DMat44::PostTranslated(Vec3Arg inTranslation) const
{
	const DVec3 translation = GetTranslation() + inTranslation;
	return DMat44(mCol[0], mCol[1], mCol[2], translation);
}
|
|
|
|
// Double precision overload: returns a copy with the same first three columns
// whose translation is the current translation plus inTranslation (the offset
// is added as is).
DMat44 DMat44::PostTranslated(DVec3Arg inTranslation) const
{
	const DVec3 translation = GetTranslation() + inTranslation;
	return DMat44(mCol[0], mCol[1], mCol[2], translation);
}
|
|
|
|
// Returns the inverse transform: the 3x3 part is inverted with Inversed3x3 and
// the new translation is minus that inverse applied to the current translation.
DMat44 DMat44::Inversed() const
{
	DMat44 inverse(GetRotation().Inversed3x3());

	// Overwrite the translation that came along with the constructor
	const DVec3 rotated_translation = inverse.Multiply3x3(mCol3);
	inverse.mCol3 = -rotated_translation;

	return inverse;
}
|
|
|
|
// Returns the inverse transform using the transpose of the 3x3 part instead of
// a general inverse; the new translation is minus that transpose applied to the
// current translation.
// NOTE(review): the transpose only equals the inverse when the 3x3 part is a
// pure rotation - presumably a precondition of this function, confirm.
DMat44 DMat44::InversedRotationTranslation() const
{
	DMat44 inverse(GetRotation().Transposed3x3());

	// Overwrite the translation that came along with the constructor
	const DVec3 rotated_translation = inverse.Multiply3x3(mCol3);
	inverse.mCol3 = -rotated_translation;

	return inverse;
}
|
|
|
|
JPH_NAMESPACE_END
|