163#ifdef NUDGE_USER_CFG_FILE_NAME
164# include NUDGE_USER_CFG_FILE_NAME
166#ifndef NUDGE_NO_STDIO
170# error nudge.h is a c++ file and should be compiled as c++
171#elif __cplusplus < 201103L
172# define NUDGE_NO_CPP11_DETECTED
173# define NUDGE_CONSTEXPRFNC
174# define NUDGE_CONSTEXPR const
175# define NUDGE_STATIC_ASSERT_WITH_MESSAGE(X,MESSAGE) assert(X)
176# undef NUDGE_USE_INT32_ENUMS
177# define NUDGE_USE_INT32_ENUMS
182# define NUDGE_CONSTEXPRFNC constexpr
183# define NUDGE_CONSTEXPR constexpr
184# define NUDGE_STATIC_ASSERT_WITH_MESSAGE(X,MESSAGE) static_assert((X), MESSAGE)
186#define NUDGE_STATIC_ASSERT(X) NUDGE_STATIC_ASSERT_WITH_MESSAGE((X), "")
188#ifdef __SIZEOF_POINTER__
189# define NUDGE_POINTER_SIZE ((__SIZEOF_POINTER__)*8)
190#elif defined(_WIN64) || defined(_M_X64)
191# define NUDGE_POINTER_SIZE (64)
192#elif defined(_WIN32) || defined(_M_X86)
193# define NUDGE_POINTER_SIZE (32)
195# define NUDGE_POINTER_SIZE (0)
198#ifdef NUDGE_USE_INT32_ENUMS
199# undef NUDGE_COLLISION_MASK_TYPE
200# define NUDGE_COLLISION_MASK_TYPE uint32_t
201# undef NUDGE_FLAG_MASK_TYPE
202# define NUDGE_FLAG_MASK_TYPE uint32_t
204# ifndef NUDGE_COLLISION_MASK_TYPE
205# define NUDGE_COLLISION_MASK_TYPE uint8_t
207# ifndef NUDGE_FLAG_MASK_TYPE
208# define NUDGE_FLAG_MASK_TYPE uint16_t
227# if NUDGE_POINTER_SIZE==64
231# if NUDGE_POINTER_SIZE==32
254# ifndef NUDGE_NO_ANONYMOUS_STRUCTS
267# ifndef NUDGE_NO_ANONYMOUS_STRUCTS
366# ifndef NUDGE_USE_INT32_ENUMS
395# ifndef NUDGE_USE_INT32_ENUMS
409# ifndef NUDGE_BODYFLAG_ENUM_NO_UNUSED_FLAGS
421# ifdef NUDGE_BODYFLAG_ENUM_EXTRA_FIELDS
422# define NUDGE_BODYFLAG_ENUM_EXTRA_FIELDS
454# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
457# ifdef NUDGE_BODYINFO_STRUCT_EXTRA_FIELDS
458 NUDGE_BODYINFO_STRUCT_EXTRA_FIELDS
464# ifndef NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
465# define NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES (2)
467# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
471# if (NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES%4)==0
472# if NUDGE_POINTER_SIZE==64
478# if (NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES%2)==0
479# if NUDGE_POINTER_SIZE==32
492# ifdef NUDGE_BODYINFO_STRUCT_EXTRA_PADDING
493 NUDGE_BODYINFO_STRUCT_EXTRA_PADDING
545 struct ContactImpulseData;
546 struct ContactConstraintData;
552# define NUDGE_INVALID_BODY_ID (32767)
636# ifdef NUDGE_GLOBALDATAMASK_ENUM_EXTRA_FIELDS
637# define NUDGE_GLOBALDATAMASK_ENUM_EXTRA_FIELDS
677# ifdef NUDGE_CONTEXT_STRUCT_EXTRA_FIELDS
678 NUDGE_CONTEXT_STRUCT_EXTRA_FIELDS
680# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
691# ifdef NUDGE_USE_TIME_CONTEXT
695 struct time_context_t {
// Advances the clock from an externally supplied absolute time in seconds.
// Refreshes totalTime, totalTimeWithoutPause, instantFrameTime and timeNow.
// NOTE(review): this listing elides some source lines (e.g. the branch
// structure around the pause bookkeeping), so the comments below describe
// only the statements that are visible here.
699 inline void update(
double globalTimeInSeconds) {
700 double elapsedTime,elapsedNetTime,deltaTime;
// Previous total, kept so the per-call delta can be derived further down.
701 double currentTime = totalTime;
// The pause flag changed since the previous call.
704 if (wasPausedLastFrame!=paused) {
705 wasPausedLastFrame = paused;
// Pause just started: remember when.
706 if (paused) beginPausedTime=globalTimeInSeconds;
// Shifts the un-paused epoch by the time spent paused and clears the marker
// (presumably the else-branch of the test above; the line is elided, confirm).
708 beginNetTime+=globalTimeInSeconds-beginPausedTime;beginPausedTime = 0;
// A zero epoch is latched to the first time value seen.
712 if (beginTime==0) beginTime = globalTimeInSeconds;
713 if (beginNetTime==0) beginNetTime = globalTimeInSeconds;
// Total elapsed time; if the input is earlier than the epoch, the epoch is
// pulled back so the result never goes negative.
715 elapsedTime = globalTimeInSeconds;
if (elapsedTime<beginTime) beginTime=elapsedTime;
716 elapsedTime-=beginTime;
717 totalTime = elapsedTime;
// Same computation against the pause-adjusted epoch.
720 elapsedNetTime = globalTimeInSeconds;
if (elapsedNetTime<beginNetTime) beginNetTime=elapsedNetTime;
721 elapsedNetTime-=beginNetTime;
722 totalTimeWithoutPause = elapsedNetTime;
// Frame delta = new total - previous total, clamped at zero.
725 deltaTime = elapsedTime;
if (deltaTime<currentTime) currentTime=deltaTime;
726 deltaTime-=currentTime;
727 currentTime = elapsedTime;
728 instantFrameTime = deltaTime;
// Remember the raw input time.
730 timeNow = globalTimeInSeconds;
// Frame delta most recently stored in instantFrameTime.
734 inline double getInstantFrameTime() const {
    return this->instantFrameTime;
}
// Current value of totalTime.
735 inline double getTotalTime() const {
    return this->totalTime;
}
// Current value of totalTimeWithoutPause.
736 inline double getTotalTimeWithoutPause() const {
    return this->totalTimeWithoutPause;
}
// Epoch against which totalTime is measured.
737 inline double getBeginTime() const {
    return this->beginTime;
}
// Current value of timeNow.
738 inline double getTimeNow() const {
    return this->timeNow;
}
// Reciprocal of instantFrameTime, or 0 when instantFrameTime is exactly 0.
739 inline double getInstantFPS() const {
    if (instantFrameTime == 0) {
        return 0;
    }
    return 1.0 / instantFrameTime;
}
// Current value of the num_frames counter.
740 inline unsigned long getNumFrames() const {
    return this->num_frames;
}
// Current pause flag.
741 inline bool getPaused() const {
    return this->paused;
}
// Overwrites the pause flag with `flag`.
742 inline void setPaused(bool flag) {
    this->paused = flag;
}
// Inverts the pause flag.
743 inline void togglePaused() {
    this->paused = !this->paused;
}
// Default constructor: every counter and timestamp starts at zero and the
// clock starts un-paused.
// NOTE(review): instantFrameTime starts at 16.2, while update() stores a
// difference of second-based times there and getInstantFPS() inverts it.
// 16.2 looks like milliseconds; confirm the intended unit.
744 time_context_t() : instantFrameTime(16.2),totalTime(0),totalTimeWithoutPause(0),paused(false),
745 beginTime(0),beginNetTime(0),beginPausedTime(0),timeNow(0),num_frames(0),wasPausedLastFrame(false) {}
// Restores state from another time context `o`.
// NOTE(review): the lines between the signature and the first statement
// below are elided in this listing; only the visible statements are described.
746 inline void restoreFrom(time_context_t* o) {
// Shift the un-paused epoch by the difference between the current and the
// restored un-paused totals, then adopt the restored total and frame count.
749 beginNetTime += totalTimeWithoutPause - o->totalTimeWithoutPause;
750 totalTimeWithoutPause = o->totalTimeWithoutPause;
751 num_frames = o->num_frames;
// Delta between the last two totalTime values (written by update()).
755 double instantFrameTime;
// Elapsed time measured against beginNetTime (written by update()).
757 double totalTimeWithoutPause;
// beginTime / beginNetTime: epochs for totalTime / totalTimeWithoutPause.
// beginPausedTime: time at which the current pause started (0 when cleared).
// timeNow: last value passed to update().
760 double beginTime,beginNetTime,beginPausedTime,timeNow;
// Frame counter exposed through getNumFrames().
761 unsigned long num_frames;
// Pause flag as seen by the previous update() call.
762 bool wasPausedLastFrame;
804# ifndef NUDGE_NO_STDIO
// end of context_group
// Prototype: box overload positioned by an optional Transform (T defaults to NULL).
// hsizex/hsizey/hsizez are presumably half-extents per axis (confirm against
// the definition). comOffset is an optional 3-float array (defaults to NULL).
// Returns unsigned; presumably the index of the new body (confirm).
883 unsigned add_box(
context_t* c,
float mass,
float hsizex,
float hsizey,
float hsizez,
const Transform* T=NULL,
const float comOffset[3]=NULL);
// Prototype: box overload positioned by a float matrix pointer (no default).
// The parameter name indicates a 16-float matrix without scaling (layout not
// visible here, confirm). comOffset is an optional 3-float array.
888 unsigned add_box(
context_t* c,
float mass,
float hsizex,
float hsizey,
float hsizez,
const float* mMatrix16WithoutScaling,
const float comOffset[3]=NULL);
// Prototype: sphere with the given mass and radius, positioned by a float
// matrix pointer (presumably 16 floats, unscaled; confirm). comOffset is an
// optional 3-float array. Returns unsigned; presumably a body index (confirm).
905 unsigned add_sphere(
context_t* c,
float mass,
float radius,
const float* mMatrix16WithoutScaling,
const float comOffset[3]=NULL);
// Prototype: compound body built from num_boxes boxes and num_spheres
// spheres, with per-collider offsets given as Transform arrays.
// hsizeTriplets presumably holds 3 floats per box and radii one float per
// sphere (confirm). inertia is a non-const 3-float array, so the definition
// may write to it (confirm). centerMeshAndRetrieveOldCenter3Out is an
// optional output pointer (defaults to NULL).
924 unsigned add_compound(
context_t* c,
float mass,
float inertia[3],
unsigned num_boxes,
const float* hsizeTriplets,
const Transform* boxOffsetTransforms,
unsigned num_spheres,
const float* radii,
const Transform* sphereOffsetTransforms,
const Transform* T=NULL,
const float comOffset[3]=NULL,
float *centerMeshAndRetrieveOldCenter3Out = NULL);
// Prototype: same parameter list as the Transform-based add_compound, but
// collider offsets and the body placement are given as float matrix pointers
// (presumably 16 floats each, unscaled; confirm).
929 unsigned add_compound(
context_t* c,
float mass,
float inertia[3],
unsigned num_boxes,
const float* hsizeTriplets,
const float* boxOffsetMatrices16WithoutScaling,
unsigned num_spheres,
const float* radii,
const float* sphereOffsetMatrices16WithoutScaling,
const float* mMatrix16WithoutScaling=NULL,
const float comOffset[3]=NULL,
float *centerMeshAndRetrieveOldCenter3Out = NULL);
// Prototype: clone overload placed by an optional Transform.
// body_to_clone identifies the source body; scale_factor defaults to 1.f.
// The last parameter is an optional 3-float centre-of-mass offset whose name
// says it is expressed in pre-scaled units.
942 unsigned add_clone(
context_t* c,
unsigned body_to_clone,
float mass,
const Transform* T=NULL,
float scale_factor=1.f,
const float newComOffsetInPreScaledUnits[3]=NULL);
// Prototype: clone overload placed by a float matrix pointer (no default;
// presumably 16 floats, unscaled; confirm). Other parameters match the
// Transform-based overload.
947 unsigned add_clone(
context_t* c,
unsigned body_to_clone,
float mass,
const float* mMatrix16WithoutScaling,
float scale_factor=1.f,
const float newComOffsetInPreScaledUnits[3]=NULL);
// Prototype: cylinder approximated by a compound body (per the name).
// hheight is presumably the half-height (confirm). axis defaults to AXIS_Y.
// num_boxes and num_spheres default to 0 and box_lateral_side_shrinking to
// -1.f; the meaning of those defaults is not visible here (confirm).
1121 unsigned add_compound_cylinder(
context_t* c,
float mass,
float radius,
float hheight,
const float* mMatrix16WithoutScaling,
AxisEnum axis=
AXIS_Y,
unsigned num_boxes=0,
unsigned num_spheres=0,
const float comOffset[3]=NULL,
float box_lateral_side_shrinking=-1.f);
// Prototype: capsule approximated by a compound body (per the name).
// Same parameter list as add_compound_cylinder, but num_boxes defaults to 1
// and num_spheres to 3.
1142 unsigned add_compound_capsule(
context_t* c,
float mass,
float radius,
float hheight,
const float* mMatrix16WithoutScaling,
AxisEnum axis=
AXIS_Y,
unsigned num_boxes=1,
unsigned num_spheres=3,
const float comOffset[3]=NULL,
float box_lateral_side_shrinking=-1.f);
// Prototype: cone approximated by a compound body (per the name).
// axis defaults to AXIS_Y; num_boxes and num_spheres default to 0 (meaning
// of 0 not visible here, confirm). There is no box-shrinking parameter.
1204 unsigned add_compound_cone(
context_t* c,
float mass,
float radius,
float hheight,
const float* mMatrix16WithoutScaling,
AxisEnum axis=
AXIS_Y,
unsigned num_boxes=0,
unsigned num_spheres=0,
const float comOffset[3]=NULL);
// Prototype: staircase built as a compound body with num_steps steps.
// hdepth/hheight/hlength are presumably half-dimensions (confirm).
// orientation_in_0_3 defaults to 0; the name indicates the valid range 0..3.
1226 unsigned add_compound_staircase(
context_t* c,
float mass,
float hdepth,
float hheight,
float hlength,
unsigned num_steps,
const float* mMatrix16WithoutScaling,
int orientation_in_0_3=0,
const float comOffset[3]=NULL);
1245 filter->
collision_mask=collision_group_mask_body_can_collide_with;
// Prototype of the printf-style variadic logger. The definition further down
// forwards its arguments to NUDGE_VLOG_FUNC and returns that call's result.
1297 int log(
const char* format, ...);
1315 unsigned contact_data_index,
1316 int16_t* box_collider_index_for_body_a,
1317 int16_t* sphere_collider_index_for_body_a,
1318 int16_t* box_collider_index_for_body_b,
1319 int16_t* sphere_collider_index_for_body_b,
1320 int use_relative_values_for_output_indices=0
// Constant Transform initialised with an empty first member, a zeroed second
// member and (0, 0, 0, 1) as the third. Presumably zero translation plus the
// identity quaternion; member names are not visible here (confirm).
1329 static const Transform identity_transform = { {}, {0}, { {0.0f, 0.0f, 0.0f, 1.0f} } };
// Prototype: writes a quaternion to qOut4 computed from q4, angVel3 and
// halfTimeStep (presumably integrates the angular velocity; confirm).
// All pointers are __restrict, so the buffers must not alias. The numeric
// suffixes presumably give the element counts.
1390 void nm_QuatAdvance(
float* __restrict qOut4,
const float* __restrict q4,
const float* __restrict angVel3,
float halfTimeStep);
// Prototype: quaternion interpolation between a4 and b4 into result4.
// slerpTime_In_0_1 is named as a 0..1 interpolation factor;
// normalizeResult4AfterLerp is an int used as a flag. Returns float*
// (presumably result4; confirm). Pointers are __restrict (no aliasing).
1442 float*
nm_QuatSlerp(
float* __restrict result4,
const float* __restrict a4,
const float* __restrict b4,
float slerpTime_In_0_1,
int normalizeResult4AfterLerp);
// Prototype: converts quaternion q4 into an angle (one float at rfAngleOut1)
// and an axis (three floats at rkAxisOut3), per the parameter names.
// Angle unit is not visible here (confirm). Pointers must not alias.
1476 void nm_QuatToAngleAxis(
const float* __restrict q4,
float* __restrict rfAngleOut1,
float* __restrict rkAxisOut3);
// Prototype: dot product of the two 3-float vectors a3 and b3 (per the name).
// NOTE(review): both inputs are __restrict, so passing the same vector twice
// violates the qualifier's contract; confirm callers.
1498 float nm_Vec3Dot(
const float* __restrict a3,
const float* __restrict b3);
// Prototype: cross product of a3 and b3 written to vOut3 (per the name).
// Returns float* (presumably vOut3; confirm). __restrict: the output must
// not alias either input.
1507 float*
nm_Vec3Cross(
float* __restrict vOut3,
const float* __restrict a3,
const float* __restrict b3);
// Prototype: applies quaternion q4 to vector vIn3 and stores the result in
// vOut3 (per the name). Returns float* (presumably vOut3; confirm).
// __restrict: vOut3 must not alias vIn3, so no in-place use.
1519 float*
nm_QuatMulVec3(
float* __restrict vOut3,
const float* __restrict q4,
const float* __restrict vIn3);
// Prototype: derives a 3-float axis from quaternion q4 and the components
// (axisX, axisY, axisZ); presumably rotates that axis by q4 (confirm).
// Returns float* (presumably vOut3; confirm).
1534 float*
nm_QuatGetAxis(
float* __restrict vOut3,
const float* __restrict q4,
float axisX,
float axisY,
float axisZ);
// Prototype: modifies quaternion qInOut4 in place using an angle and an axis
// given by components. Angle unit and multiplication order are not visible
// here (confirm). Returns float* (presumably qInOut4; confirm).
1550 float*
nm_QuatRotate(
float* __restrict qInOut4,
float angle,
float axisX,
float axisY,
float axisZ);
// Prototype: multiplies two 16-float matrices (ml16 * mr16, going by the
// "left"/"right" naming; confirm) into result16. Unlike the other nm_
// helpers these pointers are not __restrict. Returns float* (presumably
// result16; confirm).
1561 float*
nm_Mat4Mul(
float* result16,
const float* ml16,
const float* mr16);
// Prototype: fills result[3] with inertia values for a box of the given mass
// and sizes (presumably half-extents and a diagonal inertia; confirm).
// comOffset is an optional 3-float array.
1569 void calculate_box_inertia(
float result[3],
float mass,
float hsizex,
float hsizey,
float hsizez,
const float comOffset[3]=NULL);
// Angle conversion constants and helpers, all evaluated as float:
// M_PIOVER180 = pi/180, M_180OVERPI = 180/pi.
1589# define M_PIOVER180 ((float)(3.14159265358979323846/180.0))
1592# define M_180OVERPI ((float)(180.0/3.14159265358979323846))
// Degrees -> radians.
1595# define M_DEG2RAD(X) ((X)*(float)M_PIOVER180)
// Radians -> degrees.
1598# define M_RAD2DEG(X) ((X)*(float)M_180OVERPI)
1601# ifndef NUDGE_NO_STDIO
1602# ifdef NUDGE_USE_TIME_CONTEXT
// Prototypes for time_context_t serialisation through a FILE*.
// save_time_context takes the context as const; load_time_context takes it
// as mutable. The on-disk format is not visible here.
1603 void save_time_context(FILE* f,
const time_context_t* c);
1604 void load_time_context(FILE* f,time_context_t* c);
// Default simulation parameters. Each one is wrapped in #ifndef, so a value
// defined before this point takes precedence (the closing #endif lines are
// elided in this listing).
// Time step: 1/60, evaluated as double.
1608#ifndef NUDGE_DEFAULT_SIMULATION_TIMESTEP
1609# define NUDGE_DEFAULT_SIMULATION_TIMESTEP (1.0/60.0)
1611#ifndef NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS
1612# define NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS (2)
1614#ifndef NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS
1615# define NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS (5)
// Linear and angular damping share the same default.
1617#ifndef NUDGE_DEFAULT_DAMPING_LINEAR
1618# define NUDGE_DEFAULT_DAMPING_LINEAR (0.25f)
1620#ifndef NUDGE_DEFAULT_DAMPING_ANGULAR
1621# define NUDGE_DEFAULT_DAMPING_ANGULAR (0.25f)
// Sleeping thresholds; the names say they are compared against squared velocities.
1623#ifndef NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED
1624# define NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED (1e-2f)
1626#ifndef NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED
1627# define NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED (1e-1f)
1629#ifndef NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT
1630# define NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT (1e-3f)
1632#ifndef NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR
1633# define NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR (2.0f)
1648#if (!defined(HELLO_WORLD_CPP_) && !defined(EXAMPLE02_CPP_) && !defined(NUDGE_IMPLEMENTATION) && defined(NUDGE_DEVELOPMENT))
1649 #define NUDGE_IMPLEMENTATION
1653#ifdef NUDGE_IMPLEMENTATION
1654#ifndef NUDGE_IMPLEMENTATION_GUARD
1655#define NUDGE_IMPLEMENTATION_GUARD
1659#ifdef NUDGE_USE_SIMDE
1660# ifndef SIMDE_ENABLE_NATIVE_ALIASES
1662# define SIMDE_ENABLE_NATIVE_ALIASES
1665# include "./simde/x86/sse2.h"
1666#ifndef NUDGE_SIMDE_USE_CUSTOM_MM_MALLOC
1667# include <mm_malloc.h>
1670# include <immintrin.h>
1678#define NUDGE_ALIGNED(n) __declspec(align(n))
1679#define NUDGE_FORCEINLINE __forceinline
1681#define NUDGE_ALIGNED(n) __attribute__((aligned(n)))
1682#define NUDGE_FORCEINLINE inline __attribute__((always_inline))
1686#define NUDGE_SIMDV_WIDTH 256
1688#define NUDGE_SIMDV_WIDTH 128
// Binds a reference named scope_arena_<A> to the existing Arena A, then
// declares a new local Arena with the same name A, copy-initialised from it.
// Code after the macro therefore works on the copy. Presumably this gives
// scoped allocations that leave the outer arena unchanged (confirm).
// Must be used in a nested scope where redeclaring A is legal.
1691#define NUDGE_ARENA_SCOPE(A) Arena& scope_arena_##A = A; Arena A = scope_arena_##A
// No-op stand-in with a vprintf-compatible signature: ignores both arguments
// and reports 0.
1696 int dummy_vprintf(const char* /*format*/, va_list /*args*/) {
    return 0;
}
// No-op stand-in for a flush call: does nothing and reports 0.
1697 int dummy_flush(void) {
    return 0;
}
1699#ifdef NUDGE_NO_STDIO
1700# ifndef NUDGE_VLOG_FUNC
1702# define NUDGE_VLOG_FUNC(A,B) nudge::dummy_vprintf((A),(B))
1703# undef NUDGE_LOG_FLUSH
1706#ifndef NUDGE_VLOG_FUNC
1708# ifndef NUDGE_LOG_FILE_PTR
1713# define NUDGE_LOG_FILE_PTR (stdout)
1720# define NUDGE_VLOG_FUNC(CONST_CHAR_PTR_ARG_PTR,VA_LIST_ARG) vfprintf(NUDGE_LOG_FILE_PTR,CONST_CHAR_PTR_ARG_PTR,VA_LIST_ARG)
1725# define NUDGE_LOG_FLUSH() fflush(NUDGE_LOG_FILE_PTR)
1727#ifndef NUDGE_LOG_FLUSH
1728# define NUDGE_LOG_FLUSH() nudge::dummy_flush()
// printf-style logger: hands the format string and the variadic arguments to
// NUDGE_VLOG_FUNC and returns whatever that call returns.
1735int log(const char* format, ...) {
    va_list args;
    va_start(args, format);
    const int written = NUDGE_VLOG_FUNC(format, args);
    va_end(args);
    return written;
}
// Flushes the log sink through NUDGE_LOG_FLUSH and returns its result.
1736int flush(void) {
    const int status = NUDGE_LOG_FLUSH();
    return status;
}
1739#if NUDGE_SIMDV_WIDTH == 128
1740#define NUDGE_SIMDV_ALIGNED NUDGE_ALIGNED(16)
1741static const unsigned simdv_width32 = 4;
1742static const unsigned simdv_width32_log2 = 2;
1743#elif NUDGE_SIMDV_WIDTH == 256
1744#define NUDGE_SIMDV_ALIGNED NUDGE_ALIGNED(32)
1745static const unsigned simdv_width32 = 8;
1746static const unsigned simdv_width32_log2 = 3;
// Arithmetic operator overloads for __m128 (four packed floats).
// Closing braces are elided in this listing.
// Unary minus: XOR with -0.0f flips the sign bit of every lane.
1750NUDGE_FORCEINLINE __m128 operator - (__m128 a) {
1751	return _mm_xor_ps(a, _mm_set1_ps(-0.0f));
// Lane-wise binary +, -, *, /.
1754NUDGE_FORCEINLINE __m128 operator + (__m128 a, __m128 b) {
1755	return _mm_add_ps(a, b);
1758NUDGE_FORCEINLINE __m128 operator - (__m128 a, __m128 b) {
1759	return _mm_sub_ps(a, b);
1762NUDGE_FORCEINLINE __m128 operator * (__m128 a, __m128 b) {
1763	return _mm_mul_ps(a, b);
1766NUDGE_FORCEINLINE __m128 operator / (__m128 a, __m128 b) {
1767	return _mm_div_ps(a, b);
// Compound assignment forms: update `a` in place and return a reference to it.
1770NUDGE_FORCEINLINE __m128& operator += (__m128& a, __m128 b) {
1771	return a = _mm_add_ps(a, b);
1774NUDGE_FORCEINLINE __m128& operator -= (__m128& a, __m128 b) {
1775	return a = _mm_sub_ps(a, b);
1778NUDGE_FORCEINLINE __m128& operator *= (__m128& a, __m128 b) {
1779	return a = _mm_mul_ps(a, b);
1782NUDGE_FORCEINLINE __m128& operator /= (__m128& a, __m128 b) {
1783	return a = _mm_div_ps(a, b);
// Arithmetic operator overloads for __m256 (eight packed floats), mirroring
// the __m128 set. Closing braces are elided in this listing.
// Unary minus: XOR with -0.0f flips the sign bit of every lane.
1786NUDGE_FORCEINLINE __m256 operator - (__m256 a) {
1787	return _mm256_xor_ps(a, _mm256_set1_ps(-0.0f));
// Lane-wise binary +, -, *, /.
1790NUDGE_FORCEINLINE __m256 operator + (__m256 a, __m256 b) {
1791	return _mm256_add_ps(a, b);
1794NUDGE_FORCEINLINE __m256 operator - (__m256 a, __m256 b) {
1795	return _mm256_sub_ps(a, b);
1798NUDGE_FORCEINLINE __m256 operator * (__m256 a, __m256 b) {
1799	return _mm256_mul_ps(a, b);
1802NUDGE_FORCEINLINE __m256 operator / (__m256 a, __m256 b) {
1803	return _mm256_div_ps(a, b);
// Compound assignment forms: update `a` in place and return a reference to it.
1806NUDGE_FORCEINLINE __m256& operator += (__m256& a, __m256 b) {
1807	return a = _mm256_add_ps(a, b);
1810NUDGE_FORCEINLINE __m256& operator -= (__m256& a, __m256 b) {
1811	return a = _mm256_sub_ps(a, b);
1814NUDGE_FORCEINLINE __m256& operator *= (__m256& a, __m256 b) {
1815	return a = _mm256_mul_ps(a, b);
1818NUDGE_FORCEINLINE __m256& operator /= (__m256& a, __m256 b) {
1819	return a = _mm256_div_ps(a, b);
1824typedef __m128 simd4_float;
1825typedef __m128i simd4_int32;
// Interleave 32-bit elements from the low (unpacklo32) or high (unpackhi32)
// halves of x and y. Float and integer overloads map to the matching
// SSE/SSE2 unpack intrinsics.
1828	NUDGE_FORCEINLINE __m128 unpacklo32(__m128 x, __m128 y) {
1829		return _mm_unpacklo_ps(x, y);
1832	NUDGE_FORCEINLINE __m128 unpackhi32(__m128 x, __m128 y) {
1833		return _mm_unpackhi_ps(x, y);
1836	NUDGE_FORCEINLINE __m128i unpacklo32(__m128i x, __m128i y) {
1837		return _mm_unpacklo_epi32(x, y);
1840	NUDGE_FORCEINLINE __m128i unpackhi32(__m128i x, __m128i y) {
1841		return _mm_unpackhi_epi32(x, y);
// concat2x32<x0,x1,y0,y1>: result lanes are (x[x0], x[x1], y[y0], y[y1]).
// The template indices are passed to _MM_SHUFFLE in reverse order because
// that macro takes the highest lane first.
1844	template<
unsigned x0,
unsigned x1,
unsigned y0,
unsigned y1>
1845	NUDGE_FORCEINLINE __m128 concat2x32(__m128 x, __m128 y) {
1846		return _mm_shuffle_ps(x, y, _MM_SHUFFLE(y1, y0, x1, x0));
// shuffle32<i0,i1,i2,i3>: result lanes are (x[i0], x[i1], x[i2], x[i3]).
1849	template<
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3>
1850	NUDGE_FORCEINLINE __m128 shuffle32(__m128 x) {
1851		return _mm_shuffle_ps(x, x, _MM_SHUFFLE(i3, i2, i1, i0));
// Integer overload of shuffle32.
1854	template<
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3>
1855	NUDGE_FORCEINLINE __m128i shuffle32(__m128i x) {
1856		return _mm_shuffle_epi32(x, _MM_SHUFFLE(i3, i2, i1, i0));
// In-place 4x4 transpose of the matrix whose rows are x, y, z, w, using the
// standard _MM_TRANSPOSE4_PS macro.
1859	NUDGE_FORCEINLINE
void transpose32(simd4_float& x, simd4_float& y, simd4_float& z, simd4_float& w) {
1860		_MM_TRANSPOSE4_PS(x, y, z, w);
// Collects the top (sign) bit of each 32-bit lane into the low bits of an
// integer. The integer overload reinterprets the register as floats first.
1865	NUDGE_FORCEINLINE
unsigned signmask32(__m128 x) {
1866		return _mm_movemask_ps(x);
1869	NUDGE_FORCEINLINE
unsigned signmask32(__m128i x) {
1870		return _mm_movemask_ps(_mm_castsi128_ps(x));
// Bitwise helpers on 128-bit registers (float and integer overloads).
// bitwise_notand(x, y) computes (~x) & y, following the andnot intrinsics.
1873	NUDGE_FORCEINLINE __m128 bitwise_xor(__m128 x, __m128 y) {
1874		return _mm_xor_ps(x, y);
1877	NUDGE_FORCEINLINE __m128 bitwise_or(__m128 x, __m128 y) {
1878		return _mm_or_ps(x, y);
1881	NUDGE_FORCEINLINE __m128 bitwise_and(__m128 x, __m128 y) {
1882		return _mm_and_ps(x, y);
1885	NUDGE_FORCEINLINE __m128 bitwise_notand(__m128 x, __m128 y) {
1886		return _mm_andnot_ps(x, y);
1889	NUDGE_FORCEINLINE __m128i bitwise_xor(__m128i x, __m128i y) {
1890		return _mm_xor_si128(x, y);
1893	NUDGE_FORCEINLINE __m128i bitwise_or(__m128i x, __m128i y) {
1894		return _mm_or_si128(x, y);
1897	NUDGE_FORCEINLINE __m128i bitwise_and(__m128i x, __m128i y) {
1898		return _mm_and_si128(x, y);
1901	NUDGE_FORCEINLINE __m128i bitwise_notand(__m128i x, __m128i y) {
1902		return _mm_andnot_si128(x, y);
// Per-lane select: picks y where the sign bit of s is set, otherwise x.
// With SSE4.1 or AVX the native blend instruction is used and
// NUDGE_NATIVE_BLENDV32 is defined. The fallback (its #else line is elided
// in this listing) spreads each lane's sign bit over the whole lane with an
// arithmetic shift by 31 and combines (x & ~s) | (y & s).
1905	NUDGE_FORCEINLINE __m128 blendv32(__m128 x, __m128 y, __m128 s) {
1906#if defined(__SSE4_1__) || defined(__AVX__)
1907#define NUDGE_NATIVE_BLENDV32
1908		return _mm_blendv_ps(x, y, s);
1910		s = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(s), 31));
1911		return _mm_or_ps(_mm_andnot_ps(s, x), _mm_and_ps(s, y));
// Integer overload: reinterprets the operands as floats and reuses the float version.
1915	NUDGE_FORCEINLINE __m128i blendv32(__m128i x, __m128i y, __m128i s) {
1916		return _mm_castps_si128(blendv32(_mm_castsi128_ps(x), _mm_castsi128_ps(y), _mm_castsi128_ps(s)));
1920namespace simd_float {
// Construction, load and store helpers for simd4_float.
// Returns lane 0 as a scalar.
1921	NUDGE_FORCEINLINE
float extract_first_float(simd4_float x) {
1922		return _mm_cvtss_f32(x);
// All lanes zero.
1925	NUDGE_FORCEINLINE simd4_float zero4() {
1926		return _mm_setzero_ps();
// All lanes set to x.
1929	NUDGE_FORCEINLINE simd4_float make4(
float x) {
1930		return _mm_set1_ps(x);
// Lanes set to (x, y, z, w) in memory order (setr = reversed set).
1933	NUDGE_FORCEINLINE simd4_float make4(
float x,
float y,
float z,
float w) {
1934		return _mm_setr_ps(x, y, z, w);
// Reads one float and replicates it to all lanes.
1937	NUDGE_FORCEINLINE simd4_float broadcast_load4(
const float* p) {
1938		return _mm_set1_ps(*p);
// load4 uses the aligned load intrinsic; loadu4 uses the unaligned one.
1941	NUDGE_FORCEINLINE simd4_float load4(
const float* p) {
1942		return _mm_load_ps(p);
1945	NUDGE_FORCEINLINE simd4_float loadu4(
const float* p) {
1946		return _mm_loadu_ps(p);
// Body elided in this listing (presumably the aligned store; confirm).
1949	NUDGE_FORCEINLINE
void store4(
float* p, simd4_float x) {
// Unaligned store of four floats.
1953	NUDGE_FORCEINLINE
void storeu4(
float* p, simd4_float x) {
1954		_mm_storeu_ps(p, x);
// madd(x, y, z) = x*y + z and msub(x, y, z) = x*y - z, per lane.
// Each function shows a fused and a separate multiply/add return; the
// preprocessor lines choosing between them are elided in this listing
// (presumably an FMA feature test; confirm).
1957	NUDGE_FORCEINLINE simd4_float madd(simd4_float x, simd4_float y, simd4_float z) {
1959		return _mm_fmadd_ps(x, y, z);
1961		return _mm_add_ps(_mm_mul_ps(x, y), z);
1965	NUDGE_FORCEINLINE simd4_float msub(simd4_float x, simd4_float y, simd4_float z) {
1967		return _mm_fmsub_ps(x, y, z);
1969		return _mm_sub_ps(_mm_mul_ps(x, y), z);
// Lane-wise min and max. The operands are passed to the intrinsic in swapped
// order (y, x).
// NOTE(review): the swap is presumably deliberate for NaN handling, since
// the SSE min/max result depends on operand order when a lane is NaN; confirm.
1974	NUDGE_FORCEINLINE simd4_float min(simd4_float x, simd4_float y) {
1975		return _mm_min_ps(y, x);
1979	NUDGE_FORCEINLINE simd4_float max(simd4_float x, simd4_float y) {
1980		return _mm_max_ps(y, x);
// Reciprocal square root and reciprocal via the rsqrt/rcp intrinsics.
1983	NUDGE_FORCEINLINE simd4_float rsqrt(simd4_float x) {
1984		return _mm_rsqrt_ps(x);
1987	NUDGE_FORCEINLINE simd4_float recip(simd4_float x) {
1988		return _mm_rcp_ps(x);
// Lane-wise square root.
1991	NUDGE_FORCEINLINE simd4_float sqrt(simd4_float x) {
1992		return _mm_sqrt_ps(x);
// Absolute value: clears the sign bit by computing (~(-0.0f)) & x.
1995	NUDGE_FORCEINLINE simd4_float abs(simd4_float x) {
1996		return _mm_andnot_ps(_mm_set1_ps(-0.0f), x);
// Lane-wise comparisons. Each returns a mask register produced by the
// corresponding SSE compare intrinsic.
1999	NUDGE_FORCEINLINE simd4_float cmp_gt(simd4_float x, simd4_float y) {
2000		return _mm_cmpgt_ps(x, y);
2003	NUDGE_FORCEINLINE simd4_float cmp_ge(simd4_float x, simd4_float y) {
2004		return _mm_cmpge_ps(x, y);
2007	NUDGE_FORCEINLINE simd4_float cmp_le(simd4_float x, simd4_float y) {
2008		return _mm_cmple_ps(x, y);
2011	NUDGE_FORCEINLINE simd4_float cmp_eq(simd4_float x, simd4_float y) {
2012		return _mm_cmpeq_ps(x, y);
2015	NUDGE_FORCEINLINE simd4_float cmp_neq(simd4_float x, simd4_float y) {
2016		return _mm_cmpneq_ps(x, y);
// asint reinterprets the bits without conversion; toint converts each lane
// to int32 with the truncating (cvtt) intrinsic.
2019	NUDGE_FORCEINLINE simd4_int32 asint(simd4_float x) {
2020		return _mm_castps_si128(x);
2023	NUDGE_FORCEINLINE simd4_int32 toint(simd4_float x) {
2024		return _mm_cvttps_epi32(x);
2028namespace simd_int32 {
// Helpers for simd4_int32 (four packed 32-bit integers).
// All lanes zero.
2029	NUDGE_FORCEINLINE simd4_int32 zero4() {
2030		return _mm_setzero_si128();
// All lanes set to x.
2033	NUDGE_FORCEINLINE simd4_int32 make4(int32_t x) {
2034		return _mm_set1_epi32(x);
// Lanes set to (x, y, z, w) in memory order.
2037	NUDGE_FORCEINLINE simd4_int32 make4(int32_t x, int32_t y, int32_t z, int32_t w) {
2038		return _mm_setr_epi32(x, y, z, w);
// load4/store4 use the aligned intrinsics; loadu4/storeu4 the unaligned ones.
2041	NUDGE_FORCEINLINE simd4_int32 load4(
const int32_t* p) {
2042		return _mm_load_si128((
const __m128i*)p);
2045	NUDGE_FORCEINLINE simd4_int32 loadu4(
const int32_t* p) {
2046		return _mm_loadu_si128((
const __m128i*)p);
2049	NUDGE_FORCEINLINE
void store4(int32_t* p, simd4_int32 x) {
2050		_mm_store_si128((__m128i*)p, x);
2053	NUDGE_FORCEINLINE
void storeu4(int32_t* p, simd4_int32 x) {
2054		_mm_storeu_si128((__m128i*)p, x);
// Compile-time shift counts. shift_right is a logical shift (srli).
2057	template<
unsigned bits>
2058	NUDGE_FORCEINLINE simd4_int32 shift_left(simd4_int32 x) {
2059		return _mm_slli_epi32(x, bits);
2062	template<
unsigned bits>
2063	NUDGE_FORCEINLINE simd4_int32 shift_right(simd4_int32 x) {
2064		return _mm_srli_epi32(x, bits);
// Lane-wise add and equality mask.
2067	NUDGE_FORCEINLINE simd4_int32 add(simd4_int32 x, simd4_int32 y) {
2068		return _mm_add_epi32(x, y);
2071	NUDGE_FORCEINLINE simd4_int32 cmp_eq(simd4_int32 x, simd4_int32 y) {
2072		return _mm_cmpeq_epi32(x, y);
// Reinterprets the bits as floats without conversion.
2075	NUDGE_FORCEINLINE simd4_float asfloat(simd4_int32 x) {
2076		return _mm_castsi128_ps(x);
2081typedef __m256 simd8_float;
2082typedef __m256i simd8_int32;
// 256-bit interleave helpers, float and integer overloads. The underlying
// AVX/AVX2 unpack intrinsics operate within each 128-bit half independently.
2085	NUDGE_FORCEINLINE __m256 unpacklo32(__m256 x, __m256 y) {
2086		return _mm256_unpacklo_ps(x, y);
2089	NUDGE_FORCEINLINE __m256 unpackhi32(__m256 x, __m256 y) {
2090		return _mm256_unpackhi_ps(x, y);
2093	NUDGE_FORCEINLINE __m256i unpacklo32(__m256i x, __m256i y) {
2094		return _mm256_unpacklo_epi32(x, y);
2097	NUDGE_FORCEINLINE __m256i unpackhi32(__m256i x, __m256i y) {
2098		return _mm256_unpackhi_epi32(x, y);
// 256-bit counterparts of concat2x32 and shuffle32. The same four-index
// pattern is applied inside each 128-bit half. Indices are reversed for
// _MM_SHUFFLE, which takes the highest lane first.
2101	template<
unsigned x0,
unsigned x1,
unsigned y0,
unsigned y1>
2102	NUDGE_FORCEINLINE __m256 concat2x32(__m256 x, __m256 y) {
2103		return _mm256_shuffle_ps(x, y, _MM_SHUFFLE(y1, y0, x1, x0));
2106	template<
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3>
2107	NUDGE_FORCEINLINE __m256 shuffle32(__m256 x) {
2108		return _mm256_shuffle_ps(x, x, _MM_SHUFFLE(i3, i2, i1, i0));
2111	template<
unsigned i0,
unsigned i1,
unsigned i2,
unsigned i3>
2112	NUDGE_FORCEINLINE __m256i shuffle32(__m256i x) {
2113		return _mm256_shuffle_epi32(x, _MM_SHUFFLE(i3, i2, i1, i0));
// In-place transpose of x, y, z, w built from unpack and shuffle steps.
// Because those AVX instructions work per 128-bit half, this transposes a
// 4x4 block inside each half rather than a full 8-wide matrix.
2116	NUDGE_FORCEINLINE
void transpose32(simd8_float& x, simd8_float& y, simd8_float& z, simd8_float& w) {
// Interleave row pairs: t0/t2 hold the xy pairs, t1/t3 the zw pairs.
2117		__m256 t0 = _mm256_unpacklo_ps(x, y);
2118		__m256 t1 = _mm256_unpacklo_ps(z, w);
2119		__m256 t2 = _mm256_unpackhi_ps(x, y);
2120		__m256 t3 = _mm256_unpackhi_ps(z, w);
// Combine low/high 64-bit pairs to form the transposed rows.
2121		x = _mm256_shuffle_ps(t0, t1, _MM_SHUFFLE(1,0,1,0));
2122		y = _mm256_shuffle_ps(t0, t1, _MM_SHUFFLE(3,2,3,2));
2123		z = _mm256_shuffle_ps(t2, t3, _MM_SHUFFLE(1,0,1,0));
2124		w = _mm256_shuffle_ps(t2, t3, _MM_SHUFFLE(3,2,3,2));
// 128-bit-half permutations. The control byte is i0 | (i1 << 4): i0 selects
// the source of the low half, i1 of the high half.
// permute128 picks halves from two registers; shuffle128 uses one register
// for both sources. Float overloads cast to integer form around the AVX2
// permute intrinsic.
2129	template<
unsigned i0,
unsigned i1>
2130	NUDGE_FORCEINLINE simd8_float permute128(simd8_float x, simd8_float y) {
2131		return _mm256_castsi256_ps(_mm256_permute2x128_si256(_mm256_castps_si256(x), _mm256_castps_si256(y), i0 | (i1 << 4)));
2134	template<
unsigned i0,
unsigned i1>
2135	NUDGE_FORCEINLINE simd8_int32 permute128(simd8_int32 x, simd8_int32 y) {
2136		return _mm256_permute2x128_si256(x, y, i0 | (i1 << 4));
2139	template<
unsigned i0,
unsigned i1>
2140	NUDGE_FORCEINLINE simd8_float shuffle128(simd8_float x) {
2141		return _mm256_castsi256_ps(_mm256_permute2x128_si256(_mm256_castps_si256(x), _mm256_castps_si256(x), i0 | (i1 << 4)));
2144	template<
unsigned i0,
unsigned i1>
2145	NUDGE_FORCEINLINE simd8_int32 shuffle128(simd8_int32 x) {
2146		return _mm256_permute2x128_si256(x, x, i0 | (i1 << 4));
// Conversions between 128-bit and 256-bit registers.
// broadcast: x in both halves (cast gives the low half, insert fills the high half).
2149	NUDGE_FORCEINLINE simd8_float broadcast(simd4_float x) {
2150		return _mm256_insertf128_ps(_mm256_castps128_ps256(x), x, 1);
2153	NUDGE_FORCEINLINE simd8_int32 broadcast(simd4_int32 x) {
2154		return _mm256_insertf128_si256(_mm256_castsi128_si256(x), x, 1);
// concat: x becomes the low half, y the high half.
2159	NUDGE_FORCEINLINE simd8_float concat(simd4_float x, simd4_float y) {
2160		return _mm256_insertf128_ps(_mm256_castps128_ps256(x), y, 1);
// extract_low / extract_high: return the low / high 128-bit half.
2163	NUDGE_FORCEINLINE simd4_float extract_low(simd8_float x) {
2164		return _mm256_castps256_ps128(x);
2167	NUDGE_FORCEINLINE simd4_float extract_high(simd8_float x) {
2168		return _mm256_extractf128_ps(x, 1);
2171	NUDGE_FORCEINLINE simd4_int32 extract_low(simd8_int32 x) {
2172		return _mm256_castsi256_si128(x);
2175	NUDGE_FORCEINLINE simd4_int32 extract_high(simd8_int32 x) {
2176		return _mm256_extractf128_si256(x, 1);
// Collects the sign bit of each of the eight 32-bit lanes into the low bits
// of an integer. The integer overload reinterprets the register as floats first.
2179	NUDGE_FORCEINLINE
unsigned signmask32(__m256 x) {
2180		return _mm256_movemask_ps(x);
2183	NUDGE_FORCEINLINE
unsigned signmask32(__m256i x) {
2184		return _mm256_movemask_ps(_mm256_castsi256_ps(x));
// Bitwise helpers on 256-bit registers (float and integer overloads).
// bitwise_notand(x, y) computes (~x) & y, following the andnot intrinsics.
2187	NUDGE_FORCEINLINE __m256 bitwise_xor(__m256 x, __m256 y) {
2188		return _mm256_xor_ps(x, y);
2191	NUDGE_FORCEINLINE __m256 bitwise_or(__m256 x, __m256 y) {
2192		return _mm256_or_ps(x, y);
2195	NUDGE_FORCEINLINE __m256 bitwise_and(__m256 x, __m256 y) {
2196		return _mm256_and_ps(x, y);
2199	NUDGE_FORCEINLINE __m256 bitwise_notand(__m256 x, __m256 y) {
2200		return _mm256_andnot_ps(x, y);
2203	NUDGE_FORCEINLINE __m256i bitwise_xor(__m256i x, __m256i y) {
2204		return _mm256_xor_si256(x, y);
2207	NUDGE_FORCEINLINE __m256i bitwise_or(__m256i x, __m256i y) {
2208		return _mm256_or_si256(x, y);
2211	NUDGE_FORCEINLINE __m256i bitwise_and(__m256i x, __m256i y) {
2212		return _mm256_and_si256(x, y);
2215	NUDGE_FORCEINLINE __m256i bitwise_notand(__m256i x, __m256i y) {
2216		return _mm256_andnot_si256(x, y);
// Per-lane select on 256-bit registers: y where the sign bit of s is set,
// otherwise x. The integer overload casts through the float blend.
2219	NUDGE_FORCEINLINE __m256 blendv32(__m256 x, __m256 y, __m256 s) {
2220		return _mm256_blendv_ps(x, y, s);
2223	NUDGE_FORCEINLINE __m256i blendv32(__m256i x, __m256i y, __m256i s) {
2224		return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y), _mm256_castsi256_ps(s)));
2228namespace simd_float {
// Construction, load and store helpers for simd8_float.
// Returns lane 0 as a scalar.
2229	NUDGE_FORCEINLINE
float extract_first_float(simd8_float x) {
2230		return _mm_cvtss_f32(_mm256_castps256_ps128(x));
// All lanes zero.
2233	NUDGE_FORCEINLINE simd8_float zero8() {
2234		return _mm256_setzero_ps();
// All lanes set to x.
2237	NUDGE_FORCEINLINE simd8_float make8(
float x) {
2238		return _mm256_set1_ps(x);
// Lanes set in memory order: (x0, y0, z0, w0) low half, (x1, y1, z1, w1) high half.
2241	NUDGE_FORCEINLINE simd8_float make8(
float x0,
float y0,
float z0,
float w0,
float x1,
float y1,
float z1,
float w1) {
2242		return _mm256_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1);
// Reads one float and replicates it to all eight lanes.
2245	NUDGE_FORCEINLINE simd8_float broadcast_load8(
const float* p) {
2246		return _mm256_broadcast_ss(p);
// load8/store8 use the aligned intrinsics; loadu8/storeu8 the unaligned ones.
2249	NUDGE_FORCEINLINE simd8_float load8(
const float* p) {
2250		return _mm256_load_ps(p);
2253	NUDGE_FORCEINLINE simd8_float loadu8(
const float* p) {
2254		return _mm256_loadu_ps(p);
2257	NUDGE_FORCEINLINE
void store8(
float* p, simd8_float x) {
2258		_mm256_store_ps(p, x);
2261	NUDGE_FORCEINLINE
void storeu8(
float* p, simd8_float x) {
2262		_mm256_storeu_ps(p, x);
// madd(x, y, z) = x*y + z and msub(x, y, z) = x*y - z, per lane.
// Each function shows a fused and a separate multiply/add return; the
// preprocessor lines choosing between them are elided in this listing
// (presumably an FMA feature test; confirm).
2265	NUDGE_FORCEINLINE simd8_float madd(simd8_float x, simd8_float y, simd8_float z) {
2267		return _mm256_fmadd_ps(x, y, z);
2269		return _mm256_add_ps(_mm256_mul_ps(x, y), z);
2273	NUDGE_FORCEINLINE simd8_float msub(simd8_float x, simd8_float y, simd8_float z) {
2275		return _mm256_fmsub_ps(x, y, z);
2277		return _mm256_sub_ps(_mm256_mul_ps(x, y), z);
// Lane-wise min and max. The operands are passed to the intrinsic in swapped
// order (y, x), as in the 128-bit versions.
// NOTE(review): the swap is presumably deliberate for NaN handling; confirm.
2282	NUDGE_FORCEINLINE simd8_float min(simd8_float x, simd8_float y) {
2283		return _mm256_min_ps(y, x);
2287	NUDGE_FORCEINLINE simd8_float max(simd8_float x, simd8_float y) {
2288		return _mm256_max_ps(y, x);
// Reciprocal square root and reciprocal via the rsqrt/rcp intrinsics.
2291	NUDGE_FORCEINLINE simd8_float rsqrt(simd8_float x) {
2292		return _mm256_rsqrt_ps(x);
2295	NUDGE_FORCEINLINE simd8_float recip(simd8_float x) {
2296		return _mm256_rcp_ps(x);
// Lane-wise square root.
2299	NUDGE_FORCEINLINE simd8_float sqrt(simd8_float x) {
2300		return _mm256_sqrt_ps(x);
// Absolute value: clears the sign bit by computing (~(-0.0f)) & x.
2303	NUDGE_FORCEINLINE simd8_float abs(simd8_float x) {
2304		return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), x);
// Lane-wise comparisons on eight floats. All four use ordered, quiet
// predicates (_OQ), so a lane containing NaN yields a false mask.
2307	NUDGE_FORCEINLINE simd8_float cmp_gt(simd8_float x, simd8_float y) {
2308		return _mm256_cmp_ps(x, y, _CMP_GT_OQ);
2311	NUDGE_FORCEINLINE simd8_float cmp_ge(simd8_float x, simd8_float y) {
2312		return _mm256_cmp_ps(x, y, _CMP_GE_OQ);
2315	NUDGE_FORCEINLINE simd8_float cmp_le(simd8_float x, simd8_float y) {
2316		return _mm256_cmp_ps(x, y, _CMP_LE_OQ);
2319	NUDGE_FORCEINLINE simd8_float cmp_eq(simd8_float x, simd8_float y) {
2320		return _mm256_cmp_ps(x, y, _CMP_EQ_OQ);
// Lane-wise "not equal" mask on eight floats.
// Uses the unordered predicate (_CMP_NEQ_UQ) so that a lane containing NaN
// compares as not-equal. That is the behaviour of the 128-bit cmp_neq, which
// is built on _mm_cmpneq_ps. The ordered predicate (_CMP_NEQ_OQ) would give
// a false mask for NaN lanes and make the two SIMD widths disagree.
2323	NUDGE_FORCEINLINE simd8_float cmp_neq(simd8_float x, simd8_float y) {
2324		return _mm256_cmp_ps(x, y, _CMP_NEQ_UQ);
// asint reinterprets the bits without conversion; toint converts each lane
// to int32 with the truncating (cvtt) intrinsic.
2327	NUDGE_FORCEINLINE simd8_int32 asint(simd8_float x) {
2328		return _mm256_castps_si256(x);
2331	NUDGE_FORCEINLINE simd8_int32 toint(simd8_float x) {
2332		return _mm256_cvttps_epi32(x);
2336namespace simd_int32 {
// Helpers for simd8_int32 (eight packed 32-bit integers).
// All lanes zero.
2337	NUDGE_FORCEINLINE simd8_int32 zero8() {
2338		return _mm256_setzero_si256();
// All lanes set to x.
2341	NUDGE_FORCEINLINE simd8_int32 make8(int32_t x) {
2342		return _mm256_set1_epi32(x);
// Lanes set in memory order.
2345	NUDGE_FORCEINLINE simd8_int32 make8(int32_t x0, int32_t y0, int32_t z0, int32_t w0, int32_t x1, int32_t y1, int32_t z1, int32_t w1) {
2346		return _mm256_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1);
// load8/store8 use the aligned intrinsics; loadu8/storeu8 the unaligned ones.
2349	NUDGE_FORCEINLINE simd8_int32 load8(
const int32_t* p) {
2350		return _mm256_load_si256((
const __m256i*)p);
2353	NUDGE_FORCEINLINE simd8_int32 loadu8(
const int32_t* p) {
2354		return _mm256_loadu_si256((
const __m256i*)p);
2357	NUDGE_FORCEINLINE
void store8(int32_t* p, simd8_int32 x) {
2358		_mm256_store_si256((__m256i*)p, x);
2361	NUDGE_FORCEINLINE
void storeu8(int32_t* p, simd8_int32 x) {
2362		_mm256_storeu_si256((__m256i*)p, x);
// Compile-time shift counts. shift_right is a logical shift (srli).
2365	template<
unsigned bits>
2366	NUDGE_FORCEINLINE simd8_int32 shift_left(simd8_int32 x) {
2367		return _mm256_slli_epi32(x, bits);
2370	template<
unsigned bits>
2371	NUDGE_FORCEINLINE simd8_int32 shift_right(simd8_int32 x) {
2372		return _mm256_srli_epi32(x, bits);
// Lane-wise add and equality mask.
2375	NUDGE_FORCEINLINE simd8_int32 add(simd8_int32 x, simd8_int32 y) {
2376		return _mm256_add_epi32(x, y);
2379	NUDGE_FORCEINLINE simd8_int32 cmp_eq(simd8_int32 x, simd8_int32 y) {
2380		return _mm256_cmpeq_epi32(x, y);
// Reinterprets the bits as floats without conversion.
2383	NUDGE_FORCEINLINE simd8_float asfloat(simd8_int32 x) {
2384		return _mm256_castsi256_ps(x);
2389#if NUDGE_SIMDV_WIDTH == 128
2390typedef simd4_float simdv_float;
2391typedef simd4_int32 simdv_int32;
2393namespace simd_float {
2394 NUDGE_FORCEINLINE simdv_float zerov() {
2398 NUDGE_FORCEINLINE simdv_float makev(
float x) {
2402 NUDGE_FORCEINLINE simdv_float broadcast_loadv(
const float* p) {
2403 return broadcast_load4(p);
2406 NUDGE_FORCEINLINE simdv_float loadv(
const float* p) {
2410 NUDGE_FORCEINLINE simdv_float loaduv(
const float* p) {
2414 NUDGE_FORCEINLINE
void storev(
float* p, simdv_float x) {
2418 NUDGE_FORCEINLINE
void storeuv(
float* p, simdv_float x) {
2423namespace simd_int32 {
2424 NUDGE_FORCEINLINE simdv_int32 zerov() {
2428 NUDGE_FORCEINLINE simdv_int32 makev(int32_t x) {
2432 NUDGE_FORCEINLINE simdv_int32 loadv(
const int32_t* p) {
2436 NUDGE_FORCEINLINE simdv_int32 loaduv(
const int32_t* p) {
2440 NUDGE_FORCEINLINE
void storev(int32_t* p, simdv_int32 x) {
2444 NUDGE_FORCEINLINE
void storeuv(int32_t* p, simdv_int32 x) {
2448#elif NUDGE_SIMDV_WIDTH == 256
2449typedef simd8_float simdv_float;
2450typedef simd8_int32 simdv_int32;
2452namespace simd_float {
2453 NUDGE_FORCEINLINE simdv_float zerov() {
2457 NUDGE_FORCEINLINE simdv_float makev(
float x) {
2461 NUDGE_FORCEINLINE simdv_float broadcast_loadv(
const float* p) {
2462 return broadcast_load8(p);
2465 NUDGE_FORCEINLINE simdv_float loadv(
const float* p) {
2469 NUDGE_FORCEINLINE simdv_float loaduv(
const float* p) {
2473 NUDGE_FORCEINLINE
void storev(
float* p, simdv_float x) {
2477 NUDGE_FORCEINLINE
void storeuv(
float* p, simdv_float x) {
2482namespace simd_int32 {
2483 NUDGE_FORCEINLINE simdv_int32 zerov() {
2487 NUDGE_FORCEINLINE simdv_int32 makev(int32_t x) {
2491 NUDGE_FORCEINLINE simdv_int32 loadv(
const int32_t* p) {
2495 NUDGE_FORCEINLINE simdv_int32 loaduv(
const int32_t* p) {
2499 NUDGE_FORCEINLINE
void storev(int32_t* p, simdv_int32 x) {
2503 NUDGE_FORCEINLINE
void storeuv(int32_t* p, simdv_int32 x) {
// Three-component dot product of a and b, replicated to all four lanes.
// Lane 3 of the inputs does not contribute to the sum.
2510	NUDGE_FORCEINLINE simd4_float dot(simd4_float a, simd4_float b) {
2511		simd4_float c = a*b;
2512		return simd128::shuffle32<0,0,0,0>(c) + simd128::shuffle32<1,1,1,1>(c) + simd128::shuffle32<2,2,2,2>(c);
// Cross product of two vectors stored in lanes 0..2.
// c = (a.y*b.z, a.z*b.x, a.x*b.y, ...) and d = (a.z*b.y, a.x*b.z, a.y*b.x, ...).
// The return statement is elided in this listing (presumably c - d; confirm).
2515	NUDGE_FORCEINLINE simd4_float cross(simd4_float a, simd4_float b) {
2516		simd4_float c = simd128::shuffle32<1,2,0,0>(a) * simd128::shuffle32<2,0,1,0>(b);
2517		simd4_float d = simd128::shuffle32<2,0,1,0>(a) * simd128::shuffle32<1,2,0,0>(b);
2523 NUDGE_FORCEINLINE
void cross(simd4_float ax, simd4_float ay, simd4_float az, simd4_float bx, simd4_float by, simd4_float bz, simd4_float& rx, simd4_float& ry, simd4_float& rz) {
// Normalises four vectors held as separate x, y and z registers.
// f is the reciprocal square root of the squared length, from
// simd_float::rsqrt. The statements that scale x, y and z are elided in this
// listing (presumably multiplications by f; confirm).
2529	NUDGE_FORCEINLINE
void normalize(simd4_float& x, simd4_float& y, simd4_float& z) {
2530		simd4_float f = simd_float::rsqrt(x*x + y*y + z*z);
2536#if NUDGE_SIMDV_WIDTH >= 256
2537 NUDGE_FORCEINLINE
void cross(simd8_float ax, simd8_float ay, simd8_float az, simd8_float bx, simd8_float by, simd8_float bz, simd8_float& rx, simd8_float& ry, simd8_float& rz) {
2543 NUDGE_FORCEINLINE
void normalize(simd8_float& x, simd8_float& y, simd8_float& z) {
2544 simd8_float f = simd_float::rsqrt(x*x + y*y + z*z);
2552#ifdef NUDGE_USE_ANONYMOUS_NAMESPACE
2576 float min_x[simdv_width32];
2577 float max_x[simdv_width32];
2578 float min_y[simdv_width32];
2579 float max_y[simdv_width32];
2580 float min_z[simdv_width32];
2581 float max_z[simdv_width32];
2584 struct ContactSlotV {
2585 uint32_t indices[simdv_width32];
2588 struct ContactPairV {
2589 uint32_t ab[simdv_width32];
2592 struct ContactConstraintV {
2593 uint16_t a[simdv_width32];
2594 uint16_t b[simdv_width32];
2596 float pa_z[simdv_width32];
2597 float pa_x[simdv_width32];
2598 float pa_y[simdv_width32];
2600 float pb_z[simdv_width32];
2601 float pb_x[simdv_width32];
2602 float pb_y[simdv_width32];
2604 float n_x[simdv_width32];
2605 float u_x[simdv_width32];
2606 float v_x[simdv_width32];
2608 float n_y[simdv_width32];
2609 float u_y[simdv_width32];
2610 float v_y[simdv_width32];
2612 float n_z[simdv_width32];
2613 float u_z[simdv_width32];
2614 float v_z[simdv_width32];
2616 float bias[simdv_width32];
2617 float friction[simdv_width32];
2618 float normal_velocity_to_normal_impulse[simdv_width32];
2620 float friction_coefficient_x[simdv_width32];
2621 float friction_coefficient_y[simdv_width32];
2622 float friction_coefficient_z[simdv_width32];
2624 float na_x[simdv_width32];
2625 float na_y[simdv_width32];
2626 float na_z[simdv_width32];
2628 float nb_x[simdv_width32];
2629 float nb_y[simdv_width32];
2630 float nb_z[simdv_width32];
2632 float ua_x[simdv_width32];
2633 float ua_y[simdv_width32];
2634 float ua_z[simdv_width32];
2636 float va_x[simdv_width32];
2637 float va_y[simdv_width32];
2638 float va_z[simdv_width32];
2640 float ub_x[simdv_width32];
2641 float ub_y[simdv_width32];
2642 float ub_z[simdv_width32];
2644 float vb_x[simdv_width32];
2645 float vb_y[simdv_width32];
2646 float vb_z[simdv_width32];
2649 struct ContactConstraintStateV {
2650 float applied_normal_impulse[simdv_width32];
2651 float applied_friction_impulse_x[simdv_width32];
2652 float applied_friction_impulse_y[simdv_width32];
2655 struct InertiaTransform {
2665#ifdef NUDGE_USE_ANONYMOUS_NAMESPACE
2670static inline unsigned first_set_bit(
unsigned x) {
2671 unsigned long r = 0;
2672 _BitScanForward(&r, x);
2676static inline unsigned first_set_bit(
unsigned x) {
2677 return __builtin_ctz(x);
// Bumps arena->data up to the next address that is a multiple of `alignment`
// and shrinks arena->size by the number of bytes skipped.
// The rounding uses (alignment-1) as a bit mask, which is only correct when
// `alignment` is a power of two.
// NOTE(review): callers presumably always pass a power of two - confirm.
2682static inline void* align(Arena* arena, uintptr_t alignment) {
2683 uintptr_t data = (uintptr_t)arena->data;
 // One-past-the-end address of the arena, captured before data is moved.
2684 uintptr_t end = data + arena->size;
2685 uintptr_t mask = alignment-1;
 // Round up: add the mask, then clear the low bits.
2687 data = (data + mask) & ~mask;
2689 arena->data = (
void*)data;
2690 arena->size = end - data;
 // If the aligned address passed `end`, the unsigned size wrapped around;
 // reinterpreting it as signed exposes that as a negative value.
2692 assert((intptr_t)arena->size >= 0);
2697static inline void* allocate(Arena* arena, uintptr_t size) {
2698 void* data = arena->data;
2699 arena->data = (
void*)((uintptr_t)data + size);
2700 arena->size -= size;
2702 assert((intptr_t)arena->size >= 0);
2707static inline void* allocate(Arena* arena, uintptr_t size, uintptr_t alignment) {
2708 align(arena, alignment);
2710 void* data = arena->data;
2711 arena->data = (
void*)((uintptr_t)data + size);
2712 arena->size -= size;
2714 assert((intptr_t)arena->size >= 0);
2720static inline T* allocate_struct(Arena* arena, uintptr_t alignment) {
2721 return static_cast<T*
>(allocate(arena,
sizeof(T), alignment));
2725static inline T* allocate_array(Arena* arena, uintptr_t count, uintptr_t alignment) {
2726 return static_cast<T*
>(allocate(arena,
sizeof(T)*count, alignment));
2729static inline void* reserve(Arena* arena, uintptr_t size, uintptr_t alignment) {
2730 align(arena, alignment);
2731 assert(size <= arena->size);
2735static inline void commit(Arena* arena, uintptr_t size) {
2736 allocate(arena, size);
2740static inline T* reserve_array(Arena* arena, uintptr_t count, uintptr_t alignment) {
2741 return static_cast<T*
>(reserve(arena,
sizeof(T)*count, alignment));
2745static inline void commit_array(Arena* arena, uintptr_t count) {
2746 commit(arena,
sizeof(T)*count);
2749static inline Rotation make_rotation(
const float q[4]) {
2750 Rotation r = { { q[0], q[1], q[2] }, q[3] };
2754static inline float3 make_float3(
const float x[3]) {
2755 float3 r = { x[0], x[1], x[2] };
2759static inline float3 make_float3(
float x,
float y,
float z) {
2760 float3 r = { x, y, z };
2764static inline float3 make_float3(
float x) {
2765 float3 r = { x, x, x };
2769static inline float3 operator + (float3 a, float3 b) {
2770 float3 r = { a.x + b.x, a.y + b.y, a.z + b.z };
2774static inline float3 operator - (float3 a, float3 b) {
2775 float3 r = { a.x - b.x, a.y - b.y, a.z - b.z };
2779static inline float3 operator * (
float a, float3 b) {
2780 float3 r = { a * b.x, a * b.y, a * b.z };
2784static inline float3 operator * (float3 a,
float b) {
2785 float3 r = { a.x * b, a.y * b, a.z * b };
2789static inline float3& operator *= (float3& a,
float b) {
2796static inline float dot(float3 a, float3 b) {
2797 return a.x*b.x + a.y*b.y + a.z*b.z;
2800static inline float length2(float3 a) {
2804static inline float3 cross(float3 a, float3 b) {
2805 float3 v = { a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
2809static inline float3 operator * (Rotation lhs, float3 rhs) {
2810 float3 t = 2.0f * cross(lhs.v, rhs);
2811 return rhs + lhs.s * t + cross(lhs.v, t);
2814static inline Rotation operator * (Rotation lhs, Rotation rhs) {
2815 float3 v = rhs.v*lhs.s + lhs.v*rhs.s + cross(lhs.v, rhs.v);
2816 Rotation r = { v, lhs.s*rhs.s - dot(lhs.v, rhs.v) };
2820static inline Rotation normalize(Rotation r) {
2821 float f = 1.0f / sqrtf(r.s*r.s + r.v.x*r.v.x + r.v.y*r.v.y + r.v.z*r.v.z);
2827static inline Rotation inverse(Rotation r) {
2834static inline float3x3 matrix(Rotation q) {
2835 float kx = q.v.x + q.v.x;
2836 float ky = q.v.y + q.v.y;
2837 float kz = q.v.z + q.v.z;
2839 float xx = kx*q.v.x;
2840 float yy = ky*q.v.y;
2841 float zz = kz*q.v.z;
2842 float xy = kx*q.v.y;
2843 float xz = kx*q.v.z;
2844 float yz = ky*q.v.z;
2850 { 1.0f - yy - zz, xy + sz, xz - sy },
2851 { xy - sz, 1.0f - xx - zz, yz + sx },
2852 { xz + sy, yz - sx, 1.0f - xx - yy },
2857static inline Transform operator * (Transform lhs, Transform rhs) {
2858 float3 p = make_rotation(lhs.rotation) * make_float3(rhs.position) + make_float3(lhs.position);
2859 Rotation q = make_rotation(lhs.rotation) * make_rotation(rhs.rotation);
2862 {{ p.x, p.y, p.z }},
2864 {{ q.v.x, q.v.y, q.v.z, q.s }},
// Forward declarations only: none of these has a body here.
// NOTE(review): the definitions are expected to appear elsewhere in the file - confirm.
2870void simulate(context_t* c,
float timeStep,
unsigned numSubSteps,
unsigned numIterations);
2871void collide(context_t* c, BodyConnections body_connections);
// Returns a ContactImpulseData*; the Arena parameter suggests the result is
// carved out of `memory` (assumption - verify against the definition).
2872ContactImpulseData* read_cached_impulses(ContactCache contact_cache, ContactData contacts, Arena* memory);
2873void write_cached_impulses(ContactCache* contact_cache, ContactData contacts, ContactImpulseData* contact_impulses);
2874ContactConstraintData* setup_contact_constraints(context_t* c, ContactImpulseData* contact_impulses, Arena* memory);
2875void apply_impulses(ContactConstraintData* data, BodyData bodies);
2876void update_cached_impulses(ContactConstraintData* data, ContactImpulseData* contact_impulses);
2877void advance(context_t* c,
float time_step);
2880Transform
TransformMul(Transform T0,Transform T1) {
return T0*T1;}
2882#ifdef NUDGE_SIMDE_USE_CUSTOM_MM_MALLOC
2883#if (defined(__EMSCRIPTEN__) || (defined(NUDGE_USE_SIMDE) && defined(SIMDE_NO_NATIVE)))
2884static inline void* _mm_malloc (
size_t size,
size_t alignment) {
2886 return ::_aligned_malloc(size, alignment);
2889 if (alignment == 1) return ::malloc (size);
2890 if (alignment == 2 || (
sizeof (
void *) == 8 && alignment == 4)) alignment =
sizeof (
void *);
2891 if (::posix_memalign (&ptr, alignment, size) == 0)
return ptr;
2895static inline void _mm_free (
void * ptr) {
2897 ::_aligned_free(ptr);
2905void* malloc(
size_t size) {
return _mm_malloc(size,64);}
2906void free(
void* ptr) {_mm_free(ptr);}
2908inline float* nm_QuatFromMat3Or4(
float* __restrict result4,
const float* __restrict m16,
int num_m16_cols=4) {
2910 float* q=result4;
const float* m=m16;
2911 float *qx=&q[0],*qy=&q[1],*qz=&q[2],*qw=&q[3];
const int bc2=num_m16_cols,bc3=num_m16_cols*2;
2912 const float c00=m[0],c01=m[1],c02=m[2], c10=m[bc2],c11=m[bc2+1],c12=m[bc2+2], c20=m[bc3],c21=m[bc3+1],c22=m[bc3+2];
2914 float fourXSquaredMinus1 = c00 - c11 - c22, fourYSquaredMinus1 = c11 - c00 - c22;
2915 float fourZSquaredMinus1 = c22 - c00 - c11, fourWSquaredMinus1 = c00 + c11 + c22;
2916 float biggestVal,mult,fourBiggestSquaredMinus1 = fourWSquaredMinus1;
2917 int biggestIndex = 0;
2919 if(fourXSquaredMinus1 > fourBiggestSquaredMinus1) {fourBiggestSquaredMinus1 = fourXSquaredMinus1;biggestIndex = 1;}
2920 if(fourYSquaredMinus1 > fourBiggestSquaredMinus1) {fourBiggestSquaredMinus1 = fourYSquaredMinus1;biggestIndex = 2;}
2921 if(fourZSquaredMinus1 > fourBiggestSquaredMinus1) {fourBiggestSquaredMinus1 = fourZSquaredMinus1;biggestIndex = 3;}
2923 biggestVal = sqrtf(fourBiggestSquaredMinus1 + (
float)1) * (float)0.5;
2924 mult = (float)0.25 / biggestVal;
2926 switch (biggestIndex) {
2928 *qw = biggestVal; *qx = (c12 - c21) * mult; *qy = (c20 - c02) * mult; *qz = (c01 - c10) * mult;
2931 *qw = (c12 - c21) * mult; *qx = biggestVal; *qy = (c01 + c10) * mult; *qz = (c20 + c02) * mult;
2934 *qw = (c20 - c02) * mult; *qx = (c01 + c10) * mult; *qy = biggestVal; *qz = (c12 + c21) * mult;
2937 *qw = (c01 - c10) * mult; *qx = (c20 + c02) * mult; *qy = (c12 + c21) * mult; *qz = biggestVal;
2942 *qx=*qy=*qz=(float)0;*qw=(float)1;
2947float*
nm_QuatFromMat4(
float* __restrict result4,
const float* __restrict m16) {
return nm_QuatFromMat3Or4(result4,m16,4);}
2948float*
nm_QuatFromMat3(
float* __restrict result4,
const float* __restrict m9) {
return nm_QuatFromMat3Or4(result4,m9,3);}
2950inline float* nm_Mat3Or4SetRotationFromQuat(
float* __restrict result16,
const float* __restrict q4,
int num_res_cols=4) {
2952 const float one =(float)1,two=(
float)2;
2953 float* m=result16;
const float* q=q4;
2954 const float qx=q[0],qy=q[1],qz=q[2],qw=q[3];
const int bc2=num_res_cols,bc3=num_res_cols*2;
2955 float *c00=&m[0],*c01=&m[1],*c02=&m[2], *c10=&m[bc2],*c11=&m[bc2+1],*c12=&m[bc2+2], *c20=&m[bc3],*c21=&m[bc3+1],*c22=&m[bc3+2];
2957 float qxx = (qx * qx), qyy = (qy * qy), qzz = (qz * qz);
2958 float qxz = (qx * qz), qxy = (qx * qy), qyz = (qy * qz);
2959 float qwx = (qw * qx), qwy = (qw * qy), qwz = (qw * qz);
2961 *c00 = one - two * (qyy + qzz); *c01 = two * (qxy + qwz); *c02 = two * (qxz - qwy);
2962 *c10 = two * (qxy - qwz); *c11 = one - two * (qxx + qzz); *c12 = two * (qyz + qwx);
2963 *c20 = two * (qxz + qwy); *c21 = two * (qyz - qwx); *c22 = one - two * (qxx + qyy);
2967float*
nm_Mat4SetRotationFromQuat(
float* __restrict result16,
const float* __restrict q4) {
return nm_Mat3Or4SetRotationFromQuat(result16,q4,4);}
2968float*
nm_Mat3FromQuat(
float* __restrict result9,
const float* __restrict q4) {
return nm_Mat3Or4SetRotationFromQuat(result9,q4,3);}
// Estimates an angular velocity from two orientations and stores it in angVel3.
// Quaternions are read as (x, y, z, w), i.e. the scalar part is at index 3.
// halfTimeStep is used as a divisor; a value of 0 yields a zero result
// instead of a division by zero.
2970void nm_QuatGetAngularVelocity(
float* __restrict angVel3,
const float* newQuat4,
const float* oldQuat4,
float halfTimeStep) {
 // a = component-wise difference (new - old).
2973 const float a[4] = {newQuat4[0]-oldQuat4[0],newQuat4[1]-oldQuat4[1],newQuat4[2]-oldQuat4[2],newQuat4[3]-oldQuat4[3]};
 // b = old quaternion with its vector part negated (its conjugate).
2974 const float b[4] = {-oldQuat4[0],-oldQuat4[1],-oldQuat4[2],oldQuat4[3]};
2975 const float invHalfTimeStep = halfTimeStep!=(float)0 ? (
float)1/halfTimeStep : (float)0;
 // Vector part of the quaternion product a*b, scaled by 1/halfTimeStep.
2977 angVel3[0] = (a[3] * b[0] + a[0] * b[3] + a[1] * b[2] - a[2] * b[1])*invHalfTimeStep;
2978 angVel3[1] = (a[3] * b[1] + a[1] * b[3] + a[2] * b[0] - a[0] * b[2])*invHalfTimeStep;
2979 angVel3[2] = (a[3] * b[2] + a[2] * b[3] + a[0] * b[1] - a[1] * b[0])*invHalfTimeStep;
2983float*
nm_QuatMul(
float* qOut4,
const float* a4,
const float* b4) {
2985# if (defined(NM_USE_SIMD) && defined(__SSE__))
2992# define NM_MM_LOAD_PS(X) _mm_load_ps(X)
2993# define NM_MM256_LOAD_PD(X) _mm256_load_pd(X)
2994# define NM_MM_STORE_PS(X,Y) _mm_store_ps(X,Y)
2995# define NM_MM256_STORE_PD(X,Y) _mm256_store_pd(X,Y)
2997 __m128 xyzw = NM_MM_LOAD_PS(a4);
2998 __m128 abcd = NM_MM_LOAD_PS(b4);
3000 __m128 wzyx = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(0,1,2,3));
3001 __m128 baba = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(0,1,0,1));
3002 __m128 dcdc = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(2,3,2,3));
3008 __m128 ZnXWY = _mm_hsub_ps(_mm_mul_ps(xyzw, baba), _mm_mul_ps(wzyx, dcdc));
3011 __m128 XZYnW = _mm_hadd_ps(_mm_mul_ps(xyzw, dcdc), _mm_mul_ps(wzyx, baba));
3021 __m128 XZWY = _mm_addsub_ps(_mm_shuffle_ps(XZYnW, ZnXWY, _MM_SHUFFLE(3,2,1,0)),
3022 _mm_shuffle_ps(ZnXWY, XZYnW, _MM_SHUFFLE(2,3,0,1)));
3025 NM_MM_STORE_PS(qOut4,_mm_shuffle_ps(XZWY, XZWY, _MM_SHUFFLE(2,1,3,0)));
3028 const float x = a4[0],y = a4[1],z = a4[2], w = a4[3];
3029 const float a = b4[0],b = b4[1],c = b4[2], d = b4[3];
3030 qOut4[0] = x*d + y*c - z*b + w*a;
3031 qOut4[1] = -x*c + y*d + z*a + w*b;
3032 qOut4[2] = x*b - y*a + z*d + w*c;
3033 qOut4[3] = -x*a - y*b - z*c + w*d;
3043void nm_QuatAdvance(
float* __restrict qOut4,
const float* __restrict q4,
const float* __restrict angVel3,
float halfTimeStep) {
3046 float deltaQ[4] = {angVel3[0],angVel3[1],angVel3[2],(float)0};
int i;
3048 for (i=0;i<4;i++) qOut4[i] = q4[i]+deltaQ[i]*halfTimeStep;
// Dot product of two 3-component float vectors.
// Returns a3[0]*b3[0] + a3[1]*b3[1] + a3[2]*b3[2].
float nm_Vec3Dot(const float* __restrict a3, const float* __restrict b3) {
    return a3[0] * b3[0]
         + a3[1] * b3[1]
         + a3[2] * b3[2];
}
// Writes the cross product a3 x b3 into vOut3.
// vOut3 must not alias a3 or b3: it is declared __restrict, and vOut3[0] is
// written before a3[0]/b3[0] are read for the remaining components.
3054float*
nm_Vec3Cross(
float* __restrict vOut3,
const float* __restrict a3,
const float* __restrict b3) {
3055 vOut3[0] = a3[1] * b3[2] - a3[2] * b3[1];
3056 vOut3[1] = a3[2] * b3[0] - a3[0] * b3[2];
3057 vOut3[2] = a3[0] * b3[1] - a3[1] * b3[0];
3061# define NM_EPSILON (0.00000000001f)
3064 float len = v3[0]*v3[0]+v3[1]*v3[1]+v3[2]*v3[2];
int i;
3065 if (len>NM_EPSILON) {len = sqrtf(len);
for (i=0;i<3;i++) v3[i]/=len;}
3066 else {len=v3[0]=v3[2]=(float)0;v3[1]=(float)1;}
3070 float len = v3[0]*v3[0]+v3[1]*v3[1]+v3[2]*v3[2];
int i;
3071 if (len>NM_EPSILON) {len = sqrtf(len);
for (i=0;i<3;i++) v3Out[i]=v3[i]/len;}
3072 else {len=v3Out[0]=v3Out[2]=(float)0;v3Out[1]=(float)1;}
// Normalizes the 4-component quaternion q4 in place.
// If its Euclidean length is not strictly positive, all four components are
// set to zero instead (the result is the zero quaternion, not identity).
void nm_QuatNormalize(float* __restrict q4) {
    const float norm = sqrtf(q4[0]*q4[0]+q4[1]*q4[1]+q4[2]*q4[2]+q4[3]*q4[3]);
    int k;
    if (!(norm > 0)) {
        // Degenerate input: nothing sensible to scale, so clear everything.
        for (k = 0; k < 4; ++k) q4[k] = (float)0;
        return;
    }
    for (k = 0; k < 4; ++k) q4[k] /= norm;
}
3076float* nm_QuatSlerpEps(
float* __restrict result4,
const float* __restrict a4,
const float* __restrict b4,
float slerpTime_In_0_1,
int normalizeResult4AfterLerp,
float eps) {
3081 const float one = (float)1;
3082 const float *qStart=a4;
3083 float qEnd[4]={b4[0],b4[1],b4[2],b4[3]};
3084 float* qOut=result4;
3086 float fCos = qStart[0] * qEnd[0] + qStart[1] * qEnd[1] + qStart[2] * qEnd[2] + qStart[3] * qEnd[3];
3090 {fCos = -fCos;qEnd[0] = -qEnd[0];qEnd[1] = -qEnd[1];qEnd[2] = -qEnd[2];qEnd[3] = -qEnd[3];}
3092 if( fCos < one - eps)
3095# ifndef NM_QUAT_SLERP_USE_ACOS_AND_SIN_INSTEAD_OF_ATAN2_AND_SQRT
3097 float fSin = sqrtf(one - fCos*fCos);
3098 float fAngle = atan2f(fSin, fCos);
3103 float fAngle = acosf(fCos);
3104 float fSin = sinf(fAngle);
3107 const float fInvSin = one / fSin;
3108 const float fCoeff0 = sinf((one - slerpTime_In_0_1) * fAngle) * fInvSin;
3109 const float fCoeff1 = sinf(slerpTime_In_0_1 * fAngle) * fInvSin;
3112 qOut[0] = (fCoeff0 * qStart[0] + fCoeff1 * qEnd[0]);
3113 qOut[1] = (fCoeff0 * qStart[1] + fCoeff1 * qEnd[1]);
3114 qOut[2] = (fCoeff0 * qStart[2] + fCoeff1 * qEnd[2]);
3115 qOut[3] = (fCoeff0 * qStart[3] + fCoeff1 * qEnd[3]);
3126 const float fCoeff0 = one - slerpTime_In_0_1;
3127 const float fCoeff1 = slerpTime_In_0_1;
3130 qOut[0] = (fCoeff0 * qStart[0] + fCoeff1 * qEnd[0]);
3131 qOut[1] = (fCoeff0 * qStart[1] + fCoeff1 * qEnd[1]);
3132 qOut[2] = (fCoeff0 * qStart[2] + fCoeff1 * qEnd[2]);
3133 qOut[3] = (fCoeff0 * qStart[3] + fCoeff1 * qEnd[3]);
3139#ifndef NM_SLERP_EPSILON
3140# define NM_SLERP_EPSILON (0.0001f)
3142float*
nm_QuatSlerp(
float* __restrict result4,
const float* __restrict a4,
const float* __restrict b4,
float slerpTime_In_0_1,
int normalizeResult4AfterLerp) {
return nm_QuatSlerpEps(result4,a4,b4,slerpTime_In_0_1,normalizeResult4AfterLerp,NM_SLERP_EPSILON);}
3143float*
nm_QuatFromAngleAxis(
float* __restrict qOut4,
float rfAngle,
float rkAxisX,
float rkAxisY,
float rkAxisZ) {
3149 const float hangle=(float)(0.5)*rfAngle;fSin=sinf(hangle),fCos=cosf(hangle);
3150 qOut4[3]=fCos; qOut4[0]=fSin*rkAxisX; qOut4[1]=fSin*rkAxisY; qOut4[2]=fSin*rkAxisZ;
3153void nm_QuatToAngleAxis(
const float* __restrict q4,
float* __restrict rfAngleOut1,
float* __restrict rkAxisOut3) {
3160 float fSqrLength = q[0]*q[0]+q[1]*q[1]+q[2]*q[2];
3161 if (fSqrLength > (
float)0) {
3162 float fInvLength;*rfAngleOut1 = (float)2*acosf(q[3]);fInvLength = (float)1/sqrtf(fSqrLength);
3163 rkAxisOut3[0] = q[0]*fInvLength;rkAxisOut3[1] = q[1]*fInvLength;rkAxisOut3[2] = q[2]*fInvLength;
3167 *rfAngleOut1 = rkAxisOut3[0] = rkAxisOut3[2] = (float)0;
3168 rkAxisOut3[1] = (float)1;
3181 if (matrix16WithoutScaling) {
3183 memcpy(Tout->position,&matrix16WithoutScaling[12],3*
sizeof(
float));
3185 else *Tout = identity_transform;
3190 assert(matrix16WithoutScaling);
3192 memcpy(Tout.position,&matrix16WithoutScaling[12],3*
sizeof(
float));
3198 for (i=0;i<3;i++) matrix16Out[12+i] = T->position[i];
3199 matrix16Out[3]=matrix16Out[7]=matrix16Out[11]=0.f;matrix16Out[15]=1.f;
3202float*
nm_QuatMulVec3(
float* __restrict vOut3,
const float* __restrict q4,
const float* __restrict vIn3) {
3203 float uv[3],uuv[3];
int i;
3205 for (i=0;i<3;i++) vOut3[i] = vIn3[i] + ((uv[i] * q4[3]) + uuv[i]) * (float)2;
3208float*
nm_QuatGetAxis(
float* __restrict vOut3,
const float* __restrict q4,
float axisX,
float axisY,
float axisZ) {
3209 const float vIn[3]={axisX,axisY,axisZ};
3223float*
nm_QuatRotate(
float* __restrict qInOut4,
float angle,
float axisX,
float axisY,
float axisZ) {
// Multiplies two 4x4 matrices stored as 16 floats each and writes the product
// into result16. Reading index r + 4*c as (row r, column c), the loop computes
// result16 = ml16 * mr16, one group of four outputs per iteration.
// No aliasing check is done here: all three pointers are __restrict, so
// result16 must not overlap either input.
3227inline float* nm_Mat4Mul_NoCheck(
float* __restrict result16,
const float* __restrict ml16,
const float* __restrict mr16) {
3228 int i,i4;
float mri4plus0,mri4plus1,mri4plus2,mri4plus3;
3229 for(i = 0; i < 4; i++) {
 // Cache the four mr16 entries used by this iteration before writing results.
3230 i4=4*i;mri4plus0=mr16[i4];mri4plus1=mr16[i4+1];mri4plus2=mr16[i4+2];mri4plus3=mr16[i4+3];
3231 result16[ i4] = ml16[0]*mri4plus0 + ml16[4]*mri4plus1 + ml16[ 8]*mri4plus2 + ml16[12]*mri4plus3;
3232 result16[1+i4] = ml16[1]*mri4plus0 + ml16[5]*mri4plus1 + ml16[ 9]*mri4plus2 + ml16[13]*mri4plus3;
3233 result16[2+i4] = ml16[2]*mri4plus0 + ml16[6]*mri4plus1 + ml16[10]*mri4plus2 + ml16[14]*mri4plus3;
3234 result16[3+i4] = ml16[3]*mri4plus0 + ml16[7]*mri4plus1 + ml16[11]*mri4plus2 + ml16[15]*mri4plus3;
3238float*
nm_Mat4Mul(
float* result16,
const float* ml16,
const float* mr16) {
3239 if (result16==ml16) {
float ML16[16];memcpy(ML16,ml16,16*
sizeof(
float));
return nm_Mat4Mul_NoCheck(result16,ML16,mr16);}
3240 else if (result16==mr16) {
float MR16[16];memcpy(MR16,mr16,16*
sizeof(
float));
return nm_Mat4Mul_NoCheck(result16,ml16,MR16);}
3241 return nm_Mat4Mul_NoCheck(result16,ml16,mr16);
3244void TransformAssignToBody(context_t* c,
unsigned body,Transform newT,
float deltaTime,int16_t aux_body) {
3245 assert(c && body<c->bodies.count);
3246 BodyFilter* filter = &c->bodies.filters[body];
3247 const FlagMask flags = filter->flags;
3248 Transform* T = &c->bodies.transforms[body];
3249 float* P = newT.position;
float* Q = newT.rotation;
3251 float* linvel = c->bodies.momentum[body].velocity;
3252 float* angvel = c->bodies.momentum[body].angular_velocity;
3253 if (deltaTime!=0.f) {
3256 assert((
unsigned)aux_body!=T->body);
3257 assert((
unsigned)aux_body<c->bodies.count);
3258 const float* auxLinVel = c->bodies.momentum[aux_body].velocity;
3259 const float* auxAngVel = c->bodies.momentum[aux_body].angular_velocity;
3260 const Transform* auxT = &c->bodies.transforms[aux_body];
3262 const float delta_position[3] = {T->position[0] - auxT->position[0],T->position[1] - auxT->position[1],T->position[2] - auxT->position[2]};
3263 float deltaLinVel[3];
nm_Vec3Cross(deltaLinVel,auxAngVel,delta_position);
3264 for (
int l=0;l<3;l++) {
3265 linvel[l]=(P[l]-T->position[l])/deltaTime + auxLinVel[l] + deltaLinVel[l];
3266 angvel[l]+=auxAngVel[l];
3270 for (
int l=0;l<3;l++) {P[l]=T->position[l]+linvel[l]*deltaTime;}
3275 else {
for (
int l=0;l<3;l++) linvel[l]=(P[l]-T->position[l])/deltaTime;}
3278 memcpy(T->position,P,3*
sizeof(
float));
3279 memcpy(T->rotation,Q,4*
sizeof(
float));
3284 memset(linvel,0,3*
sizeof(
float));
3285 memset(angvel,0,3*
sizeof(
float));
3287 memcpy(T->position,P,3*
sizeof(
float));
3288 memcpy(T->rotation,Q,4*
sizeof(
float));
3291 c->bodies.idle_counters[body]=0;
3295 assert(c && body<c->bodies.count);
3296 Transform* newT = &c->bodies.transforms[body];
3297 Transform oldT = *newT;
3298 float* linvel = c->bodies.momentum[body].velocity;
3299 float* angvel = c->bodies.momentum[body].angular_velocity;
3302 const uint32_t flags = c->bodies.filters[body].flags;
3305 for (
int l=0;l<3;l++) {newT->position[l]=oldT.position[l]+linvel[l]*deltaTime;}
3306 nm_QuatAdvance(newT->rotation,oldT.rotation,angvel,deltaTime*0.5f);
3308 else if (flags&
BF_IS_DYNAMIC) c->bodies.idle_counters[body]=0;
3312 Transform R;
const float c0 = 1.f - time, c1 = time;R.
body=T0.body;
3313 for (
int l=0;l<3;l++) R.position[l]=c0*T0.position[l]+c1*T1.position[l];
3314 nm_QuatSlerp(R.rotation,T0.rotation,T1.rotation,time,1);
3318void calculate_box_inertia(
float result[3],
float mass,
float hsizex,
float hsizey,
float hsizez,
const float comOffset[3]) {
3321 float kcx2 = k*hsizex*hsizex, kcy2 = k*hsizey*hsizey, kcz2 = k*hsizez*hsizez;
3322 result[0] = (kcy2+kcz2); result[1] = (kcx2+kcz2); result[2] = (kcx2+kcy2);
3324 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3325 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3326 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3329 else memset(result, 0, 3*
sizeof(
float));
3334 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3336 else memset(result, 0, 3*
sizeof(
float));
3340 result[0] = result[1] = result[2] = (mass*radius*radius)/(hollow?1.5f:2.5f);
3342 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3343 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3344 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3347 else memset(result, 0, 3*
sizeof(
float));
3352 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3354 else memset(result, 0, 3*
sizeof(
float));
3358 float radius2 = radius*radius, h2 = halfHeight*halfHeight*4.f;
3359 result[0] = result[1] = result[2] = mass*(3.f*radius2+h2)/12.f;
3360 result[upAxis] = mass*radius2/2.f;
3362 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3363 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3364 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3367 else memset(result, 0, 3*
sizeof(
float));
3372 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3374 else memset(result, 0, 3*
sizeof(
float));
3379 const float L = 2.f*halfCylinderHeight;
3380 float radius2 = radius*radius;
3381 const float Vcyl = M_PI*radius2*L, Vhem = 2.0f*M_PI*radius2*radius;
const float Vtot = Vcyl + 2.f*Vhem;
3382 const float Mcyl = mass*Vcyl/Vtot, Mhem = mass*Vhem/Vtot;
3383 const float Icyl = Mcyl*(L*L+3.f*radius2)/12.f;
3384 const float Ihem = Mhem*radius2/2.5f;
3385 result[0] = result[1] = result[2] = Icyl + 2.f*Ihem + Mhem*(4.f*L+3*radius)*(4.f*L+3*radius)/32.f;
3386 result[upAxis] = (5.f*Mcyl+8.f*Mhem)*radius2*0.1f;
3388 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3389 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3390 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3393 else memset(result, 0, 3*
sizeof(
float));
3398 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3400 else memset(result, 0, 3*
sizeof(
float));
3407 float radius2 = R*R+r*r, h2 = halfHeight*halfHeight*4.f;
3408 result[0] = result[1] = result[2] = mass*(3.f*radius2+h2)/12.f;
3409 result[upAxis] = mass*radius2/2.f;
3411 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3412 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3413 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3416 else memset(result, 0, 3*
sizeof(
float));
3421 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3423 else memset(result, 0, 3*
sizeof(
float));
3429 float a2 = minorRadius*minorRadius, b2 = majorRadius*majorRadius;
3430 result[0] = result[1] = result[2] = 0.25f*mass*(4.f*b2+3*a2);
3431 result[upAxis] = 0.125f*mass*(5.f*a2+4.f*b2);
3433 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3434 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3435 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3438 else memset(result, 0, 3*
sizeof(
float));
3443 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3445 else memset(result, 0, 3*
sizeof(
float));
3449 float radius2 = radius*radius, h2 = halfHeight*halfHeight;
3450 result[0] = result[1] = result[2] = mass*(3.f*radius2+2.f*h2)/20.f;
3451 result[upAxis] = mass*3.f*radius2/10.f;
3453 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3454 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3455 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3458 else memset(result, 0, 3*
sizeof(
float));
3463 for (
int i=0;i<3;i++) result[i] = 1.f/result[i];
3465 else memset(result, 0, 3*
sizeof(
float));
3473 log(
"Using 8-wide AVX\n");
3475 log(
"Using 4-wide SSE\n");
3476#if defined(__SSE4_1__) || defined(__AVX__)
3477 log(
"BLENDVPS: Enabled\n");
3479 log(
"BLENDVPS: Disabled\n");
3484 log(
"FMA: Enabled\n");
3486 log(
"FMA: Disabled\n");
3489# ifdef NUDGE_USE_SIMDE
3490 log(
"\nUSING SIMDE (simd everywhere).\n");
3491# ifdef SIMDE_AVX2_NATIVE
3492 log(
"SIMDE: SIMDE_AVX2_NATIVE is defined.\n");
3495# ifdef SIMDE_AVX_NATIVE
3496 log(
"SIMDE: SIMDE_AVX_NATIVE is defined.\n");
3499# ifdef SIMDE_SSE2_NATIVE
3500 log(
"SIMDE: SIMDE_SSE2_NATIVE is defined.\n");
3503# ifdef SIMDE_SSE_NATIVE
3504 log(
"SIMDE: SIMDE_SSE_NATIVE is defined.\n");
3507# ifdef SIMDE_MMX_NATIVE
3508 log(
"SIMDE: SIMDE_MMX_NATIVE is defined.\n");
3511# ifdef SIMDE_NO_NATIVE
3512 log(
"SIMDE: SIMDE_NO_NATIVE is defined.\n");
3519#ifndef NUDGE_DEFAULT_MAX_NUM_BOXES
3520# define NUDGE_DEFAULT_MAX_NUM_BOXES 256
3522#ifndef NUDGE_DEFAULT_MAX_NUM_SPHERES
3523# define NUDGE_DEFAULT_MAX_NUM_SPHERES 256
3525#define NUDGE_START_SPHERE_TAG (16384)
3527#if ((NUDGE_DEFAULT_MAX_NUM_BOXES+NUDGE_DEFAULT_MAX_NUM_SPHERES)>8192)
3528# error. It must be (NUDGE_DEFAULT_MAX_NUM_BOXES+NUDGE_DEFAULT_MAX_NUM_SPHERES)<=8192
3531#ifndef NUDGE_FRICTION_MODEL
3532# define NUDGE_FRICTION_MODEL(F1,F2) ((F1)*(F2)*0.5f)
3535#ifndef NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES
3536# define NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES 40
3538#ifndef NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS
3539# define NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS 10
3542void* _my_mm_realloc(
void** pp,
size_t new_capacity,
size_t capacity,
size_t item_size,
size_t alignment) {
3544 unsigned char* p_old = (
unsigned char*) *pp;assert(p_old);
3545 unsigned char* p = (
unsigned char*) _mm_malloc(new_capacity*item_size, alignment);assert(p);
3546 memcpy(p,p_old,capacity*item_size);
3551size_t _my_mm_realloc_grow(
void** pp,
size_t new_size,
size_t capacity,
size_t item_size,
size_t alignment) {
3553 if (new_size>=capacity)
return capacity;
3554 const size_t new_capacity = capacity==0 ? new_size : (new_size + capacity/2);
3555 void* p = _my_mm_realloc(pp,new_capacity,capacity,item_size,alignment);
3557 return new_capacity;
3560 const uint32_t capacity = kd->key_frame_capacity;
3561 if (capacity>=new_size)
return;
3562 const size_t new_capacity = _my_mm_realloc_grow((
void**) &kd->key_frame_transforms,new_size,capacity,
sizeof(kd->key_frame_transforms[0]),64);
3563 const size_t tmp = _my_mm_realloc_grow((
void**) &kd->key_frame_modes,new_size,capacity,
sizeof(kd->key_frame_modes[0]),64);assert(tmp==new_capacity);
3564 kd->key_frame_capacity = new_capacity;
3565 for (uint32_t i=capacity;i<new_capacity;i++) {
3566 kd->key_frame_transforms[i] = identity_transform;
3569 assert(new_capacity>new_size);
3572 const uint32_t capacity = kd->animations_capacity;
3573 if (capacity>=new_size)
return;
3574 const size_t new_capacity = _my_mm_realloc_grow((
void**) &kd->animations,new_size,capacity,
sizeof(kd->animations[0]),64);
3575 kd->animations_capacity = new_capacity;
3576 memset(&kd->animations[capacity],0,
sizeof(KinematicData::Animation)*(new_capacity-capacity));
3577 for (uint32_t i=capacity;i<new_capacity;i++) {
3578 KinematicData::Animation* m = &kd->animations[i];
3579 m->baseT = identity_transform;
3581 m->total_time = m->play_time = -1.f;
3584 assert(new_capacity>new_size);
3590# ifndef NUDGE_DEFAULT_GRAVITY
3591# define NUDGE_DEFAULT_GRAVITY (-9.82f)
3593# ifndef NUDGE_DEFAULT_FRICTION
3594# define NUDGE_DEFAULT_FRICTION (1.f)
3596 assert(c && c->MAX_NUM_BODIES==c->MAX_NUM_SPHERES+c->MAX_NUM_BOXES && c->MAX_NUM_SPHERES>0 && c->MAX_NUM_BOXES>0);
3597 memset(c->bodies.idle_counters,0,
sizeof(uint8_t)*c->MAX_NUM_BODIES);
3598 memset(c->colliders.boxes.data,0,
sizeof(SphereCollider)*c->MAX_NUM_BOXES);
3599 memset(c->colliders.spheres.data,0,
sizeof(SphereCollider)*c->MAX_NUM_SPHERES);
3600 memset(c->kinematic_data.animations,0,
sizeof(KinematicData::Animation)*c->kinematic_data.animations_capacity);
3601 memset(c->kinematic_data.key_frame_transforms,0,
sizeof(Transform)*c->kinematic_data.key_frame_capacity);
3602 memset(c->kinematic_data.key_frame_modes,0,
sizeof(
KinematicData::TimeMode)*c->kinematic_data.key_frame_capacity);
3603 c->active_bodies.count=0;
3605 c->colliders.boxes.count = 0;
3606 for (uint16_t i=0;i<c->MAX_NUM_BOXES;i++) {c->colliders.boxes.tags[i] = i;}
3607 c->colliders.spheres.count = 0;
3608 for (uint16_t i=0;i<c->MAX_NUM_SPHERES;i++) {c->colliders.spheres.tags[i] = NUDGE_START_SPHERE_TAG+i;}
3609 c->contact_cache.count=0;assert(c->contact_cache.capacity>0);
3610 c->contact_data.count=0;assert(c->contact_data.capacity>0);
3611 c->global_data.removed_bodies_count = c->global_data.finalized_removed_bodies_count = 0;assert(c->global_data.removed_bodies_capacity>0);
3612 c->kinematic_data.key_frame_count=0;
3613 c->kinematic_data.animations_count=0;
3614 for (uint32_t i=0;i<c->kinematic_data.key_frame_capacity;i++) {
3615 c->kinematic_data.key_frame_transforms[i] = identity_transform;
3618 for (uint32_t i=0;i<c->kinematic_data.animations_capacity;i++) {
3619 KinematicData::Animation* m = &c->kinematic_data.animations[i];
3620 m->baseT = identity_transform;
3622 m->total_time = m->play_time = -1.f;
3626 for (
unsigned i=0;i<c->MAX_NUM_BODIES;i++) {
3627 BodyInfo* info = &c->bodies.infos[i];
3628 BodyProperties*
property = &c->bodies.properties[i];
3629 BodyFilter* filter = &c->bodies.filters[i];
3630 BodyLayout* layout = &c->bodies.layouts[i];
3631 memset(info,0,
sizeof(BodyInfo));
3632 memset(property,0,
sizeof(BodyProperties));
3633 memset(filter,0,
sizeof(BodyFilter));
3634 memset(layout,0,
sizeof(BodyLayout));
3635 layout->first_box_index=layout->first_sphere_index=-1;
3636 property->gravity[1] = NUDGE_DEFAULT_GRAVITY;
3637 property->friction = NUDGE_DEFAULT_FRICTION;
3641# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
3646 SimulationParams* sp = &c->simulation_params;
3647 sp->numsubsteps_overflow_in_last_frame=0;
3648 sp->num_substeps_in_last_frame=0;
3649 sp->remaining_time_in_seconds=0;
3650 sp->time_step_minus_remaining_time=0;
// Sets up a not-yet-initialized context `c` with room for MAX_NUM_BOXES box
// colliders and MAX_NUM_SPHERES sphere colliders. The body capacity is the sum
// of the two. Validates the limits and the simulation parameters already stored
// in c->simulation_params, then allocates the scratch arena and every per-body,
// per-collider, contact, kinematic and removed-body buffer with _mm_malloc.
// Preconditions (asserted): c->MAX_NUM_BODIES==0, the arena is empty, and
// MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192.
// NOTE(review): none of the _mm_malloc results is checked for NULL in the lines visible here.
3654void init_context_with(context_t* c,
unsigned MAX_NUM_BOXES,
unsigned MAX_NUM_SPHERES) {
// Switch on flush-to-zero and denormals-are-zero, through the native macros or
// their SIMDE_ counterparts depending on the build configuration.
3655#if (!defined(__EMSCRIPTEN__) && !defined(NUDGE_USE_SIMDE))
3657#ifndef NUDGE_USE_SIMDE
3658 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
3659 _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
3661 SIMDE_MM_SET_FLUSH_ZERO_MODE(SIMDE_MM_FLUSH_ZERO_ON);
3662 SIMDE_MM_SET_DENORMALS_ZERO_MODE(SIMDE_MM_DENORMALS_ZERO_ON);
// Default arena size: 512000 plus 50*N*N for N bodies; users can override it by
// defining NUDGE_ARENA_SIZE_MACRO before this point.
3666#ifndef NUDGE_ARENA_SIZE_MACRO
3667# define NUDGE_ARENA_SIZE_MACRO(MAX_NUM_BODIES) (512000+50*(MAX_NUM_BODIES)*(MAX_NUM_BODIES))
// A non-zero MAX_NUM_BODIES means the context has already been initialized.
3671 assert(c->MAX_NUM_BODIES==0);
3672 assert((MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192) &&
"nudge has a upper limit on the number of colliders: (MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192).");
// The capacities are written through casts; presumably the members are declared
// const in context_t (TODO confirm against the struct definition).
3675 *((
unsigned*)&c->MAX_NUM_BOXES) = MAX_NUM_BOXES;
3676 *((
unsigned*)&c->MAX_NUM_SPHERES) = MAX_NUM_SPHERES;
3677 *((
unsigned*)&c->MAX_NUM_BODIES) = c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;
3679 const unsigned NUDGE_MAX_NUM_BODIES = c->MAX_NUM_BODIES;
3680 const unsigned NUDGE_MAX_NUM_BOXES = c->MAX_NUM_BOXES;
3681 const unsigned NUDGE_MAX_NUM_SPHERES = c->MAX_NUM_SPHERES;
// The simulation parameters are only validated here, so they must have been
// filled in before this call.
3684 struct SimulationParams* sp = &c->simulation_params;
3686 assert(sp->time_step>0);
3688 assert(sp->max_num_substeps>0);
3690 assert(sp->num_iterations_per_substep>0);
// Warning modes above 2 fall back to 0.
3697 if (sp->numsubsteps_overflow_warning_mode>2) sp->numsubsteps_overflow_warning_mode=0;
3698 sp->num_total_substeps=sp->num_frames=0;
// Scratch arena: must be unallocated on entry; aligned to
// NUDGE_ARENA_SIZE_ALIGNMENT (4096 unless overridden).
3701# ifndef NUDGE_ARENA_SIZE_ALIGNMENT
3702# define NUDGE_ARENA_SIZE_ALIGNMENT (4096)
3704 assert(c->arena.size==0);assert(c->arena.data==NULL);
3705 c->arena.size = NUDGE_ARENA_SIZE_MACRO(NUDGE_MAX_NUM_BODIES);
3706 c->arena.data = _mm_malloc(c->arena.size, NUDGE_ARENA_SIZE_ALIGNMENT);
// Active-body index list and per-body arrays: one slot per possible body,
// 64-byte aligned.
3709 c->active_bodies.capacity = NUDGE_MAX_NUM_BODIES;
3710 c->active_bodies.indices =
static_cast<uint16_t*
>(_mm_malloc(
sizeof(uint16_t)*NUDGE_MAX_NUM_BODIES, 64));
3712 c->bodies.transforms =
static_cast<Transform*
>(_mm_malloc(
sizeof(Transform)*NUDGE_MAX_NUM_BODIES, 64));
3713 c->bodies.momentum =
static_cast<BodyMomentum*
>(_mm_malloc(
sizeof(BodyMomentum)*NUDGE_MAX_NUM_BODIES, 64));
3714 c->bodies.properties =
static_cast<BodyProperties*
>(_mm_malloc(
sizeof(BodyProperties)*NUDGE_MAX_NUM_BODIES, 64));
3715 c->bodies.filters =
static_cast<BodyFilter*
>(_mm_malloc(
sizeof(BodyFilter)*NUDGE_MAX_NUM_BODIES, 64));
3716 c->bodies.layouts =
static_cast<BodyLayout*
>(_mm_malloc(
sizeof(BodyLayout)*NUDGE_MAX_NUM_BODIES, 64));
3717 c->bodies.idle_counters =
static_cast<uint8_t*
>(_mm_malloc(
sizeof(uint8_t)*NUDGE_MAX_NUM_BODIES, 64));
3718 c->bodies.infos =
static_cast<BodyInfo*
>(_mm_malloc(
sizeof(BodyInfo)*NUDGE_MAX_NUM_BODIES,64));
// Box collider arrays (shape data, tags, local transforms).
3720 c->colliders.boxes.data =
static_cast<BoxCollider*
>(_mm_malloc(
sizeof(BoxCollider)*NUDGE_MAX_NUM_BOXES, 64));
3721 c->colliders.boxes.tags =
static_cast<uint16_t*
>(_mm_malloc(
sizeof(uint16_t)*NUDGE_MAX_NUM_BOXES, 64));
3722 c->colliders.boxes.transforms =
static_cast<Transform*
>(_mm_malloc(
sizeof(Transform)*NUDGE_MAX_NUM_BOXES, 64));
// Sphere collider arrays (shape data, tags, local transforms).
3724 c->colliders.spheres.data =
static_cast<SphereCollider*
>(_mm_malloc(
sizeof(SphereCollider)*NUDGE_MAX_NUM_SPHERES, 64));
3725 c->colliders.spheres.tags =
static_cast<uint16_t*
>(_mm_malloc(
sizeof(uint16_t)*NUDGE_MAX_NUM_SPHERES, 64));
3726 c->colliders.spheres.transforms =
static_cast<Transform*
>(_mm_malloc(
sizeof(Transform)*NUDGE_MAX_NUM_SPHERES, 64));
// Contact buffers are sized at 64 entries per possible body.
3728 c->contact_data.capacity = NUDGE_MAX_NUM_BODIES*64;
3729 c->contact_data.bodies =
static_cast<BodyPair*
>(_mm_malloc(
sizeof(BodyPair)*c->contact_data.capacity, 64));
3730 c->contact_data.data =
static_cast<Contact*
>(_mm_malloc(
sizeof(Contact)*c->contact_data.capacity, 64));
3731 c->contact_data.tags =
static_cast<uint64_t*
>(_mm_malloc(
sizeof(uint64_t)*c->contact_data.capacity, 64));
3732 c->contact_data.sleeping_pairs =
static_cast<uint32_t*
>(_mm_malloc(
sizeof(uint32_t)*c->contact_data.capacity, 64));
// The cached-impulse buffer uses the same 64-per-body sizing.
3734 c->contact_cache.capacity = NUDGE_MAX_NUM_BODIES*64;
3735 c->contact_cache.data =
static_cast<CachedContactImpulse*
>(_mm_malloc(
sizeof(CachedContactImpulse)*c->contact_cache.capacity, 64));
3736 c->contact_cache.tags =
static_cast<uint64_t*
>(_mm_malloc(
sizeof(uint64_t)*c->contact_cache.capacity, 64));
// Kinematic animation storage, with capacities taken from compile-time macros.
// NOTE(review): no allocation of kinematic_data.key_frame_modes is visible here,
// although other code in this file memsets and frees that array. The allocation
// is presumably on a line missing from this view; verify.
3738 c->kinematic_data.key_frame_capacity = NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES;
3739 c->kinematic_data.key_frame_transforms =
static_cast<Transform*
>(_mm_malloc(
sizeof(Transform)*c->kinematic_data.key_frame_capacity,64));
3741 c->kinematic_data.animations_capacity = NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS;
3742 c->kinematic_data.animations =
static_cast<KinematicData::Animation*
>(_mm_malloc(
sizeof(KinematicData::Animation)*c->kinematic_data.animations_capacity,64));
// Removed-body list: one slot per possible body (capacity written through a cast, as above).
3744 *((uint32_t*)&c->global_data.removed_bodies_capacity) = NUDGE_MAX_NUM_BODIES;assert(c->global_data.removed_bodies_capacity==NUDGE_MAX_NUM_BODIES);
3745 c->global_data.removed_bodies =
static_cast<uint32_t*
>(_mm_malloc(
sizeof(uint32_t)*c->global_data.removed_bodies_capacity, 64));
// Global defaults: no flags, gravity only along the Y component.
3746 c->global_data.flags = 0;
3747 c->global_data.exclude_smoothing_graphic_transform_flags=0;
3748 c->global_data.gravity[0]=c->global_data.gravity[2]=0.f;
3749 c->global_data.gravity[1]=NUDGE_DEFAULT_GRAVITY;
3755 assert(c->MAX_NUM_BODIES>0);
3756 assert(c->global_data.removed_bodies_capacity>0 && c->global_data.removed_bodies);
3757 _mm_free(c->global_data.removed_bodies);c->global_data.removed_bodies_count = c->global_data.finalized_removed_bodies_count = 0;
3758 *((uint32_t*)&c->global_data.removed_bodies_capacity) = 0;
3760 _mm_free(c->kinematic_data.animations);c->kinematic_data.animations_capacity = c->kinematic_data.animations_count = 0;
3761 _mm_free(c->kinematic_data.key_frame_modes);
3762 _mm_free(c->kinematic_data.key_frame_transforms);
3763 c->kinematic_data.key_frame_capacity = c->kinematic_data.key_frame_count = 0;
3765 _mm_free(c->contact_cache.data);c->contact_cache.data = 0;
3766 _mm_free(c->contact_cache.tags);c->contact_cache.tags = 0;
3767 c->contact_cache.capacity = c->contact_cache.count = 0;
3769 _mm_free(c->contact_data.bodies);c->contact_data.bodies = 0;
3770 _mm_free(c->contact_data.data);c->contact_data.data = 0;
3771 _mm_free(c->contact_data.tags);c->contact_data.tags = 0;
3772 _mm_free(c->contact_data.sleeping_pairs);c->contact_data.sleeping_pairs = 0;
3773 c->contact_data.count = c->contact_data.capacity = 0;
3775 _mm_free(c->colliders.spheres.data);c->colliders.spheres.data = 0;
3776 _mm_free(c->colliders.spheres.tags);c->colliders.spheres.tags = 0;
3777 _mm_free(c->colliders.spheres.transforms);c->colliders.spheres.transforms = 0;
3778 c->colliders.spheres.count = 0;
3780 _mm_free(c->colliders.boxes.data);c->colliders.boxes.data = 0;
3781 _mm_free(c->colliders.boxes.tags);c->colliders.boxes.tags = 0;
3782 _mm_free(c->colliders.boxes.transforms);c->colliders.boxes.transforms = 0;
3783 c->colliders.boxes.count = 0;
3785 _mm_free(c->bodies.infos);c->bodies.infos = 0;
3786 _mm_free(c->bodies.idle_counters);c->bodies.idle_counters = 0;
3787 _mm_free(c->bodies.filters);c->bodies.filters = 0;
3788 _mm_free(c->bodies.layouts);c->bodies.layouts = 0;
3789 _mm_free(c->bodies.transforms);c->bodies.transforms = 0;
3790 _mm_free(c->bodies.momentum);c->bodies.momentum = 0;
3791 _mm_free(c->bodies.properties);c->bodies.properties = 0;
3792 c->bodies.count = 0;
3794 _mm_free(c->active_bodies.indices);c->active_bodies.indices = 0;
3795 c->active_bodies.capacity = c->active_bodies.count = 0;
3797 _mm_free(c->arena.data);c->arena.data = 0;
3801 *((
unsigned*)&c->MAX_NUM_BODIES) = *((
unsigned*)&c->MAX_NUM_BOXES) = *((
unsigned*)&c->MAX_NUM_SPHERES) =0;
// Element type used to read global_data.removed_bodies when sorting it with qsort.
3805typedef unsigned body_type;
// Context consulted by the qsort comparators. qsort has no user-data argument,
// so callers set this right before sorting and reset it to NULL right after.
// NOTE(review): being file-scope mutable state, it presumably makes the sort
// unsafe to run concurrently on two contexts; confirm if threading matters.
3806static context_t* _tmpc = NULL;
// qsort comparator: orders two body indices (body_type values pointed to by
// av/bv) by ascending first_box_index of their layouts. Reads the layouts
// through the file-scope _tmpc, which must be set by the caller beforehand.
// Returns -1, 0 or 1.
3807static inline int _compare_bodies_by_box_collider(
const void* av,
const void*bv) {
3808 const body_type a = *((body_type*)av), b = *((body_type*)bv);
3810 assert(a<_tmpc->bodies.count);
3811 assert(b<_tmpc->bodies.count);
3812 const int aa=_tmpc->bodies.layouts[a].first_box_index,bb=_tmpc->bodies.layouts[b].first_box_index;
3813 return (aa<bb)?-1:(aa>bb)?1:0;
// qsort comparator: orders two body indices (body_type values pointed to by
// av/bv) by ascending first_sphere_index of their layouts. Reads the layouts
// through the file-scope _tmpc, which must be set by the caller beforehand.
// Returns -1, 0 or 1.
3815static inline int _compare_bodies_by_sphere_collider(
const void* av,
const void*bv) {
3816 const body_type a = *((body_type*)av), b = *((body_type*)bv);
3818 assert(a<_tmpc->bodies.count);
3819 assert(b<_tmpc->bodies.count);
3820 const int aa=_tmpc->bodies.layouts[a].first_sphere_index,bb=_tmpc->bodies.layouts[b].first_sphere_index;
3821 return (aa<bb)?-1:(aa>bb)?1:0;
// Releases the colliders of every body that was queued in
// global_data.removed_bodies but not finalized yet (entries in the range
// [finalized_removed_bodies_count, removed_bodies_count)).
// For boxes and then for spheres it:
//   1. sorts the pending bodies by their first collider index,
//   2. walks them from the highest index down, merging bodies whose collider
//      ranges are contiguous into a single [start,start+count) run,
//   3. closes each run by shifting the collider data/transforms down, parks the
//      freed tags after the surviving ones, and re-bases first_*_index of every
//      body whose colliders moved.
// Finally the layouts and AABB info of the finalized bodies are reset, and
// finalized_removed_bodies_count is brought up to removed_bodies_count.
// Returns immediately when there is nothing pending.
3823void finalize_removed_bodies(context_t* c) {
// Optional clean-up passes; all are switched off in this build of the function.
3826 const int clean_active_bodies = 0;
3827 const int clean_contact_data = 0;
3828 const int clean_cached_impulses = 0;
3831 const int32_t removed_bodies_count = (int32_t) c->global_data.removed_bodies_count;
3832 const int32_t finalized_removed_bodies_count = (int32_t) c->global_data.finalized_removed_bodies_count;
3833 assert(finalized_removed_bodies_count<=removed_bodies_count);
3834 if (finalized_removed_bodies_count==removed_bodies_count)
return;
// The list is sorted below as an array of body_type, so the element sizes must match.
3837 assert(
sizeof(body_type)==
sizeof(c->global_data.removed_bodies[0]));
3838 body_type* removed_bodies = &c->global_data.removed_bodies[0];
3839 int16_t start;uint16_t count;
3840 BodyLayout* layouts = c->bodies.layouts;
3841 uint32_t num_boxes_to_remove=0,num_spheres_to_remove=0;
int i;
3842 uint32_t max_num_allocated_tags = 0;
// Work on a by-value copy of the arena descriptor; presumably this keeps the
// temporary allocation below from being recorded in c->arena (TODO confirm
// against allocate_array).
3843 Arena arena = c->arena;
3844 uint16_t* tags = NULL;
// Pass 1: count how many colliders the pending bodies own (and run the optional clean-ups).
3847 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
3848 const body_type body = removed_bodies[i];assert(body<c->bodies.count);
3849 const uint16_t num_boxes=layouts[body].num_boxes;
3850 const uint16_t num_spheres=layouts[body].num_spheres;
3851 num_boxes_to_remove+=num_boxes;
3852 num_spheres_to_remove+=num_spheres;
3854 if (num_boxes>0) {assert(layouts[body].first_box_index>=0);}
3855 if (num_spheres>0) {assert(layouts[body].first_sphere_index>=0);}
// Optional: drop the body from the active list.
3857 if (clean_active_bodies && c->active_bodies.count) {
3858 for (
int j=(
int)(c->active_bodies.count-1);j>=0;--j) {
3859 assert(j<(
int)c->active_bodies.count);
3860 if (body==c->active_bodies.indices[j]) {
3863 memmove(&c->active_bodies.indices[j],&c->active_bodies.indices[j+1],
sizeof(c->active_bodies.indices[0])*(c->active_bodies.count-(j+1)));
3864 --c->active_bodies.count;
// Optional: drop every contact that references the body.
// NOTE(review): contact_data.sleeping_pairs is not shifted here; confirm whether it is indexed per contact.
3868 if (clean_contact_data && c->contact_data.count) {
3869 for (
int j=(
int)(c->contact_data.count-1);j>=0;--j) {
3871 const BodyPair* bp = &c->contact_data.bodies[j];
3872 if (body==bp->a || body==bp->b) {
3873 memmove(&c->contact_data.data[j],&c->contact_data.data[j+1],
sizeof(c->contact_data.data[0])*(c->contact_data.count-(j+1)));
3874 memmove(&c->contact_data.bodies[j],&c->contact_data.bodies[j+1],
sizeof(c->contact_data.bodies[0])*(c->contact_data.count-(j+1)));
3875 memmove(&c->contact_data.tags[j],&c->contact_data.tags[j+1],
sizeof(c->contact_data.tags[0])*(c->contact_data.count-(j+1)));
// One entry left the contact_data arrays, so contact_data.count is the counter
// to shrink (not active_bodies.count, which belongs to the branch above).
3876 --c->contact_data.count;
// Scratch buffer large enough to hold the tags of the biggest batch removed at once.
3882 max_num_allocated_tags = num_boxes_to_remove>num_spheres_to_remove?num_boxes_to_remove:num_spheres_to_remove;
3883 tags = allocate_array<uint16_t>(&arena, max_num_allocated_tags, 32);
3884 assert(
sizeof(tags[0])==
sizeof(c->colliders.boxes.tags[0]));
3885 assert(
sizeof(tags[0])==
sizeof(c->colliders.spheres.tags[0]));
// ---- Boxes ----
3888 if (num_boxes_to_remove>0)
// Sort the pending bodies by first_box_index; _tmpc hands the context to the comparator.
3890 _tmpc=c;qsort(&removed_bodies[finalized_removed_bodies_count],removed_bodies_count-finalized_removed_bodies_count,
sizeof(body_type),&_compare_bodies_by_box_collider);_tmpc=NULL;
3892 uint32_t num_finalized_boxes=0;
unsigned moveGap,amount,lastBodyId;
// Seed the current run with the body that owns the highest box index.
3893 const body_type last_body = removed_bodies[removed_bodies_count-1];
3894 start=layouts[last_body].first_box_index;count=layouts[last_body].num_boxes;
3895 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
// If the previous body (in sorted order) ends exactly where the run starts, grow the run and keep going.
3896 if (i>finalized_removed_bodies_count) {
3897 const body_type body = removed_bodies[i-1];
3898 const int16_t body_start = layouts[body].first_box_index;
3899 const uint16_t body_count = layouts[body].num_boxes;
3900 if (body_start+body_count==start) {start=body_start;count+=body_count;
continue;}
3905 num_finalized_boxes+=count;
// Close the gap [start,start+count): shift the `amount` colliders after it down by `count`.
3924 assert(start+count<=(
int)c->colliders.boxes.count);
3925 moveGap = count;amount = c->colliders.boxes.count-(start+count);
3926 memmove(&c->colliders.boxes.data[start],&c->colliders.boxes.data[start+count],amount*
sizeof(c->colliders.boxes.data[0]));
3927 memmove(&c->colliders.boxes.transforms[start],&c->colliders.boxes.transforms[start+count],amount*
sizeof(c->colliders.boxes.transforms[0]));
// Tags are rotated rather than discarded: the freed tags are saved, the
// surviving ones shifted down, and the freed ones written back starting at
// start+amount, which is the index of the first unused slot after the shrink.
3929 assert(count<=max_num_allocated_tags);
3930 memcpy(tags,&c->colliders.boxes.tags[start],moveGap*
sizeof(c->colliders.boxes.tags[0]));
3931 memmove(&c->colliders.boxes.tags[start],&c->colliders.boxes.tags[start+count],amount*
sizeof(c->colliders.boxes.tags[0]));
3933 memcpy(&c->colliders.boxes.tags[start+amount],tags,moveGap*
sizeof(c->colliders.boxes.tags[0]));
3935 c->colliders.boxes.count-=count;
// Re-base first_box_index of each body whose boxes were shifted. lastBodyId
// starts at a value no body can have so the first collider always triggers an update.
// Note: the loop variable below shadows the outer `i`.
3939 lastBodyId = c->MAX_NUM_BODIES;
3940 for (
unsigned i=start,isz=c->colliders.boxes.count;i<isz;i++) {
3941 const unsigned bodyId = c->colliders.boxes.transforms[i].body;
3942 assert(bodyId<c->bodies.count);
3944 if (lastBodyId!=bodyId) {
3946 BodyLayout* bl = &c->bodies.layouts[bodyId];
3947 assert(bl->first_box_index>=0 && bl->num_boxes>0);
3948 assert(bl->first_box_index>=start+count);
3949 assert((int16_t)i==bl->first_box_index-(int16_t)count);
3950 bl->first_box_index = (int16_t) i;
3951 assert((uint16_t)bl->first_box_index+bl->num_boxes<=c->colliders.boxes.count);
// Start the next run from the previous body in sorted order.
3956 if (i>finalized_removed_bodies_count) {
3957 const body_type body = removed_bodies[i-1];
3958 start = layouts[body].first_box_index;
3959 count = layouts[body].num_boxes;
// Sanity check: every box counted in pass 1 must have been released.
3962 if (num_finalized_boxes!=num_boxes_to_remove) {
3963 log(
"[nudge_frame:%llu] finalize_removed_bodies(...) has NOT handled %u box colliders and %u sphere colliders\n",c->simulation_params.num_frames,num_boxes_to_remove,num_spheres_to_remove);
3965 assert(num_finalized_boxes==num_boxes_to_remove);
// ---- Spheres: same procedure as for boxes ----
3970 if (num_spheres_to_remove>0)
3972 _tmpc=c;qsort(&removed_bodies[finalized_removed_bodies_count],removed_bodies_count-finalized_removed_bodies_count,
sizeof(body_type),&_compare_bodies_by_sphere_collider);_tmpc=NULL;
3974 uint32_t num_finalized_spheres=0;
unsigned moveGap,amount,lastBodyId;
3975 const body_type last_body = removed_bodies[removed_bodies_count-1];
3976 start=layouts[last_body].first_sphere_index;count=layouts[last_body].num_spheres;
3977 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
// Merge contiguous sphere ranges into one run.
3978 if (i>finalized_removed_bodies_count) {
3979 const body_type body = removed_bodies[i-1];
3980 const int16_t body_start = layouts[body].first_sphere_index;
3981 const uint16_t body_count = layouts[body].num_spheres;
3982 if (body_start+body_count==start) {start=body_start;count+=body_count;
continue;}
3987 num_finalized_spheres+=count;
// Close the gap and rotate the tags, exactly as done for boxes.
4006 assert(start+count<=(
int)c->colliders.spheres.count);
4007 moveGap = count;amount = c->colliders.spheres.count-(start+count);
4008 memmove(&c->colliders.spheres.data[start],&c->colliders.spheres.data[start+count],amount*
sizeof(c->colliders.spheres.data[0]));
4009 memmove(&c->colliders.spheres.transforms[start],&c->colliders.spheres.transforms[start+count],amount*
sizeof(c->colliders.spheres.transforms[0]));
4011 assert(count<=max_num_allocated_tags);
4012 memcpy(tags,&c->colliders.spheres.tags[start],moveGap*
sizeof(c->colliders.spheres.tags[0]));
4013 memmove(&c->colliders.spheres.tags[start],&c->colliders.spheres.tags[start+count],amount*
sizeof(c->colliders.spheres.tags[0]));
4015 memcpy(&c->colliders.spheres.tags[start+amount],tags,moveGap*
sizeof(c->colliders.spheres.tags[0]));
4017 c->colliders.spheres.count-=count;
// Re-base first_sphere_index of the bodies whose spheres were shifted.
4021 lastBodyId = c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;assert(c->MAX_NUM_BODIES==c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES);
4022 for (
unsigned i=start,isz=c->colliders.spheres.count;i<isz;i++) {
4023 const unsigned bodyId = c->colliders.spheres.transforms[i].body;
4024 assert(bodyId<c->bodies.count);
4026 if (lastBodyId!=bodyId) {
4028 BodyLayout* bl = &c->bodies.layouts[bodyId];
4029 assert(bl->first_sphere_index>=0 && bl->num_spheres>0);
4030 assert(bl->first_sphere_index>=start+count);
4031 assert((int16_t)i==bl->first_sphere_index-(int16_t)count);
4032 bl->first_sphere_index = (int16_t) i;
4033 assert((uint16_t)bl->first_sphere_index+bl->num_spheres<=c->colliders.spheres.count);
4038 if (i>finalized_removed_bodies_count) {
4039 const body_type body = removed_bodies[i-1];
4040 start = layouts[body].first_sphere_index;
4041 count = layouts[body].num_spheres;
4044 if (num_finalized_spheres!=num_spheres_to_remove) {
4045 log(
"[nudge_frame:%llu] finalize_removed_bodies(...) has NOT handled %u box colliders and %u sphere colliders\n",c->simulation_params.num_frames,num_boxes_to_remove,num_spheres_to_remove);
4047 assert(num_finalized_spheres==num_spheres_to_remove);
// Mark the finalized bodies as collider-less and clear their AABB / COM info.
4052 for (
int i=finalized_removed_bodies_count;i<removed_bodies_count;i++) {
4053 const body_type body = removed_bodies[i];
4054 BodyLayout* bl = &c->bodies.layouts[body];
4055 bl->first_box_index=-1;bl->first_sphere_index=-1;
4056 bl->num_boxes=0;bl->num_spheres=0;
4057 BodyInfo* info = &c->bodies.infos[body];
4058 memset(&info->aabb_center[0],0,3*
sizeof(
float));
4059 memset(&info->aabb_half_extents[0],0,3*
sizeof(
float));
4060 memset(&info->com_offset[0],0,3*
sizeof(
float));
4061 info->aabb_enlarged_radius=0.f;
// Everything queued so far is now finalized.
4063 c->global_data.finalized_removed_bodies_count=c->global_data.removed_bodies_count;
4067 assert(num_boxes_to_remove || num_spheres_to_remove);
// Optional: purge cached impulses whose tag refers to a freed collider. The
// freed tags are the ones parked right after the live range of each tag array.
4070 if (clean_cached_impulses && c->contact_cache.count) {
4072 for (
int i=(
int)c->contact_cache.count-1;i>=0;--i) {
4074 const uint64_t tag = c->contact_cache.tags[i];
// Collider tags are extracted from bits 32..47 and 48..63 of the cached tag.
4075 const uint16_t a_tag = (uint16_t) ((tag&0x0000FFFF00000000ULL)>>(2ULL*16ULL));
4076 const uint16_t b_tag = (uint16_t) ((tag&0xFFFF000000000000ULL)>>(3ULL*16ULL));
4082 for (
unsigned j=c->colliders.boxes.count,jsz=c->colliders.boxes.count+num_boxes_to_remove;j<jsz;j++) {
4083 const uint16_t tg = c->colliders.boxes.tags[j];
4084 if (tg==a_tag || tg==b_tag) {found = 1;
break;}
4087 for (
unsigned j=c->colliders.spheres.count,jsz=c->colliders.spheres.count+num_spheres_to_remove;j<jsz;j++) {
4088 const uint16_t tg = c->colliders.spheres.tags[j];
4089 if (tg==a_tag || tg==b_tag) {found = 1;
break;}
4096 memmove(&c->contact_cache.tags[i],&c->contact_cache.tags[i+1],
sizeof(c->contact_cache.tags[0])*(c->contact_cache.count-(i+1)));
4097 memmove(&c->contact_cache.data[i],&c->contact_cache.data[i+1],
sizeof(CachedContactImpulse)*(c->contact_cache.count-(i+1)));
4098 --c->contact_cache.count;
// Debug check: each box tag is below MAX_NUM_BOXES, each sphere tag lies in
// [NUDGE_START_SPHERE_TAG, NUDGE_START_SPHERE_TAG+MAX_NUM_SPHERES), no tag is
// seen twice and none is missing.
4103# define TEST_NUDGE_COLLIDER_TAGS_INTEGRITY
4104# ifdef TEST_NUDGE_COLLIDER_TAGS_INTEGRITY
4106 Arena arena = c->arena;
4107 if (arena.size>=
sizeof(uint8_t)*(c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)) {
4108 uint8_t* tagsMap = allocate_array<uint8_t>(&arena, c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES, 32);assert(tagsMap);
4109 memset(tagsMap,0,(c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)*
sizeof(uint8_t));
4110 for (
unsigned i=0;i<c->MAX_NUM_BOXES;i++) {
4111 const uint16_t tag = c->colliders.boxes.tags[i];
4112 assert(tag<c->MAX_NUM_BOXES);
4113 assert(tagsMap[tag]==0);
4116 for (
unsigned i=0;i<c->MAX_NUM_SPHERES;i++) {
4117 uint16_t tag = c->colliders.spheres.tags[i];
4118 assert(tag>=NUDGE_START_SPHERE_TAG && tag<NUDGE_START_SPHERE_TAG+c->MAX_NUM_SPHERES);
// Map sphere tags into the second half of tagsMap.
4119 tag=tag-NUDGE_START_SPHERE_TAG+c->MAX_NUM_BOXES;
4120 assert(tagsMap[tag]==0);
4123 unsigned unset_tags=0;
4124 for (
unsigned i=0;i<c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;i++) {
4125 if (!tagsMap[i]) ++unset_tags;
4127 assert(unset_tags==0);
// Debug check: the colliders of each body are stored contiguously and match
// the first_*_index / num_* fields of the body layout.
4132# define TEST_COLLIDER_COHERENCY
4133# ifdef TEST_COLLIDER_COHERENCY
4136 for (
unsigned i=0;i<c->colliders.boxes.count;i++) {
4138 const unsigned body = c->colliders.boxes.transforms[i].body;
4139 assert(body<c->bodies.count);
4140 BodyLayout* bl = &c->bodies.layouts[body];
4141 if (body_last!=body) {
4143 assert(bl->first_box_index==(
int)i);
4144 delta_shape_count=0;
4147 ++delta_shape_count;
4148 assert(i>=(uint16_t)bl->first_box_index);
4149 assert(i<(uint16_t)bl->first_box_index+bl->num_boxes);
4150 assert(i==(uint16_t)bl->first_box_index+delta_shape_count);
4154 for (
unsigned i=0;i<c->colliders.spheres.count;i++) {
4156 const unsigned body = c->colliders.spheres.transforms[i].body;
4157 assert(body<c->bodies.count);
4159 BodyLayout* bl = &c->bodies.layouts[body];assert(bl->first_sphere_index>=0);
4160 if (body_last!=body) {
4162 assert(bl->first_sphere_index==(
int)i);
4163 delta_shape_count=0;
4166 ++delta_shape_count;
4167 assert(bl->first_sphere_index>=0);
4168 assert(i>=(uint16_t)bl->first_sphere_index);
4169 assert(i<(uint16_t)bl->first_sphere_index+bl->num_spheres);
4170 assert(i==(uint16_t)bl->first_sphere_index+delta_shape_count);
4179 assert(body<c->bodies.count);
4180 for (
unsigned i=0;i<c->global_data.removed_bodies_count;i++) {
if (c->global_data.removed_bodies[i]==body)
return;}
4181 assert(c->global_data.removed_bodies_count<c->MAX_NUM_BODIES);
4182 BodyFilter* f = &c->bodies.filters[body];
4184 f->collision_group=f->collision_mask=0;
4185 c->bodies.idle_counters[body]=0xff;
4186 c->bodies.properties[body].mass_inverse = 0.f;
4188 float* lvel = &c->bodies.momentum[body].velocity[0];
4189 float* avel = &c->bodies.momentum[body].angular_velocity[0];
4190 lvel[0]=lvel[1]=lvel[2]=avel[0]=avel[1]=avel[2]=0.f;
4191 float* pos = c->bodies.transforms[body].position;pos[1]-=100000.f;
4192 c->global_data.removed_bodies[c->global_data.removed_bodies_count++] = body;
// Creates a body that owns a single box collider.
//   c          context to add the body to
//   mass       body mass; a negative value is replaced by its absolute value
//   hsizex/y/z stored as the three components of the box collider's `size`
//              (presumably half extents, going by the parameter names)
//   T          initial body transform, or NULL for identity_transform
//   comOffset  optional centre-of-mass offset; NULL or all zeros means none.
//              When given, it is subtracted from the collider's local position,
//              stored in info->com_offset, and BF_HAS_COM_OFFSET is set.
// Returns NUDGE_INVALID_BODY_ID when no body or box slot is left. The normal
// return value is on lines not visible in this chunk (presumably the body index).
4199unsigned add_box(context_t* c,
float mass,
float hsizex,
float hsizey,
float hsizez,
const Transform* T,
const float comOffset[3]) {
4200 unsigned body,collider;
// Prefer recycling a finalized removed body: take the first entry of the
// removed list and shift the rest down by one.
4201 if (c->global_data.finalized_removed_bodies_count>0) {
4202 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4203 assert(
sizeof(c->global_data.removed_bodies[0])==
sizeof(body_type));
4204 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4205 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*
sizeof(body_type));
// A finalized body must own no colliders.
4206 assert(body<c->bodies.count);
4207 const BodyLayout* bl = &c->bodies.layouts[body];
4208 assert(bl->first_box_index==-1);
4209 assert(bl->num_boxes==0);
4210 assert(bl->first_sphere_index==-1);
4211 assert(bl->num_spheres==0);
// Otherwise append a new body, provided both a body slot and a box slot are free.
// NOTE(review): if this capacity check only runs when no slot is recycled (the
// branch structure is not fully visible here), the recycle path never checks
// boxes.count against MAX_NUM_BOXES before `boxes.count++` below; verify.
4214 assert(c->bodies.count<c->MAX_NUM_BODIES && c->colliders.boxes.count<c->MAX_NUM_BOXES);
4215 if (c->bodies.count == c->MAX_NUM_BODIES || c->colliders.boxes.count == c->MAX_NUM_BOXES)
return NUDGE_INVALID_BODY_ID;
4216 body = c->bodies.count++;
4219 BodyProperties* prop = &c->bodies.properties[body];
4220 Transform* xform = &c->bodies.transforms[body], *xform_collider = NULL;
4222 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
// An all-zero offset is treated the same as no offset.
4223 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4224 collider = c->colliders.boxes.count++;
4225 BoxCollider* boxCollider = &c->colliders.boxes.data[collider];
4228 *xform = T ? (*T) : identity_transform;
// Start at rest with default friction and gravity.
4230 memset(&c->bodies.momentum[body], 0,
sizeof(c->bodies.momentum[body]));
4231 memset(prop,0,
sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY;
4232 if (mass<0) mass=-mass;
// Idle counter: 0 when BF_IS_DYNAMIC is set in the filter flags, 0xff otherwise.
4234 c->bodies.idle_counters[body] = (filter->flags&
BF_IS_DYNAMIC)?0:0xff;
4237 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
// The body owns exactly one box and no spheres.
4238 layout->num_boxes = 1;layout->num_spheres = 0;
4239 layout->first_box_index = (int16_t) collider;
4240 layout->first_sphere_index = -1;
4241 boxCollider->size[0] = hsizex;
4242 boxCollider->size[1] = hsizey;
4243 boxCollider->size[2] = hsizez;
// The collider's local transform starts as identity and records its owning body.
4244 xform_collider = &c->colliders.boxes.transforms[collider];
4245 *xform_collider = identity_transform;
4246 xform_collider->
body = body;
4247 if (comOffset) {filter->flags|=
BF_HAS_COM_OFFSET;
for (
int l=0;l<3;l++) {xform_collider->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
// Overload of add_box that receives the body pose as a float array
// (mMatrix16WithoutScaling; presumably a 4x4 matrix without scaling, going by
// the parameter name) instead of a Transform. The other parameters have the
// same meaning as in the Transform overload.
4257unsigned add_box(context_t* c,
float mass,
float hsizex,
float hsizey,
float hsizez,
const float* mMatrix16WithoutScaling,
const float comOffset[3]) {
// No matrix given: delegate to the Transform overload with a NULL transform
// (which then uses identity_transform). comOffset is forwarded explicitly so
// that the caller's centre-of-mass offset is not lost on this path.
4258 if (!mMatrix16WithoutScaling)
return add_box(c,mass,hsizex,hsizey,hsizez,(
const Transform*)NULL,comOffset);
// Creates a body that owns a single sphere collider.
//   c          context to add the body to
//   mass       body mass; a negative value is replaced by its absolute value
//   radius     stored as the sphere collider's radius
//   T          initial body transform, or NULL for identity_transform
//   comOffset  optional centre-of-mass offset; NULL or all zeros means none.
//              When given, it is subtracted from the collider's local position,
//              stored in info->com_offset, and BF_HAS_COM_OFFSET is set.
// Returns NUDGE_INVALID_BODY_ID when no body or sphere slot is left. The normal
// return value is on lines not visible in this chunk (presumably the body index).
4262unsigned add_sphere(context_t* c,
float mass,
float radius,
const Transform* T,
const float comOffset[3]) {
4263 unsigned body,collider;
// Prefer recycling a finalized removed body: take the first entry of the
// removed list and shift the rest down by one.
4264 if (c->global_data.finalized_removed_bodies_count>0) {
4265 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4266 assert(
sizeof(c->global_data.removed_bodies[0])==
sizeof(body_type));
4267 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4268 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*
sizeof(body_type));
// A finalized body must own no colliders.
4269 assert(body<c->bodies.count);
4270 const BodyLayout* bl = &c->bodies.layouts[body];
4271 assert(bl->first_box_index==-1);
4272 assert(bl->num_boxes==0);
4273 assert(bl->first_sphere_index==-1);
4274 assert(bl->num_spheres==0);
// Otherwise append a new body, provided both a body slot and a sphere slot are free.
// NOTE(review): if this capacity check only runs when no slot is recycled (the
// branch structure is not fully visible here), the recycle path never checks
// spheres.count against MAX_NUM_SPHERES before `spheres.count++` below; verify.
4277 assert(c->bodies.count<c->MAX_NUM_BODIES && c->colliders.spheres.count<c->MAX_NUM_SPHERES);
4278 if (c->bodies.count == c->MAX_NUM_BODIES || c->colliders.spheres.count == c->MAX_NUM_SPHERES)
return NUDGE_INVALID_BODY_ID;
4279 body = c->bodies.count++;
4282 BodyProperties* prop = &c->bodies.properties[body];
4283 Transform *xform = &c->bodies.transforms[body], *xform_collider = NULL;
4284 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
// An all-zero offset is treated the same as no offset.
4285 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4286 collider = c->colliders.spheres.count++;
4289 *xform = T ? (*T) : identity_transform;
// Start at rest with default friction and gravity.
4291 memset(&c->bodies.momentum[body], 0,
sizeof(c->bodies.momentum[body]));
4292 memset(prop,0,
sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY;
4293 if (mass<0) mass=-mass;
// Idle counter: 0 when BF_IS_DYNAMIC is set in the filter flags, 0xff otherwise.
4295 c->bodies.idle_counters[body] = (filter->flags&
BF_IS_DYNAMIC)?0:0xff;
4298 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
// The body owns exactly one sphere and no boxes.
4299 layout->num_boxes = 0;layout->num_spheres = 1;
4300 layout->first_box_index = -1;
4301 layout->first_sphere_index = (int16_t) collider;
4302 c->colliders.spheres.data[collider].radius = radius;
// The collider's local transform starts as identity and records its owning body.
4303 xform_collider = &c->colliders.spheres.transforms[collider];
4304 *xform_collider = identity_transform;
4305 xform_collider->
body = body;
4306 if (comOffset) {filter->flags|=
BF_HAS_COM_OFFSET;
for (
int l=0;l<3;l++) {xform_collider->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
// Overload of add_sphere that receives the body pose as a float array
// (mMatrix16WithoutScaling; presumably a 4x4 matrix without scaling, going by
// the parameter name) instead of a Transform. The other parameters have the
// same meaning as in the Transform overload.
4316unsigned add_sphere(context_t* c,
float mass,
float radius,
const float* mMatrix16WithoutScaling,
const float comOffset[3]) {
// No matrix given: delegate to the Transform overload with a NULL transform
// (which then uses identity_transform). comOffset is forwarded explicitly so
// that the caller's centre-of-mass offset is not lost on this path.
4317 if (!mMatrix16WithoutScaling)
return add_sphere(c,mass,radius,(
const Transform*)NULL,comOffset);
// Creates a body made of num_boxes box colliders plus num_spheres sphere colliders.
//   c                       context to add the body to
//   mass                    body mass; negative values are negated; when > 0 its
//                           inverse is stored in prop->mass_inverse
//   inertia                 optional; when given, the per-axis inverse is stored
//                           (a zero component yields 0)
//   num_boxes               number of boxes
//   hsizeTriplets           3 floats per box, copied into the collider `size`
//   boxOffsetTransforms     optional per-box local transforms (identity when NULL)
//   num_spheres             number of spheres
//   radii                   one radius per sphere
//   sphereOffsetTransforms  optional per-sphere local transforms (identity when NULL)
//   T                       initial body transform, or NULL for identity_transform
//   comOffset               optional centre-of-mass offset; NULL or all zeros means none
//   centerMeshAndRetrieveOldCenter3Out
//                           optional 3-float output; when non-NULL it receives
//                           info->aabb_center and every collider is shifted by
//                           that centre plus comOffset
// Returns NUDGE_INVALID_BODY_ID when the colliders do not fit. The normal return
// value is on lines not visible in this chunk (presumably the body index).
// NOTE(review): the declarations of `body` and `offset`, and the code that fills
// info->aabb_center / aabb_half_extents before they are read, are not visible
// in this chunk.
4321unsigned add_compound(context_t* c,
float mass,
float inertia[3],
unsigned num_boxes,
const float* hsizeTriplets,
const Transform* boxOffsetTransforms,
unsigned num_spheres,
const float* radii,
const Transform* sphereOffsetTransforms,
const Transform* T,
const float comOffset[3],
float* centerMeshAndRetrieveOldCenter3Out) {
// At least one collider is required, and all of them must fit in the collider arrays.
4323 assert(num_boxes+num_spheres>0);
4324 assert(c->colliders.boxes.count+num_boxes<=c->MAX_NUM_BOXES);
4325 assert(c->colliders.spheres.count+num_spheres<=c->MAX_NUM_SPHERES);
4326 if (c->colliders.boxes.count+num_boxes>c->MAX_NUM_BOXES || c->colliders.spheres.count+num_spheres>c->MAX_NUM_SPHERES)
return NUDGE_INVALID_BODY_ID;
// Prefer recycling a finalized removed body: take the first entry of the
// removed list and shift the rest down by one.
4327 if (c->global_data.finalized_removed_bodies_count>0) {
4328 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4329 assert(
sizeof(c->global_data.removed_bodies[0])==
sizeof(body_type));
4330 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4331 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*
sizeof(body_type));
4332 assert(body<c->bodies.count);
4333 const BodyLayout* bl = &c->bodies.layouts[body];
4334 assert(bl->first_box_index==-1);
4335 assert(bl->num_boxes==0);
4336 assert(bl->first_sphere_index==-1);
4337 assert(bl->num_spheres==0);
// Otherwise append a new body slot.
4340 assert(c->bodies.count<c->MAX_NUM_BODIES);
4342 body = c->bodies.count++;
4344 BodyProperties* prop = &c->bodies.properties[body];
4345 Transform *xform = &c->bodies.transforms[body];
4346 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
// An all-zero offset is treated the same as no offset.
4347 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4349 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
4353 *xform = T ? (*T) : identity_transform;
// Start at rest with default friction and gravity.
4355 memset(&c->bodies.momentum[body], 0,
sizeof(c->bodies.momentum[body]));
4356 memset(prop,0,
sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY;
4357 if (mass<0) mass=-mass;
4358 if (mass>0) prop->mass_inverse = 1.0f/mass;
// Idle counter: 0 when BF_IS_DYNAMIC is set in the filter flags, 0xff otherwise.
4359 c->bodies.idle_counters[body] = (filter->flags&
BF_IS_DYNAMIC)?0:0xff;
// Append the boxes. The COM offset is applied here only when no re-centering
// was requested; otherwise it is folded into the shift applied further below.
4361 for (
unsigned i=0;i<num_boxes;i++) {
4362 unsigned collider = c->colliders.boxes.count++;
4363 BoxCollider* boxCollider = &c->colliders.boxes.data[collider];
4364 Transform* xf = &c->colliders.boxes.transforms[collider];
4365 for (
int j=0;j<3;j++) boxCollider->size[j] = hsizeTriplets[3*i+j];
4366 *xf = boxOffsetTransforms ? boxOffsetTransforms[i] : identity_transform;
4367 if (comOffset && !centerMeshAndRetrieveOldCenter3Out) {
for (
int l=0;l<3;l++) {xf->
position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4369 if (i==0) layout->first_box_index = collider;
4371 layout->num_boxes = num_boxes;
// Append the spheres in the same way.
4373 for (
unsigned i=0;i<num_spheres;i++) {
4374 unsigned collider = c->colliders.spheres.count++;
4375 assert(collider<c->MAX_NUM_SPHERES && collider<c->colliders.spheres.count);
4376 Transform* xf = &c->colliders.spheres.transforms[collider];
4377 c->colliders.spheres.data[collider].radius = radii[i];
4378 *xf = sphereOffsetTransforms ? sphereOffsetTransforms[i] : identity_transform;
4379 if (comOffset && !centerMeshAndRetrieveOldCenter3Out) {
for (
int l=0;l<3;l++) {xf->
position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4381 if (i==0) layout->first_sphere_index = collider;
4383 layout->num_spheres = num_spheres;
4389 float aabb_he[3] = {info->aabb_half_extents[0],info->aabb_half_extents[1],info->aabb_half_extents[2]};
// Re-centering: report the current AABB centre to the caller, then shift every
// collider by that centre plus the COM offset and move aabb_center accordingly.
4390 if (centerMeshAndRetrieveOldCenter3Out) {
4391 centerMeshAndRetrieveOldCenter3Out[0]=info->aabb_center[0];
4392 centerMeshAndRetrieveOldCenter3Out[1]=info->aabb_center[1];
4393 centerMeshAndRetrieveOldCenter3Out[2]=info->aabb_center[2];
4396 for (
int i=0;i<3;i++) {
4397 offset[i]=centerMeshAndRetrieveOldCenter3Out[i]+(comOffset?comOffset[i]:0.f);
4398 info->aabb_center[i]-=centerMeshAndRetrieveOldCenter3Out[i];
4401 for (
unsigned i=0;i<layout->num_boxes;i++) {
4402 Transform* xf = &c->colliders.boxes.transforms[layout->first_box_index+i];
4403 {
for (
int l=0;l<3;l++) {xf->position[l]-=offset[l];
if (comOffset) info->com_offset[l]=comOffset[l];}}
4405 for (
unsigned i=0;i<layout->num_spheres;i++) {
4406 Transform* xf = &c->colliders.spheres.transforms[layout->first_sphere_index+i];
4407 {
for (
int l=0;l<3;l++) {xf->position[l]-=offset[l];
if (comOffset) info->com_offset[l]=comOffset[l];}}
// aabb_enlarged_radius = |aabb_half_extents| + |aabb_center|; a term is skipped
// when its squared length is not above NM_EPSILON.
4410 info->aabb_enlarged_radius = 0;
4411 const float* t = info->aabb_half_extents;
float s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];
if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4412 t = info->aabb_center;s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];
if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
// Caller-supplied inertia: store the per-axis inverse, mapping 0 to 0.
4423 if (inertia) {
for (
int i=0;i<3;i++) prop->inertia_inverse[i] = inertia[i]!=0.f ? (1.0f / inertia[i]) : 0.f;}
// Overload of add_compound that receives the body pose and the per-collider
// offsets as float arrays (presumably 4x4 matrices without scaling, going by
// the parameter names) and forwards to the Transform-based overload.
// Temporary Transform storage for all colliders is taken from a by-value copy
// of the context arena; the sphere transforms follow the box transforms in the
// same allocation.
// NOTE(review): the statements that convert the input matrices into
// boxTransforms / sphereTransforms / T are not visible in this chunk, and the
// result of allocate_array is not checked in the visible lines.
4430unsigned add_compound(context_t* c,
float mass,
float inertia[3],
unsigned num_boxes,
const float* hsizeTriplets,
const float* boxOffsetMatrices16WithoutScaling,
unsigned num_spheres,
const float* radii,
const float* sphereOffsetMatrices16WithoutScaling,
const float* mMatrix16WithoutScaling,
const float comOffset[3],
float *centerMeshAndRetrieveOldCenter3Out) {
4431 Arena arena = c->arena;
4432 Transform* boxTransforms = allocate_array<Transform>(&arena, num_boxes+num_spheres, 32);
4433 Transform* sphereTransforms = &boxTransforms[num_boxes];
4436 Transform T = identity_transform;
4438 return add_compound(c,mass,inertia,num_boxes,hsizeTriplets,boxTransforms,num_spheres,radii,sphereTransforms,&T,comOffset,centerMeshAndRetrieveOldCenter3Out);
// Adds a new body whose box and sphere colliders are copies of those of
// `body_to_clone`, optionally re-scaled and re-centred.
//   c                 - simulation context (bodies and colliders are appended to it).
//   body_to_clone     - index of the source body; must be < c->bodies.count.
//   mass              - mass of the clone; asserted >= 0.
//   T                 - body transform of the clone; NULL means identity_transform.
//   scale_factor      - must be non-zero. A negative value is converted to a positive one:
//                       -scale_factor divided by the source aabb_half_extents[1] when that
//                       extent is >= 0, otherwise just -scale_factor.
//   newComOffsetInPreScaledUnits - optional new centre-of-mass offset, expressed before
//                       scaling; NULL keeps the (scaled) source offset.
// Returns NUDGE_INVALID_BODY_ID when the box or sphere collider capacity would be exceeded.
// NOTE(review): several lines of this function (declaration of `body`, branch braces, the
// final return) are not visible in this excerpt.
4441unsigned add_clone(context_t* c,
unsigned body_to_clone,
float mass,
const Transform* T,
float scale_factor,
const float newComOffsetInPreScaledUnits[3]) {
4443 const unsigned srcbody = body_to_clone;
4444 assert(srcbody<c->bodies.count);
4445 assert(scale_factor!=0.f);
4446 const BodyLayout* srclayout = &c->bodies.layouts[srcbody];
const uint16_t num_boxes = srclayout->num_boxes, num_spheres = srclayout->num_spheres;
4448 assert(num_boxes+num_spheres>0);
// Capacity check: asserted in debug builds, and reported through the return value as well.
4449 assert(c->colliders.boxes.count+num_boxes<=c->MAX_NUM_BOXES);
4450 assert(c->colliders.spheres.count+num_spheres<=c->MAX_NUM_SPHERES);
4451 if (c->global_data.finalized_removed_bodies_count>0 ? false : false) {}
4452 if (c->global_data.finalized_removed_bodies_count>0) {
// Overload of add_clone() that receives the clone's placement as a 16-float matrix
// (parameter name says "WithoutScaling") instead of a Transform pointer.
// NOTE(review): the body is not visible in this excerpt; the sibling matrix overloads
// convert the matrix with Mat4WithoutScalingToTransform() and forward to the Transform
// overload - presumably this one does the same, confirm against the full file.
4552unsigned add_clone(context_t* c,
unsigned body_to_clone,
float mass,
const float* mMatrix16WithoutScaling,
float scale_factor,
const float newComOffsetInPreScaledUnits[3]) {
// User-overridable default for `box_lateral_side_shrinking` (a fraction of the radius):
// add_compound_capsule() uses it when the caller passes a negative value, and
// add_compound_cylinder() does so when spheres are part of the compound.
4559#ifndef NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK
4560# define NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK (1.f/3.5f)
// Adds a prism-shaped compound body built from boxes.
//   radius / hsize     - used as the box half sizes across / along `axis`.
//   num_lateral_faces  - 0 selects the default of 8; 4 is delegated to add_box().
//   T, comOffset       - forwarded unchanged to add_box()/add_compound().
// Returns whatever add_box()/add_compound() returns.
// NOTE(review): the per-box rotation that uses `angle`/`axisv`, and the declarations of
// `inertia` and `stripped_center`, are not visible in this excerpt.
4563unsigned add_compound_prism(context_t* c,
float mass,
float radius,
float hsize,
unsigned num_lateral_faces,
const Transform* T,
AxisEnum axis,
const float comOffset[3]) {
4564 if (num_lateral_faces==0) num_lateral_faces=8;
// Four lateral faces is a plain box: hsize along `axis`, radius on the other two axes.
4566 if (num_lateral_faces==4)
return add_box(c,mass,axis==
AXIS_X?hsize:radius,axis==
AXIS_Y?hsize:radius,axis==
AXIS_Z?hsize:radius,T,comOffset);
4567 const int use_half_number_of_boxes = ((num_lateral_faces%2)==0);
// One box per pair of lateral faces (integer division).
4569 const unsigned num_boxes = num_lateral_faces/2;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4570 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+64);
// Unit vector along the prism axis.
4571 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
4572 float* box_hsizes = NULL;Transform* boxT=NULL;
// Box half size across a lateral face: radius*tan(pi/num_lateral_faces).
4573 const float hsz = radius*tanf(M_PI/(
float)num_lateral_faces);
4574 const float hln = radius;
// axisi[1] always ends up as the index of `axis`; axisi[0] and axisi[2] are the other two components.
4575 int axisi[3] = {0,1,2};
4576 if (axis==
AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4577 else if (axis==
AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4578 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4579 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4580 for (
unsigned i=0;i<num_boxes;i++) {
4581 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
// Boxes are spread over half a turn: angle = i*pi/num_boxes.
4582 const float angle = (float)i*M_PI/(
float)num_boxes;
4584 hs[axisi[0]]=hln;hs[axisi[1]]=hsize;hs[axisi[2]]=hsz;
// Boxes only: no spheres are passed to add_compound().
4587 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,0,NULL,NULL,T,comOffset,stripped_center);
// Adds a cylinder-shaped compound body approximated by boxes and (for long cylinders)
// spheres placed along `axis`.
//   radius, hsize  - cylinder radius and half length along `axis`.
//   num_boxes, num_spheres - both 0 selects defaults: 8 boxes / 0 spheres when
//                    radius>=hsize ("short" cylinder), otherwise 1 box / 3 spheres.
//                    Short cylinders never use spheres.
//   box_lateral_side_shrinking - fraction of the radius removed from the box half size;
//                    a negative value selects a default (see below).
//   T, comOffset   - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns.
// NOTE(review): the per-box rotation that uses `angle`/`axisv`, and the declarations of
// `inertia` and `stripped_center`, are not visible in this excerpt.
4590unsigned add_compound_cylinder(context_t* c,
float mass,
float radius,
float hsize,
const Transform* T,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3],
float box_lateral_side_shrinking) {
4591 const bool is_short_cylinder = (radius>=hsize);
4592 if (num_boxes==0 && num_spheres==0) {
4593 if (is_short_cylinder) {num_boxes=8;num_spheres=0;}
4594 else {num_boxes=1;num_spheres=3;}
4596 if (is_short_cylinder) num_spheres = 0;
// Negative shrink => default: without spheres 0 for a single box, else 1-1/1.41;
// with spheres the NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK fraction.
4597 if (box_lateral_side_shrinking<0.f) {
4598 if (num_spheres==0) box_lateral_side_shrinking=(num_boxes<=1)?0.f:(1.f-1.f/1.41f);
4599 else box_lateral_side_shrinking=NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4601 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+num_spheres*(1*
sizeof(
float)+
sizeof(Transform))+128);
// Unit vector along the cylinder axis.
4602 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
4603 float* box_hsizes = NULL;Transform* boxT=NULL;
4605 const float offset = radius*box_lateral_side_shrinking;
4606 const float box_size = radius-offset;
4607 float angle = M_PI*0.5f/num_boxes;
4608 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4609 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4610 for (
unsigned i=0;i<num_boxes;i++) {
4611 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
// Shrunk radius on the two lateral axes, full half length along `axis`.
4612 hs[0]=hs[1]=hs[2]=box_size;hs[axis]=hsize;
4616 float* sphere_radii = NULL;Transform* sphereT = NULL;
4617 if (num_spheres>0) {
// Fix: both arrays are indexed with [0,num_spheres) below, so they must hold num_spheres
// elements (they were sized with num_boxes, which is smaller in the default 1 box / 3 spheres case).
4618 sphere_radii = allocate_array<float>(&arena, num_spheres*1, 32);
4619 sphereT = allocate_array<Transform>(&arena, num_spheres, 32);
4620 for (
unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
// The first two spheres sit at the two ends, one radius inside the end caps.
4621 if (num_spheres>=2) {
4622 Transform* t = &sphereT[0];t->
p[axis]=-hsize+radius;
4623 t = &sphereT[1];t->p[axis]=hsize-radius;
// Any further spheres are placed at -hsize+dist*i along the axis.
4624 if (num_spheres>2) {
4628 const float dist = (2.f*hsize)/(
float)(num_spheres+1);
4629 for (
unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize+dist*i;}
4638 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
// Adds a capsule-shaped compound body approximated by boxes plus spheres along `axis`.
//   radius, hsize  - sphere radius and half length of the straight part along `axis`.
//   num_boxes, num_spheres - both 0 selects the default of 1 box and 3 spheres;
//                    at least 2 spheres are required (asserted).
//   box_lateral_side_shrinking - fraction of the radius removed from the box half size;
//                    a negative value selects NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK.
//   T, comOffset   - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns.
// NOTE(review): the per-box rotation that uses `angle`/`axisv`, and the declarations of
// `inertia` and `stripped_center`, are not visible in this excerpt.
4640unsigned add_compound_capsule(context_t* c,
float mass,
float radius,
float hsize,
const Transform* T,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3],
float box_lateral_side_shrinking) {
4641 if (num_boxes==0 && num_spheres==0) {num_boxes=1;num_spheres=3;}
4642 assert(num_spheres>=2);
4643 if (box_lateral_side_shrinking<0.f) box_lateral_side_shrinking=NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4644 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+num_spheres*(1*
sizeof(
float)+
sizeof(Transform))+128);
// Unit vector along the capsule axis.
4645 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
4646 float* box_hsizes = NULL;Transform* boxT=NULL;
4648 const float offset = radius*box_lateral_side_shrinking;
4649 float angle = M_PI*0.5f/num_boxes;
4650 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4651 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4652 for (
unsigned i=0;i<num_boxes;i++) {
4653 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
// Shrunk radius on the two lateral axes, full half length along `axis`.
4654 hs[0]=hs[1]=hs[2]=radius-offset;hs[axis]=hsize;
4658 float* sphere_radii = NULL;Transform* sphereT = NULL;
4659 if (num_spheres>0) {
// Fix: both arrays are indexed with [0,num_spheres) below, so they must hold num_spheres
// elements (they were sized with num_boxes, which is smaller in the default 1 box / 3 spheres case).
4660 sphere_radii = allocate_array<float>(&arena, num_spheres*1, 32);
4661 sphereT = allocate_array<Transform>(&arena, num_spheres, 32);
4662 for (
unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
// The first two spheres are the end caps, centred at -hsize and +hsize along the axis.
4663 Transform* t = &sphereT[0];t->
p[axis]=-hsize;
4664 t = &sphereT[1];t->p[axis]=hsize;
// Any further spheres are placed at -hsize-radius+dist*i along the axis.
4665 if (num_spheres>2) {
4669 const float dist = (2.f*(hsize+radius))/(float)(num_spheres+1);
4670 for (
unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize-radius+dist*i;}
4674 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
// Adds a hollow-cylinder (tube) compound body made of boxes arranged in a ring around `axis`.
//   min_radius, max_radius - inner and outer radius; min_radius < max_radius is asserted.
//   hsize          - box half size along `axis`.
//   num_boxes      - number of boxes in the ring; 0 selects the default of 8.
//   T, comOffset   - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns. No spheres are used.
// NOTE(review): the per-box rotation that uses `angle`/`axisv`, and the declarations of
// `inertia` and `stripped_center`, are not visible in this excerpt.
4676unsigned add_compound_hollow_cylinder(context_t* c,
float mass,
float min_radius,
float max_radius,
float hsize,
const Transform* T,
AxisEnum axis,
unsigned num_boxes,
const float comOffset[3]) {
4677 const unsigned num_spheres = 0;assert(min_radius<max_radius);
if (num_boxes==0) num_boxes=8;
// `radius` is the mid-wall radius; `inner_radius` here is half the wall thickness.
4678 const float radius = (max_radius+min_radius)*0.5f,inner_radius=(max_radius-min_radius)*0.5f;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4679 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+num_spheres*(1*
sizeof(
float)+
sizeof(Transform))+128);
// Unit vector along the cylinder axis.
4680 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
// axisi[1] always ends up as the index of `axis`; axisi[0] and axisi[2] are the other two components.
4681 int axisi[3] = {0,1,2};
4682 if (axis==
AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4683 else if (axis==
AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4684 float* box_hsizes = NULL;Transform* boxT=NULL;
// Tangential box half size, computed from the outer radius: max_radius*tan(pi/num_boxes).
4686 const float box_length = max_radius*tanf(M_PI/(
float)num_boxes);
4687 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4688 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4689 for (
unsigned i=0;i<num_boxes;i++) {
4690 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4691 hs[axisi[0]]=box_length;hs[axisi[1]]=hsize;hs[axisi[2]]=inner_radius;
// Boxes are spread over a full turn: angle = i*2*pi/num_boxes.
4692 const float angle = (float)i*2.f*M_PI/(
float)num_boxes;
4693 const float sinAngle = sinf(angle), cosAngle = cosf(angle);
// Box centre lies on the mid-wall circle in the plane perpendicular to `axis`.
4695 t->p[axisi[0]]=(radius)*sinAngle;t->p[axisi[1]]=0.f;t->p[axisi[2]]=-(radius)*cosAngle;
// Sphere arrays stay NULL because num_spheres is 0.
4698 float* sphere_radii = NULL;Transform* sphereT = NULL;
4716 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
// Adds a torus-shaped compound body made of boxes arranged in a ring around `axis`.
//   radius         - radius of the circle on which the box centres are placed.
//   inner_radius   - box half size on the two non-tangential axes; inner_radius <= radius is asserted.
//   num_boxes      - number of boxes in the ring; 0 selects the default of 8.
//   T, comOffset   - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns. No spheres are used.
// NOTE(review): the per-box rotation that uses `angle`/`axisv` is not visible in this excerpt.
4718unsigned add_compound_torus(context_t* c,
float mass,
float radius,
float inner_radius,
const Transform* T,
AxisEnum axis,
unsigned num_boxes,
const float comOffset[3]) {
4719 const unsigned num_spheres = 0;assert(inner_radius<=radius);
if (num_boxes==0) num_boxes=8;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4720 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+num_spheres*(1*
sizeof(
float)+
sizeof(Transform))+128);
// Unit vector along the torus axis.
4721 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
// axisi[1] always ends up as the index of `axis`; axisi[0] and axisi[2] are the other two components.
4722 int axisi[3] = {0,1,2};
4723 if (axis==
AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4724 else if (axis==
AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4725 float* box_hsizes = NULL;Transform* boxT=NULL;
// Tangential box half size, computed from the outermost radius: (radius+inner_radius)*tan(pi/num_boxes).
4727 const float box_length = (radius+inner_radius)*tanf(M_PI/(
float)num_boxes);
4728 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4729 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4730 for (
unsigned i=0;i<num_boxes;i++) {
4731 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4732 hs[axisi[0]]=box_length;hs[axisi[1]]=inner_radius;hs[axisi[2]]=inner_radius;
// Boxes are spread over a full turn: angle = i*2*pi/num_boxes.
4733 const float angle = (float)i*2.f*M_PI/(
float)num_boxes;
4734 const float sinAngle = sinf(angle), cosAngle = cosf(angle);
// Box centre lies on the circle of `radius` in the plane perpendicular to `axis`.
4736 t->p[axisi[0]]=(radius)*sinAngle;t->p[axisi[1]]=0.f;t->p[axisi[2]]=-(radius)*cosAngle;
// Sphere arrays stay NULL because num_spheres is 0.
4739 float* sphere_radii = NULL;Transform* sphereT = NULL;
// Inertia comes from calculate_torus_inertia(); stripped_center is only passed on as add_compound()'s last argument.
4756 float inertia[3];
calculate_torus_inertia(inertia,mass,radius,inner_radius,axis,comOffset);
float stripped_center[3];
4757 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
// Adds a cone-shaped compound body approximated by groups of boxes and a set of spheres
// placed along `axis`.
//   radius, hheight - base radius and half height of the cone.
//   num_boxes       - 0 selects the default of 4.
//   num_spheres     - 0 selects the default of 3.
//   T, comOffset    - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns.
// NOTE(review): the per-box placement/rotation that uses `angle`, `angle_offset` and `axisv`
// is not visible in this excerpt.
4759unsigned add_compound_cone(context_t* c,
float mass,
float radius,
float hheight,
const Transform* T,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3]) {
4760 if (num_boxes==0) num_boxes=4;
4761 if (num_spheres==0) num_spheres=3;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4762 Arena arena = c->arena;assert(arena.size>num_boxes*(3*
sizeof(
float)+
sizeof(Transform))+num_spheres*(1*
sizeof(
float)+
sizeof(Transform))+128);
// Unit vector along the cone axis.
4763 const float axisv[3] = {(axis==
AXIS_X)?1.f:0.f,(axis==
AXIS_Y)?1.f:0.f,(axis==
AXIS_Z)?1.f:0.f};
// H is the full height; theta = atan(R/H).
4764 const float R=radius,HH=hheight,H=hheight*2.f,theta=atanf(R/H);
4765 float* box_hsizes = NULL;Transform* boxT=NULL;
4767 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4768 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
for (
unsigned i=0;i<num_boxes;i++) boxT[i]=identity_transform;
// The boxes are split into up to four groups; group j uses height fraction h_fracs[j].
// Group 0 receives every box not assigned to groups 1..3 (group 2 is never assigned here).
4770 unsigned num_group_boxes[4] = {};
4771 if (num_boxes>3) num_group_boxes[3]=1;
4772 if (num_boxes>5) {num_group_boxes[1]=2;}
4773 else if (num_boxes>4) {num_group_boxes[1]=1;}
4774 if (num_spheres>=3 && num_group_boxes[1]==1) {num_group_boxes[3]=0;num_group_boxes[1]=2;}
4775 num_group_boxes[0]=num_boxes-num_group_boxes[1]-num_group_boxes[2]-num_group_boxes[3];
4777 const float h_fracs[4]={0.1f,0.2f,0.7f,0.85f};
// Running index of the first box of the current group inside box_hsizes/boxT.
4778 unsigned num_box_offset=0;
4779 for (
unsigned j=0;j<4;j++) {
4780 const unsigned group_boxes = num_group_boxes[j];
if (group_boxes==0)
continue;;
// dh: box half size along the axis; dr: lateral half size, R*(HH-dh)/(HH*1.41).
4781 const float dh = h_fracs[j]*HH,dr = R*(HH-dh)/(HH*1.41f);
4782 float angle = M_PI*0.5f/group_boxes, angle_offset=M_PI*0.25f*j;
4783 for (
unsigned i=0;i<group_boxes;i++) {
4784 float* hs = &box_hsizes[3*(i+num_box_offset)];Transform* t = &boxT[i+num_box_offset];*t=identity_transform;
4786 hs[0]=hs[1]=hs[2]=dr;hs[axis]=dh;
4789 num_box_offset+=group_boxes;
4793 float* sphere_radii = NULL;Transform* sphereT = NULL;
4794 if (num_spheres>0) {
4795 sphere_radii = allocate_array<float>(&arena, num_spheres*1, 32);
4796 sphereT = allocate_array<Transform>(&arena, num_spheres, 32);
4797 for (
unsigned i=0;i<num_spheres;i++) sphereT[i]=identity_transform;
4798 const float sin_theta = sinf(theta);
// Sphere 0 has radius r = H*sin(theta)/(sin(theta)+1) and its centre at -HH+r along the axis.
4799 const float r = H*sin_theta/(sin_theta+1);sphere_radii[0]=r;sphereT[0].p[axis]=-HH+r;
4800 if (num_spheres>1) {
4801 const unsigned remaining_spheres = num_spheres-1;
// Remaining spheres get radii interpolated from min_rad to max_rad; max_rad grows with
// num_spheres from 0.45*r (<=3 spheres) to 0.85*r (>=6 spheres).
4802 const float min_rad=0.2f*r;
4803 const float max_rad=num_spheres<=3?0.45f*r:(num_spheres>=6?0.85f*r:(0.45f*r+((0.85f*r-0.45f*r)*(num_spheres-3))/2));
4804 for (
unsigned i=0;i<remaining_spheres;i++) {
4805 const float rtop = remaining_spheres==1?min_rad:(min_rad+((max_rad-min_rad)*i)/(remaining_spheres-1));
4806 sphere_radii[i+1]=rtop;
// Centre at HH - rtop/sin(theta) along the axis.
4807 sphereT[i+1].p[axis]=HH-rtop/sinf(theta);
// Inertia comes from calculate_cone_inertia(); stripped_center is only passed on as add_compound()'s last argument.
4811 float inertia[3];
calculate_cone_inertia(inertia,mass,radius,hheight,axis,comOffset);
float stripped_center[3];
4812 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
// Overload of add_compound_prism() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform.
// NOTE(review): the non-NULL branch is not visible in this excerpt.
4814unsigned add_compound_prism(context_t* c,
float mass,
float radius,
float hsize,
unsigned num_lateral_faces,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
const float comOffset[3]) {
4815 if (!mMatrix16WithoutScaling)
return add_compound_prism(c,mass,radius,hsize,num_lateral_faces,(
const Transform*)NULL,axis,comOffset);
// Overload of add_compound_cylinder() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform; otherwise the
// matrix is converted with Mat4WithoutScalingToTransform() and the result is forwarded.
// All other arguments are passed through unchanged.
4818unsigned add_compound_cylinder(context_t* c,
float mass,
float radius,
float hsize,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3],
float box_lateral_side_shrinking) {
4819 if (!mMatrix16WithoutScaling)
return add_compound_cylinder(c,mass,radius,hsize,(
const Transform*)NULL,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);
4820 else {Transform T;
Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);
return add_compound_cylinder(c,mass,radius,hsize,&T,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);}
// Overload of add_compound_capsule() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform; otherwise the
// matrix is converted with Mat4WithoutScalingToTransform() and the result is forwarded.
// All other arguments are passed through unchanged.
4822unsigned add_compound_capsule(context_t* c,
float mass,
float radius,
float hsize,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3],
float box_lateral_side_shrinking) {
4823 if (!mMatrix16WithoutScaling)
return add_compound_capsule(c,mass,radius,hsize,(
const Transform*)NULL,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);
4824 else {Transform T;
Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);
return add_compound_capsule(c,mass,radius,hsize,&T,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);}
// Overload of add_compound_hollow_cylinder() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform.
// NOTE(review): the non-NULL branch is not visible in this excerpt.
4826unsigned add_compound_hollow_cylinder(context_t* c,
float mass,
float min_radius,
float max_radius,
float hsize,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
unsigned num_boxes,
const float comOffset[3]) {
4827 if (!mMatrix16WithoutScaling)
return add_compound_hollow_cylinder(c,mass,min_radius,max_radius,hsize,(
const Transform*)NULL,axis,num_boxes,comOffset);
// Overload of add_compound_torus() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform.
// NOTE(review): the non-NULL branch is not visible in this excerpt.
4830unsigned add_compound_torus(context_t* c,
float mass,
float radius,
float inner_radius,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
unsigned num_boxes,
const float comOffset[3]) {
4831 if (!mMatrix16WithoutScaling)
return add_compound_torus(c,mass,radius,inner_radius,(
const Transform*)NULL,axis,num_boxes,comOffset);
// Overload of add_compound_cone() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform.
// NOTE(review): the non-NULL branch is not visible in this excerpt.
4834unsigned add_compound_cone(context_t* c,
float mass,
float radius,
float hheight,
const float* mMatrix16WithoutScaling,
AxisEnum axis,
unsigned num_boxes,
unsigned num_spheres,
const float comOffset[3]) {
4835 if (!mMatrix16WithoutScaling)
return add_compound_cone(c,mass,radius,hheight,(
const Transform*)NULL,axis,num_boxes,num_spheres,comOffset);
// Adds a staircase-shaped compound body made of `num_steps` stacked boxes.
//   hdepth, hheight, hlength - half extents of the whole staircase; each step box has
//                    half height hheight/num_steps and a half length that shrinks by
//                    hlength/num_steps per step.
//   num_steps      - 0 selects the default of 15.
//   orientation_in_0_3 - orientation selector; its magnitude is reduced modulo 4.
//                    Odd values swap the depth/length axes, values 0 and 1 use a negative
//                    step offset direction, 2 and 3 a positive one.
//   T, comOffset   - forwarded unchanged to add_compound().
// Returns whatever add_compound() returns; inertia and the "old centre" output are passed as NULL.
4839unsigned add_compound_staircase(context_t* c,
float mass,
float hdepth,
float hheight,
float hlength,
unsigned num_steps,
const Transform* T,
int orientation_in_0_3,
const float comOffset[3]) {
4840 if (num_steps==0) num_steps=15;
// Scratch memory comes from a by-value copy of the context arena; the assert checks it is big enough.
4841 Arena arena = c->arena;assert(arena.size>num_steps*(3*
sizeof(
float)+
sizeof(Transform))+128);
4842 int axisi[3] = {0,1,2};
// Reduce modulo 4 BEFORE taking the absolute value: the result is the same for every
// input, but negating first would overflow (undefined behaviour) for INT_MIN.
orientation_in_0_3%=4;if (orientation_in_0_3<0) orientation_in_0_3=-orientation_in_0_3;
const float sign = orientation_in_0_3<2?-1.f:1.f;
4843 if (orientation_in_0_3%2==1) {axisi[0]=2;axisi[1]=1;axisi[2]=0;}
4844 float* box_hsizes = NULL;Transform* boxT=NULL;
4845 const float step_hheight = hheight/(float)(num_steps);
4846 const float step_hlen = hlength/(float)(num_steps);
4848 box_hsizes = allocate_array<float>(&arena, num_steps*3, 32);
4849 boxT = allocate_array<Transform>(&arena, num_steps, 32);
4850 for (
unsigned i=0;i<num_steps;i++) {
4851 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
// Step i: full depth, one step of height, length shortened by i steps.
4852 hs[axisi[0]]=hdepth;hs[axisi[1]]=step_hheight;hs[axisi[2]]=hlength-(float)i*step_hlen;
// Stack the steps upward from -hheight and shift each one along the length axis.
4853 t->
p[axisi[1]]=-hheight+step_hheight+step_hheight*2.f*(float)i;
4854 t->p[axisi[2]]=sign*step_hlen*(float)i;
4857 return add_compound(c,mass,NULL,num_steps,box_hsizes,boxT,0,NULL,NULL,T,comOffset,NULL);
// Overload of add_compound_staircase() taking the body placement as a 16-float matrix.
// A NULL matrix forwards to the Transform overload with a NULL Transform.
// NOTE(review): the non-NULL branch is not visible in this excerpt.
4859unsigned add_compound_staircase(context_t* c,
float mass,
float hdepth,
float hheight,
float hlength,
unsigned num_steps,
const float* mMatrix16WithoutScaling,
int orientation_in_0_3,
const float comOffset[3]) {
4860 if (!mMatrix16WithoutScaling)
return add_compound_staircase(c,mass,hdepth,hheight,hlength,num_steps,(
const Transform*)NULL,orientation_in_0_3,comOffset);
4868 assert(c && body<c->bodies.count);
4869 float aabb_min[3]={0,0,0},aabb_max[3]={0,0,0};
4870 const BodyLayout* L = &c->bodies.layouts[body];
4871 if (L->num_spheres>0) {
4872 assert(L->first_sphere_index>=0 && (uint16_t)L->first_sphere_index+L->num_spheres<=c->colliders.spheres.count);
4873 const SphereCollider* S = &c->colliders.spheres.data[L->first_sphere_index];
4874 const Transform* T = &c->colliders.spheres.transforms[L->first_sphere_index];
4875 for (
int i=0;i<3;i++) {aabb_min[i]=T[0].p[i]-S[0].radius;aabb_max[i]=T[0].p[i]+S[0].radius;}
4877 for (
int j=1;j<L->num_spheres;j++) {
4878 const Transform* t = &T[j];
const float r = S[j].radius;
4879 for (
int i=0;i<3;i++) {
4880 if (aabb_min[i]>t->p[i]-r) aabb_min[i]=t->p[i]-r;
4881 if (aabb_max[i]<t->p[i]+r) aabb_max[i]=t->p[i]+r;
4885 if (L->num_boxes>0) {
4886 assert(L->first_box_index>=0 && (uint16_t)L->first_box_index+L->num_boxes<=c->colliders.boxes.count);
4887 const BoxCollider* B = &c->colliders.boxes.data[L->first_box_index];
4888 const Transform* T = &c->colliders.boxes.transforms[L->first_box_index];
4889 for (
int j=0;j<L->num_boxes;j++) {
4890 const float* hs = &B[j].size[0];
const Transform* t = &T[j];
4891 if (t->q[0]==0.f && t->q[1]==0.f && t->q[2]==0.f && t->q[3]==1.f) {
4893 if (j==0 && L->num_spheres==0) {
4894 for (
int i=0;i<3;i++) {aabb_min[i]=t->p[i]-hs[i];aabb_max[i]=t->p[i]+hs[i];}
4898 for (
int i=0;i<3;i++) {
4899 if (aabb_min[i]>t->p[i]-hs[i]) aabb_min[i]=t->p[i]-hs[i];
4900 if (aabb_max[i]<t->p[i]+hs[i]) aabb_max[i]=t->p[i]+hs[i];
4908 for (
int i=0;i<3;i++) {
4910 const float hd=fabsf(m[i]*hs[0])+fabsf(m[3+i]*hs[1])+fabsf(m[6+i]*hs[2]);
4911 vmin = vmax = t->p[i]; vmin-= hd;vmax+= hd;
4912 if (j==0 && L->num_spheres==0) {
4913 aabb_min[i]=vmin;aabb_max[i]=vmax;
4917 if (aabb_min[i]>vmin) aabb_min[i]=vmin;
4918 if (aabb_max[i]<vmax) aabb_max[i]=vmax;
4924 const int stripComOffset=0;
4925 BodyInfo* info = &c->bodies.infos[body];
4926 for (
int i=0;i<3;i++) {
4927 info->aabb_center[i]= (aabb_max[i]+aabb_min[i])*0.5f;
4928 info->aabb_half_extents[i]=(aabb_max[i]-aabb_min[i])*0.5f;
4929 if (stripComOffset) info->aabb_center[i]+=info->com_offset[i];
4932 info->aabb_enlarged_radius = 0;
4933 const float* t = info->aabb_half_extents;
float s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];
if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4934 t = info->aabb_center;s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];
if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4938 using namespace nudge;assert(c && body<c->bodies.count);
4944 else {
if (mass_fallback<0.f) {mass_fallback=-mass_fallback;} bp->
mass_inverse=1.f/mass_fallback;}
4951 bf->
flags&=~BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC;bf->
flags|=new_motion_state;
4956 assert(c && body<c->bodies.count);
4957 assert(scale_factor!=0.f);
4958 if (scale_factor<0.f) {
4960 scale_factor = -scale_factor/hey;
4964 if (mass_scale_factor==0.f) mass_scale_factor = scale_factor*scale_factor*scale_factor;
4965 else if (mass_scale_factor<0.f) {
4966 mass_scale_factor=-mass_scale_factor;
4972 for (
int k=0;k<3;k++) bp->
inertia_inverse[k]/=(scale_factor*scale_factor*mass_scale_factor);
4980 for (uint16_t i=0;i<bl->
num_boxes;i++) {
for (
int k=0;k<3;k++) {T[i].
p[k]*=scale_factor;C[i].
size[k]*=scale_factor;}}
4986 for (uint16_t i=0;i<bl->
num_spheres;i++) {
Transform* t = &T[i];C[i].
radius*=scale_factor;
for (
int k=0;k<3;k++) t->
p[k]*=scale_factor;}
// File-local helper called from simulate() with the time to advance by.
// From the visible lines: for each animation `ka` it derives a play time, wraps it into
// one cycle of length `totalTime`, decides whether the cycle is played in reverse, and
// then walks the key frames (forward or backward) to find the segment containing the
// play time and its interpolation factor.
// NOTE(review): the outer loop, the accumulation of `curTime`/`totalTime`, and the code
// that applies the interpolated transform are not visible in this excerpt.
4999static void simulate_kinematic_animations(
context_t* c,
float timeStep) {
5007# ifdef NUDGE_DELETE_KINEMATIC_ANIMATIONS_REFERENCING_REMOVED_BODIES
5020 const float absSpeed = fabsf(ka->
speed);
5024 const float deltaTime = timeStep;
// Animations with no positive total duration are skipped.
5040 if (totalTime<=0)
continue;
// Number of whole cycles already played; its parity toggles the play direction.
5041 const float fractionTime = absPlayTime/totalTime;
5042 const unsigned long fractionTimeUL = (
unsigned long)fractionTime;
5044 if (mustReverse) mustReverse = fractionTimeUL%2==0;
5045 else mustReverse = fractionTimeUL%2==1;
// Keep only the time elapsed inside the current cycle.
5047 absPlayTime-=(totalTime*fractionTimeUL);
// Forward traversal of the key frames (also used when there is at most one key frame).
5050 if (!mustReverse || ksz<=1) {
5051 for (
int keyFrameIndex=0;keyFrameIndex<ksz;keyFrameIndex++) {
5052 const Transform* kfT = &pkfT[keyFrameIndex];
// Key frames with a non-positive time are skipped.
5054 if (kfT->
time<=0)
continue;
5056 if (absPlayTime <= curTime) {
// factor is 1 when absPlayTime==curTime and decreases as absPlayTime falls further below curTime.
5057 float factor = float(1)-(curTime-absPlayTime)/kfT->
time;
5061 if (keyFrameIndex>0) {
5062 const Transform* kfTp = &pkfT[keyFrameIndex-1];
5079 else if (keyFrameIndex == ksz-1) {
// Backward traversal: key frames are visited from last to first, and each segment's
// duration is taken from the following key frame (from key frame 0 for the last one).
5090 for (
int keyFrameIndex=ksz-1;keyFrameIndex>=0;keyFrameIndex--) {
5091 const Transform* kfT = &pkfT[keyFrameIndex];
5092 const bool isFirstKeyFrame = (keyFrameIndex==ksz-1);
5093 const float& kfTime = isFirstKeyFrame ? pkfT[0].
time : pkfT[keyFrameIndex+1].
time;
5095 if (kfTime<=0)
continue;
5097 if (absPlayTime <= curTime) {
5098 float factor = float(1)-(curTime-absPlayTime)/kfTime;
5102 if (!isFirstKeyFrame) {
5103 const Transform* kfTp = &pkfT[keyFrameIndex+1];
5120 else if (keyFrameIndex == 0) {
5134 assert(body<c->bodies.count);
5135 assert(pModelMatrix16Out);
5144 int mustSmoothTransform = timeStepMinusRemainingTime>0 && !(exclude_flags&flags);
5147 if (mustSmoothTransform) {
5167 for (
int l=0;l<3;l++) {
5168 Tn.
position[l]-=linvel[l]*timeStepMinusRemainingTime;
5171 const float angvelinv[3] = {-angvel[0],-angvel[1],-angvel[2]};
5182 for (
int l=0;l<3;l++) pModelMatrix16Out[12+l] -= pModelMatrix16Out[l]*comOffset[0]+pModelMatrix16Out[4+l]*comOffset[1]+pModelMatrix16Out[8+l]*comOffset[2];
5184 return pModelMatrix16Out;
5188 if (modelMatrixStrideInFloatUnits<16) modelMatrixStrideInFloatUnits=16;
5189 const unsigned bodies_count = loopActiveBodiesOnly ? : c->
bodies.
count;
5191 for (uint32_t i=0;i<bodies_count;i++) {
5198 unsigned contact_data_index,
5199 int16_t* box_collider_index_for_body_a,
5200 int16_t* sphere_collider_index_for_body_a,
5201 int16_t* box_collider_index_for_body_b,
5202 int16_t* sphere_collider_index_for_body_b,
5203 int use_relative_values_for_output_indices
5205 assert(c && contact_data_index<c->contact_data.count);
5207 const uint64_t tag = cc->
tags[contact_data_index];
5209 const unsigned a = bp->
a;assert(a<c->bodies.count);
5210 const unsigned b = bp->
b;assert(b<c->bodies.count);
5211 const uint64_t a_tag = (tag&0x0000FFFF00000000ULL)>>(2ULL*16ULL);
5212 const uint64_t b_tag = (tag&0xFFFF000000000000ULL)>>(3ULL*16ULL);
5213 struct coll_t {
unsigned body;uint64_t tag;int16_t first_box_index;int16_t* box_colliding_index;uint16_t num_boxes;int16_t first_sphere_index;int16_t* sphere_colliding_index;uint16_t num_spheres;};
5214 struct coll_t coll[2]=
5217 for (
int t=0;t<2;t++) {
5218 struct coll_t* cl = &coll[t];
5219 assert(cl->num_boxes || cl->num_spheres);
5221 if (cl->box_colliding_index) {
5222 *cl->box_colliding_index=-1;
5223 if (cl->num_boxes>0) {
5224 assert(cl->first_box_index>=0);
5226 for (uint16_t ci=cl->first_box_index;ci<cl->first_box_index+cl->num_boxes;ci++) {
5228 if (c->
colliders.
boxes.
tags[ci]==cl->tag) {*cl->box_colliding_index=use_relative_values_for_output_indices?(ci-cl->first_box_index):ci;
break;}
5230 assert(*cl->box_colliding_index>=0);
5233 if (cl->sphere_colliding_index) {
5234 *cl->sphere_colliding_index=-1;
5235 if (cl->num_spheres>0) {
5236 assert(cl->first_sphere_index>=0);
5238 for (uint16_t ci=cl->first_sphere_index;ci<cl->first_sphere_index+cl->num_spheres;ci++) {
5240 if (c->
colliders.
spheres.
tags[ci]==cl->tag) {*cl->sphere_colliding_index=use_relative_values_for_output_indices?(ci-cl->first_sphere_index):ci;
break;}
5242 assert(*cl->sphere_colliding_index>=0);
5244 if (cl->box_colliding_index && cl->sphere_colliding_index) {
5245 assert(*cl->box_colliding_index>=0 || *cl->sphere_colliding_index>=0);
5246 assert(*cl->box_colliding_index==-1 || *cl->sphere_colliding_index==-1);
5256 unsigned sim_is_burning_time = 0;
5258 if (elapsedSecondsFromLastCall<0) elapsedSecondsFromLastCall=0;
5266 if (must_warn)
log(
"[PhysicFrame: %llu] max_num_substeps=%u reached:\tBurnt remaining_time=%1.3f (on time_step=%1.3f)\n",sp->
num_frames,sp->
max_num_substeps,elapsedSecondsFromLastCall,sp->
time_step);
5271 sim_is_burning_time = 1;
5288extern uintptr_t get_required_arena_size_for_setup_contact_constraints(
context_t* c);
// Runs `numSubSteps` simulation sub-steps of length `timeStep` each, using
// `numIterations` impulse-solver iterations per sub-step.
// Visible order of work per call: finalize removed bodies, advance kinematic animations,
// then per sub-step: make sure the arena is large enough, collide, apply gravity and
// linear damping, set up contact constraints, iterate apply_impulses, update the cached
// impulses and advance the bodies.
// NOTE(review): several statements (loops over bodies, arena bookkeeping, closing of the
// preprocessor conditionals) are not visible in this excerpt.
5289void simulate(
context_t* c,
float timeStep,
unsigned numSubSteps,
unsigned numIterations) {
5291 finalize_removed_bodies(c);
// The LOW quality macro is defined right here, so animations are advanced once per call
// by the whole interval (timeStep*numSubSteps) instead of once per sub-step.
5293# define NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5294# ifdef NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5295 if (numSubSteps>0) simulate_kinematic_animations(c,timeStep*numSubSteps);
5298 for (
unsigned n = 0; n < numSubSteps; ++n) {
5300# ifndef NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5301 simulate_kinematic_animations(c,timeStep);
// Twice the size reported for setup_contact_constraints is required before proceeding.
5304 uintptr_t required_arena_size = get_required_arena_size_for_setup_contact_constraints(c);
5306 required_arena_size = required_arena_size+required_arena_size;
5308 if (c->
arena.
size<required_arena_size) {
// Grow to the required size plus half of the current size; the new buffer is zero-filled.
// NOTE(review): in the visible lines the _mm_malloc result is used without a NULL check and
// the release of the previous buffer is not shown - confirm against the full file.
5310 const uintptr_t new_size = required_arena_size+c->
arena.
size/2;
5311 c->
arena.
data = _mm_malloc(new_size,NUDGE_ARENA_SIZE_ALIGNMENT);memset(c->
arena.
data,0,new_size);
5319 collide(c, connections);
// Integrate gravity into the velocity, then apply linear damping, per component.
5340 for (
int l=0;l<3;l++) {
5341 momentum->
velocity[l] += gravity[l] * timeStep;
5342 momentum->
velocity[l] *= damping_linear;
5347# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
5358 ContactConstraintData* contact_constraints = setup_contact_constraints(c, contact_impulses, &temporary);
5361 for (
unsigned i = 0; i < numIterations; ++i) {
5362 apply_impulses(contact_constraints, c->
bodies);
5367 update_cached_impulses(contact_constraints, contact_impulses);
5373 advance(c, timeStep);
5389#ifndef NUDGE_NO_STDIO
// NOTE(review): body fragment of a routine that writes simulation state to the FILE* `f`
// (presumably save_context — its signature is not part of this excerpt, and some
// statements are missing). The visible output alternates text headers written with
// fprintf and raw binary arrays written with fwrite.
// The only error handling visible is assert(), which is compiled out when NDEBUG is defined.
5398 uint32_t size_of_BodyInfo_user = 0;
5399# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
// Text header for the body data block: element count plus the sizeof of each record type.
5402 fprintf(f,
"BodyData:\ncount: %u\nsizeof(BodyData): %u\nsizeof(Transform): %u\nsizeof(BodyProperties): %u\nsizeof(BodyMomentum): %u\n"
// Number of box colliders.
5413 fprintf(f,
"\nColliderData::boxes:\n%u\n",cd->
boxes.
count);
// Number of sphere colliders.
5417 fprintf(f,
"\nColliderData::spheres:\n%u\n",cd->
spheres.
count);
// Contact count header ...
5423 fprintf(f,
"\nContactData::count\n%u\n",td->
count);
// ... followed by td->count raw uint64_t tags; all of them must have been written.
5426 rv=fwrite(td->
tags,
sizeof(uint64_t),td->
count,f);assert(rv==td->
count);
5427 fprintf(f,
"\nContactData::sleeping_count\n%u\n",td->
sleeping_count);
// Contact cache: count header, then tc->count raw uint64_t tags.
5431 fprintf(f,
"\nContactCache::count\n%u\n",tc->
count);
5432 rv=fwrite(tc->
tags,
sizeof(uint64_t),tc->
count,f);assert(rv==tc->
count);
5436 fprintf(f,
"\nActiveBodies::count\n%u\n",ab->
count);
5440 fprintf(f,
"\nKinematicData::key_frame_count\n%u\n",kd->
key_frame_count);
// Structure sizes are stored as text; the loading code visible in this file asserts
// that they equal the sizeof values of the reading build.
5446 fprintf(f,
"\nsizeof(SimulationParams)\n%u\n",(uint32_t)
sizeof(
SimulationParams));
5449 fprintf(f,
"\nsizeof(GlobalData)\n%u\n",(uint32_t)
sizeof(
GlobalData));
5453# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
// User data of the context: size header, then the raw bytes of c->user as one record.
5454 fprintf(f,
"\nsizeof(c->user)\n%u\n",(uint32_t)
sizeof(c->
user));
5455 rv=fwrite(&c->
user,
sizeof(c->
user),1,f);assert(rv==1);
// NOTE(review): body fragment of a routine that reads simulation state back from the
// FILE* `f` (presumably load_context — its signature is not part of this excerpt, and
// some statements are missing). Text headers are parsed with fscanf, raw arrays with fread.
// Every check is an assert(), so nothing is validated when NDEBUG is defined.
// NOTE(review): each fscanf format ends with "\n". In a scanf format that is a
// white-space directive which consumes ALL following white-space characters, so when a
// binary payload follows (the fread calls below) and its first bytes happen to be
// 0x09-0x0D or 0x20, those bytes are swallowed and the fread is misaligned — confirm
// and consider reading exactly one separator character instead.
// rv receives the fscanf/fread results; tmp holds the eight sizes parsed from the body header.
5459 size_t rv = 0;uint32_t tmp[8]={};
5462 unsigned num_saved_boxes=0,num_saved_spheres=0;
5463 rv=fscanf(f,
"MAX_NUM_BOXES:\n%u\n",&num_saved_boxes);assert(rv==1);
5464 rv=fscanf(f,
"MAX_NUM_SPHERES:\n%u\n",&num_saved_spheres);assert(rv==1);
// Body header: nine values are expected — bd->count plus the eight tmp[] entries.
5471 rv=fscanf(f,
"BodyData:\ncount: %u\nsizeof(BodyData): %u\nsizeof(Transform): %u\nsizeof(BodyProperties): %u\nsizeof(BodyMomentum): %u\n"
5472 "sizeof(BodyFilter): %u\nsizeof(BodyInfo): %u\nsizeof(BodyInfo::user): %u\nnum_aux_bodies: %u\n",&bd->
count,&tmp[0],&tmp[1],&tmp[2],&tmp[3],&tmp[4],&tmp[5],&tmp[6],&tmp[7]);assert(rv==9);
5479# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
// Contact count; it must fit the capacity already allocated for td.
5504 rv=fscanf(f,
"\nContactData::count\n%u\n",&td->
count);assert(rv==1);assert(td->
count<=td->
capacity);
// Raw uint64_t tags, td->count of them.
5507 rv=fread(td->
tags,
sizeof(uint64_t),td->
count,f);assert(rv==td->
count);
5508 rv=fscanf(f,
"\nContactData::sleeping_count\n%u\n",&td->
sleeping_count);assert(rv==1);
// Contact cache: count (bounded by tc->capacity), then the raw tags.
5512 rv=fscanf(f,
"\nContactCache::count\n%u\n",&tc->
count);assert(tc->
count<=tc->
capacity);assert(rv==1);
5513 rv=fread(tc->
tags,
sizeof(uint64_t),tc->
count,f);assert(rv==tc->
count);
// Active body count, bounded by ab->capacity.
5517 rv=fscanf(f,
"\nActiveBodies::count\n%u\n",&ab->
count);assert(rv==1);assert(ab->
count<=ab->
capacity);
5521 rv=fscanf(f,
"\nKinematicData::key_frame_count\n%u\n",&kd->
key_frame_count);assert(rv==1);
// The stored structure sizes must equal the sizes of this build's structures.
5530 uint32_t simulation_params_size=0;
5531 rv=fscanf(f,
"\nsizeof(SimulationParams)\n%u\n",&simulation_params_size);assert(simulation_params_size==(uint32_t)
sizeof(
SimulationParams));assert(rv==1);
5535 uint32_t global_data_size=0;
5536 rv=fscanf(f,
"\nsizeof(GlobalData)\n%u\n",&global_data_size);assert(global_data_size==(uint32_t)
sizeof(
GlobalData));assert(rv==1);
5541# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
// Context user data: size check, then the raw bytes of c->user as one record.
5542 uint32_t user_size=0;
5543 rv=fscanf(f,
"\nsizeof(c->user)\n%u\n",&user_size);assert(user_size==(uint32_t)
sizeof(c->
user));assert(rv==1);
5544 rv=fread(&c->
user,
sizeof(c->
user),1,f);assert(rv==1);
// NOTE(review): fragment of a consistency check over collider tags; the loops that
// produce `tag`, `i` and `starti` are not part of this excerpt.
// Boolean scratch table taken from `arena` (the third argument, 32, is presumably the
// alignment — TODO confirm against allocate_array).
5557 bool* check_array = allocate_array<bool>(&arena, required_size, 32);assert(check_array);
// Box pass: the table is used from its start and the first MAX_NUM_BOXES entries are cleared.
5559 bool* box_checks = check_array;
5560 for (
unsigned i=0;i<c->
MAX_NUM_BOXES;i++) box_checks[i]=
false;
// Every box tag must be in range and must not have been seen before; then mark it as seen.
5563 assert(tag<c->MAX_NUM_BOXES);
5564 assert(box_checks[tag]==
false);
5565 box_checks[tag]=
true;
// Handling of entries that were never marked (purpose of `starti` not visible here).
5569 if (!box_checks[i]) {
5570 assert(starti<c->MAX_NUM_BOXES);
// Sphere pass: the same table is reused from its start.
5577 bool* sphere_checks = check_array;
// Sphere tags are offset by NUDGE_START_SPHERE_TAG; the table is indexed with the offset removed.
5581 assert(tag>=NUDGE_START_SPHERE_TAG && tag<NUDGE_START_SPHERE_TAG+c->MAX_NUM_SPHERES);
5582 assert(sphere_checks[tag-NUDGE_START_SPHERE_TAG]==
false);
5583 sphere_checks[tag-NUDGE_START_SPHERE_TAG]=
true;
5587 if (!sphere_checks[i]) {
5588 assert(starti<c->MAX_NUM_SPHERES);
5599# ifdef NUDGE_USE_TIME_CONTEXT
5600void save_time_context(FILE* f,
const time_context_t* c) {
5602 fprintf(f,
"\nsizeof(time_context_t)\n%u\n",(uint32_t)
sizeof(time_context_t));
5603 fwrite(c,
sizeof(time_context_t),1,f);
5605void load_time_context(FILE* f,time_context_t* c) {
5607 uint32_t time_context_size=0;
5608 fscanf(f,
"\nsizeof(time_context_t)\n%u\n",&time_context_size);assert(time_context_size==(uint32_t)
sizeof(time_context_t));
5609 fread(c,
sizeof(time_context_t),1,f);
5621 float* feature_penetrations = allocate_array<float>(&temporary, pair_count + 7, 32);
5622 uint32_t* features = allocate_array<uint32_t>(&temporary, pair_count + 7, 32);
5628 pairs[pair_count+0] = 0;
5629 pairs[pair_count+1] = 0;
5630 pairs[pair_count+2] = 0;
5635 for (
unsigned i = 0; i < pair_count; i += 4) {
5637 unsigned pair0 = pairs[i+0];
5638 unsigned pair1 = pairs[i+1];
5639 unsigned pair2 = pairs[i+2];
5640 unsigned pair3 = pairs[i+3];
5642 unsigned a0_index = pair0 & 0xffff;
5643 unsigned b0_index = pair0 >> 16;
5645 unsigned a1_index = pair1 & 0xffff;
5646 unsigned b1_index = pair1 >> 16;
5648 unsigned a2_index = pair2 & 0xffff;
5649 unsigned b2_index = pair2 >> 16;
5651 unsigned a3_index = pair3 & 0xffff;
5652 unsigned b3_index = pair3 >> 16;
5655 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
5656 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
5657 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
5658 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
5660 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
5661 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
5662 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
5663 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
5665 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
5666 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
5669 simd4_float t_x, t_y, t_z;
5670 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5672 simd4_float relative_rotation_x = a_rotation_x * b_rotation_s - b_rotation_x * a_rotation_s - t_x;
5673 simd4_float relative_rotation_y = a_rotation_y * b_rotation_s - b_rotation_y * a_rotation_s - t_y;
5674 simd4_float relative_rotation_z = a_rotation_z * b_rotation_s - b_rotation_z * a_rotation_s - t_z;
5675 simd4_float relative_rotation_s = (a_rotation_x * b_rotation_x +
5676 a_rotation_y * b_rotation_y +
5677 a_rotation_z * b_rotation_z +
5678 a_rotation_s * b_rotation_s);
5682 simd4_float kx = relative_rotation_x + relative_rotation_x;
5683 simd4_float ky = relative_rotation_y + relative_rotation_y;
5684 simd4_float kz = relative_rotation_z + relative_rotation_z;
5686 simd4_float xx = kx * relative_rotation_x;
5687 simd4_float yy = ky * relative_rotation_y;
5688 simd4_float zz = kz * relative_rotation_z;
5689 simd4_float xy = kx * relative_rotation_y;
5690 simd4_float xz = kx * relative_rotation_z;
5691 simd4_float yz = ky * relative_rotation_z;
5692 simd4_float sx = kx * relative_rotation_s;
5693 simd4_float sy = ky * relative_rotation_s;
5694 simd4_float sz = kz * relative_rotation_s;
5696 simd4_float one = simd_float::make4(1.0f);
5698 simd4_float vx_x = one - yy - zz;
5699 simd4_float vx_y = xy + sz;
5700 simd4_float vx_z = xz - sy;
5702 simd4_float vy_x = xy - sz;
5703 simd4_float vy_y = one - xx - zz;
5704 simd4_float vy_z = yz + sx;
5706 simd4_float vz_x = xz + sy;
5707 simd4_float vz_y = yz - sx;
5708 simd4_float vz_z = one - xx - yy;
5711 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
5712 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
5713 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
5714 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
5716 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
5717 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
5718 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
5719 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
5721 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
5722 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
5725 vx_x = simd_float::abs(vx_x);
5726 vx_y = simd_float::abs(vx_y);
5727 vx_z = simd_float::abs(vx_z);
5729 vy_x = simd_float::abs(vy_x);
5730 vy_y = simd_float::abs(vy_y);
5731 vy_z = simd_float::abs(vy_z);
5733 vz_x = simd_float::abs(vz_x);
5734 vz_y = simd_float::abs(vz_y);
5735 vz_z = simd_float::abs(vz_z);
5737 simd4_float pax = b_size_x + vx_x*a_size_x + vy_x*a_size_y + vz_x*a_size_z;
5738 simd4_float pay = b_size_y + vx_y*a_size_x + vy_y*a_size_y + vz_y*a_size_z;
5739 simd4_float paz = b_size_z + vx_z*a_size_x + vy_z*a_size_y + vz_z*a_size_z;
5741 simd4_float pbx = a_size_x + vx_x*b_size_x + vx_y*b_size_y + vx_z*b_size_z;
5742 simd4_float pby = a_size_y + vy_x*b_size_x + vy_y*b_size_y + vy_z*b_size_z;
5743 simd4_float pbz = a_size_z + vz_x*b_size_x + vz_y*b_size_y + vz_z*b_size_z;
5746 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
5747 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
5748 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
5749 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
5751 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
5752 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
5753 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
5754 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
5757 simd4_float delta_x = a_position_x - b_position_x;
5758 simd4_float delta_y = a_position_y - b_position_y;
5759 simd4_float delta_z = a_position_z - b_position_z;
5760 simd4_float delta_w = a_position_w - b_position_w;
5762 simd128::transpose32(delta_x, delta_y, delta_z, delta_w);
5764 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, delta_x, delta_y, delta_z, t_x, t_y, t_z);
5769 simd4_float u_x, u_y, u_z;
5770 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
5772 simd4_float a_offset_x = u_x + delta_x - b_rotation_s * t_x;
5773 simd4_float a_offset_y = u_y + delta_y - b_rotation_s * t_y;
5774 simd4_float a_offset_z = u_z + delta_z - b_rotation_s * t_z;
5776 pax -= simd_float::abs(a_offset_x);
5777 pay -= simd_float::abs(a_offset_y);
5778 paz -= simd_float::abs(a_offset_z);
5780 simd_soa::cross(delta_x, delta_y, delta_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5785 simd_soa::cross(a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
5787 simd4_float b_offset_x = u_x - delta_x - a_rotation_s * t_x;
5788 simd4_float b_offset_y = u_y - delta_y - a_rotation_s * t_y;
5789 simd4_float b_offset_z = u_z - delta_z - a_rotation_s * t_z;
5791 pbx -= simd_float::abs(b_offset_x);
5792 pby -= simd_float::abs(b_offset_y);
5793 pbz -= simd_float::abs(b_offset_z);
5796 simd4_float payz = simd_float::min(pay, paz);
5797 simd4_float pbyz = simd_float::min(pby, pbz);
5799 simd4_float pa = simd_float::min(pax, payz);
5800 simd4_float pb = simd_float::min(pbx, pbyz);
5802 simd4_float p = simd_float::min(pa, pb);
5805 simd4_float aymf = simd_float::cmp_eq(payz, pa);
5806 simd4_float azmf = simd_float::cmp_eq(paz, pa);
5808 simd4_float bymf = simd_float::cmp_eq(pbyz, pb);
5809 simd4_float bzmf = simd_float::cmp_eq(pbz, pb);
5811 simd4_int32 aymi = simd::bitwise_and(simd_float::asint(aymf), simd_int32::make4(1));
5812 simd4_int32 azmi = simd::bitwise_and(simd_float::asint(azmf), simd_int32::make4(1));
5814 simd4_int32 bymi = simd::bitwise_and(simd_float::asint(bymf), simd_int32::make4(1));
5815 simd4_int32 bzmi = simd::bitwise_and(simd_float::asint(bzmf), simd_int32::make4(1));
5817 simd4_int32 aface = simd_int32::add(aymi, azmi);
5818 simd4_int32 bface = simd_int32::add(bymi, bzmi);
5821 simd4_float swap = simd_float::cmp_eq(pa, p);
5823 simd4_float pair_a_b = simd_int32::asfloat(simd_int32::load4((
const int32_t*)(pairs + i)));
5824 simd4_float pair_b_a = simd_int32::asfloat(simd::bitwise_or(simd_int32::shift_left<16>(simd_float::asint(pair_a_b)), simd_int32::shift_right<16>(simd_float::asint(pair_a_b))));
5826 simd4_float face = simd::blendv32(simd_int32::asfloat(bface), simd_int32::asfloat(aface), swap);
5827 simd4_float pair = simd::blendv32(pair_a_b, pair_b_a, swap);
5830 unsigned mask = simd::signmask32(simd_float::cmp_gt(p, simd_float::zero4()));
5832 NUDGE_ALIGNED(16) float face_penetration_array[4];
5833 NUDGE_ALIGNED(16) uint32_t face_array[4];
5834 NUDGE_ALIGNED(16) uint32_t pair_array[4];
5836 simd_float::store4(face_penetration_array, p);
5837 simd_float::store4((
float*)face_array, face);
5838 simd_float::store4((
float*)pair_array, pair);
5841 unsigned index = first_set_bit(mask);
5844 feature_penetrations[added] = face_penetration_array[index];
5845 features[added] = face_array[index];
5846 pairs[added] = pair_array[index];
5853 while (added && !pairs[added-1])
5862 pairs[pair_count+0] = 0;
5863 pairs[pair_count+1] = 0;
5864 pairs[pair_count+2] = 0;
5866 feature_penetrations[pair_count+0] = 0.0f;
5867 feature_penetrations[pair_count+1] = 0.0f;
5868 feature_penetrations[pair_count+2] = 0.0f;
5872 for (
unsigned pair_offset = 0; pair_offset < pair_count; pair_offset += 4) {
5874 unsigned pair0 = pairs[pair_offset+0];
5875 unsigned pair1 = pairs[pair_offset+1];
5876 unsigned pair2 = pairs[pair_offset+2];
5877 unsigned pair3 = pairs[pair_offset+3];
5879 unsigned a0_index = pair0 & 0xffff;
5880 unsigned b0_index = pair0 >> 16;
5882 unsigned a1_index = pair1 & 0xffff;
5883 unsigned b1_index = pair1 >> 16;
5885 unsigned a2_index = pair2 & 0xffff;
5886 unsigned b2_index = pair2 >> 16;
5888 unsigned a3_index = pair3 & 0xffff;
5889 unsigned b3_index = pair3 >> 16;
5892 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
5893 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
5894 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
5895 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
5897 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
5898 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
5899 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
5900 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
5902 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
5903 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
5906 simd4_float t_x, t_y, t_z;
5907 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5909 simd4_float relative_rotation_x = a_rotation_x * b_rotation_s - b_rotation_x * a_rotation_s - t_x;
5910 simd4_float relative_rotation_y = a_rotation_y * b_rotation_s - b_rotation_y * a_rotation_s - t_y;
5911 simd4_float relative_rotation_z = a_rotation_z * b_rotation_s - b_rotation_z * a_rotation_s - t_z;
5912 simd4_float relative_rotation_s = (a_rotation_x * b_rotation_x +
5913 a_rotation_y * b_rotation_y +
5914 a_rotation_z * b_rotation_z +
5915 a_rotation_s * b_rotation_s);
5919 simd4_float kx = relative_rotation_x + relative_rotation_x;
5920 simd4_float ky = relative_rotation_y + relative_rotation_y;
5921 simd4_float kz = relative_rotation_z + relative_rotation_z;
5923 simd4_float xx = kx * relative_rotation_x;
5924 simd4_float yy = ky * relative_rotation_y;
5925 simd4_float zz = kz * relative_rotation_z;
5926 simd4_float xy = kx * relative_rotation_y;
5927 simd4_float xz = kx * relative_rotation_z;
5928 simd4_float yz = ky * relative_rotation_z;
5929 simd4_float sx = kx * relative_rotation_s;
5930 simd4_float sy = ky * relative_rotation_s;
5931 simd4_float sz = kz * relative_rotation_s;
5933 simd4_float one = simd_float::make4(1.0f);
5935 simd4_float vx_x = one - yy - zz;
5936 simd4_float vx_y = xy + sz;
5937 simd4_float vx_z = xz - sy;
5939 simd4_float vy_x = xy - sz;
5940 simd4_float vy_y = one - xx - zz;
5941 simd4_float vy_z = yz + sx;
5943 simd4_float vz_x = xz + sy;
5944 simd4_float vz_y = yz - sx;
5945 simd4_float vz_z = one - xx - yy;
5947 NUDGE_ALIGNED(16) float a_to_b[4*9];
5949 simd_float::store4(a_to_b + 0, vx_x);
5950 simd_float::store4(a_to_b + 4, vx_y);
5951 simd_float::store4(a_to_b + 8, vx_z);
5953 simd_float::store4(a_to_b + 12, vy_x);
5954 simd_float::store4(a_to_b + 16, vy_y);
5955 simd_float::store4(a_to_b + 20, vy_z);
5957 simd_float::store4(a_to_b + 24, vz_x);
5958 simd_float::store4(a_to_b + 28, vz_y);
5959 simd_float::store4(a_to_b + 32, vz_z);
5962 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
5963 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
5964 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
5965 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
5967 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
5968 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
5969 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
5970 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
5972 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
5973 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
5976 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
5977 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
5978 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
5979 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
5981 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
5982 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
5983 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
5984 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
5987 simd4_float delta_x = a_position_x - b_position_x;
5988 simd4_float delta_y = a_position_y - b_position_y;
5989 simd4_float delta_z = a_position_z - b_position_z;
5990 simd4_float delta_w = a_position_w - b_position_w;
5992 simd128::transpose32(delta_x, delta_y, delta_z, delta_w);
5994 simd_soa::cross(delta_x, delta_y, delta_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5999 simd4_float u_x, u_y, u_z;
6000 simd_soa::cross(a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
6002 simd4_float b_offset_x = u_x - delta_x - a_rotation_s * t_x;
6003 simd4_float b_offset_y = u_y - delta_y - a_rotation_s * t_y;
6004 simd4_float b_offset_z = u_z - delta_z - a_rotation_s * t_z;
6006 NUDGE_ALIGNED(16)
float b_offset_array[3*4];
6008 simd_float::store4(b_offset_array + 0, b_offset_x);
6009 simd_float::store4(b_offset_array + 4, b_offset_y);
6010 simd_float::store4(b_offset_array + 8, b_offset_z);
6012 simd4_float face_penetration = simd_float::load4(feature_penetrations + pair_offset);
6015 NUDGE_ALIGNED(16)
float edge_penetration_a[4*9];
6016 NUDGE_ALIGNED(16)
float edge_penetration_b[4*9];
6018 for (
unsigned i = 0; i < 3; ++i) {
6019 simd4_float acx = simd_float::load4(a_to_b + (0*3 + i)*4);
6020 simd4_float acy = simd_float::load4(a_to_b + (1*3 + i)*4);
6021 simd4_float acz = simd_float::load4(a_to_b + (2*3 + i)*4);
6023 simd4_float bcx = simd_float::load4(a_to_b + (i*3 + 0)*4);
6024 simd4_float bcy = simd_float::load4(a_to_b + (i*3 + 1)*4);
6025 simd4_float bcz = simd_float::load4(a_to_b + (i*3 + 2)*4);
6027 simd4_float ac2x = acx*acx;
6028 simd4_float ac2y = acy*acy;
6029 simd4_float ac2z = acz*acz;
6031 simd4_float bc2x = bcx*bcx;
6032 simd4_float bc2y = bcy*bcy;
6033 simd4_float bc2z = bcz*bcz;
6035 simd4_float aacx = simd_float::abs(acx);
6036 simd4_float aacy = simd_float::abs(acy);
6037 simd4_float aacz = simd_float::abs(acz);
6039 simd4_float abcx = simd_float::abs(bcx);
6040 simd4_float abcy = simd_float::abs(bcy);
6041 simd4_float abcz = simd_float::abs(bcz);
6043 simd4_float r_a0 = ac2y + ac2z;
6044 simd4_float r_a1 = ac2z + ac2x;
6045 simd4_float r_a2 = ac2x + ac2y;
6047 simd4_float r_b0 = bc2y + bc2z;
6048 simd4_float r_b1 = bc2z + bc2x;
6049 simd4_float r_b2 = bc2x + bc2y;
6051 simd4_float nan_threshold = simd_float::make4(1e-3f);
6053 r_a0 = simd::bitwise_or(simd_float::rsqrt(r_a0), simd_float::cmp_le(r_a0, nan_threshold));
6054 r_a1 = simd::bitwise_or(simd_float::rsqrt(r_a1), simd_float::cmp_le(r_a1, nan_threshold));
6055 r_a2 = simd::bitwise_or(simd_float::rsqrt(r_a2), simd_float::cmp_le(r_a2, nan_threshold));
6057 r_b0 = simd::bitwise_or(simd_float::rsqrt(r_b0), simd_float::cmp_le(r_b0, nan_threshold));
6058 r_b1 = simd::bitwise_or(simd_float::rsqrt(r_b1), simd_float::cmp_le(r_b1, nan_threshold));
6059 r_b2 = simd::bitwise_or(simd_float::rsqrt(r_b2), simd_float::cmp_le(r_b2, nan_threshold));
6061 simd4_float pa0 = aacy*a_size_z + aacz*a_size_y;
6062 simd4_float pa1 = aacz*a_size_x + aacx*a_size_z;
6063 simd4_float pa2 = aacx*a_size_y + aacy*a_size_x;
6065 simd4_float pb0 = abcy*b_size_z + abcz*b_size_y;
6066 simd4_float pb1 = abcz*b_size_x + abcx*b_size_z;
6067 simd4_float pb2 = abcx*b_size_y + abcy*b_size_x;
6069 simd4_float o0 = simd_float::abs(acy*b_offset_z - acz*b_offset_y);
6070 simd4_float o1 = simd_float::abs(acz*b_offset_x - acx*b_offset_z);
6071 simd4_float o2 = simd_float::abs(acx*b_offset_y - acy*b_offset_x);
6073 simd_float::store4(edge_penetration_a + (i*3 + 0)*4, (pa0 - o0) * r_a0);
6074 simd_float::store4(edge_penetration_a + (i*3 + 1)*4, (pa1 - o1) * r_a1);
6075 simd_float::store4(edge_penetration_a + (i*3 + 2)*4, (pa2 - o2) * r_a2);
6077 simd_float::store4(edge_penetration_b + (i*3 + 0)*4, pb0 * r_b0);
6078 simd_float::store4(edge_penetration_b + (i*3 + 1)*4, pb1 * r_b1);
6079 simd_float::store4(edge_penetration_b + (i*3 + 2)*4, pb2 * r_b2);
6082 simd4_int32 a_edge = simd_int32::make4(0);
6083 simd4_int32 b_edge = simd_int32::make4(0);
6085 simd4_float penetration = face_penetration;
6087 for (
unsigned i = 0; i < 3; ++i) {
6088 for (
unsigned j = 0; j < 3; ++j) {
6089 simd4_float p = simd_float::load4(edge_penetration_a + (i*3 + j)*4) + simd_float::load4(edge_penetration_b + (j*3 + i)*4);
6091 simd4_float mask = simd_float::cmp_gt(penetration, p);
6093 penetration = simd_float::min(penetration, p);
6094 a_edge = simd::blendv32(a_edge, simd_int32::make4(j), simd_float::asint(mask));
6095 b_edge = simd::blendv32(b_edge, simd_int32::make4(i), simd_float::asint(mask));
6099 simd4_float face_bias = simd_float::make4(1e-3f);
6101 unsigned edge = simd::signmask32(simd_float::cmp_gt(face_penetration, penetration + face_bias));
6102 unsigned overlapping = simd::signmask32(simd_float::cmp_gt(penetration, simd_float::zero4()));
6104 unsigned face = ~edge;
6106 edge &= overlapping;
6107 face &= overlapping;
6109 NUDGE_ALIGNED(16) float penetration_array[4];
6110 NUDGE_ALIGNED(16) int32_t a_edge_array[4];
6111 NUDGE_ALIGNED(16) int32_t b_edge_array[4];
6113 simd_float::store4(penetration_array, penetration);
6114 simd_int32::store4(a_edge_array, a_edge);
6115 simd_int32::store4(b_edge_array, b_edge);
6119 unsigned index = first_set_bit(face);
6122 unsigned pair = pairs[pair_offset + index];
6123 unsigned a_face = features[pair_offset + index];
6125 unsigned a_index = pair & 0xffff;
6126 unsigned b_index = pair >> 16;
6129 simd4_float dirs = simd_float::make4(a_to_b[(a_face*3 + 0)*4 + index],
6130 a_to_b[(a_face*3 + 1)*4 + index],
6131 a_to_b[(a_face*3 + 2)*4 + index],
6134 simd4_float c0 = simd_float::make4(a_to_b[(0*3 + 0)*4 + index],
6135 a_to_b[(1*3 + 0)*4 + index],
6136 a_to_b[(2*3 + 0)*4 + index],
6139 simd4_float c1 = simd_float::make4(a_to_b[(0*3 + 1)*4 + index],
6140 a_to_b[(1*3 + 1)*4 + index],
6141 a_to_b[(2*3 + 1)*4 + index],
6144 simd4_float c2 = simd_float::make4(a_to_b[(0*3 + 2)*4 + index],
6145 a_to_b[(1*3 + 2)*4 + index],
6146 a_to_b[(2*3 + 2)*4 + index],
6149 simd4_float b_offset = simd_float::make4(b_offset_array[0*4 + index],
6150 b_offset_array[1*4 + index],
6151 b_offset_array[2*4 + index],
6155 simd4_float a_size = simd_float::load4(colliders[a_index].size);
6156 simd4_float b_size = simd_float::load4(colliders[b_index].size);
6159 dirs = simd_float::abs(dirs);
6161 simd4_float max_dir = simd_float::max(simd128::shuffle32<0,2,1,3>(dirs), simd128::shuffle32<0,0,0,0>(dirs));
6163 unsigned dir_mask = simd::signmask32(simd_float::cmp_ge(dirs, max_dir));
6166 c0 *= simd128::shuffle32<0,0,0,0>(b_size);
6167 c1 *= simd128::shuffle32<1,1,1,1>(b_size);
6168 c2 *= simd128::shuffle32<2,2,2,2>(b_size);
6170 unsigned b_face = 0;
6179 else if (dir_mask & 2) {
6188 simd4_float dx = c1;
6189 simd4_float dy = c2;
6191 unsigned b_positive_face_bit = simd::signmask32(simd::bitwise_xor(b_offset, c)) & (1 << a_face);
6192 unsigned b_offset_neg = simd::signmask32(b_offset) & (1 << a_face);
6194 if (!b_positive_face_bit)
6204 NUDGE_ALIGNED(16) float quads[4*3];
6206 simd4_float q0 = simd128::unpacklo32(a_size, c);
6207 simd4_float q1 = simd128::unpackhi32(a_size, c);
6208 simd4_float q2 = simd128::unpacklo32(dx, dy);
6209 simd4_float q3 = simd128::unpackhi32(dx, dy);
6211 simd_float::store4(quads + 0, simd128::concat2x32<0,1,0,1>(q0, q2));
6212 simd_float::store4(quads + 4, simd128::concat2x32<2,3,2,3>(q0, q2));
6213 simd_float::store4(quads + 8, simd128::concat2x32<0,1,0,1>(q1, q3));
6216 const
float* transformed_x = quads + 4*((a_face+1) % 3);
6217 const
float* transformed_y = quads + 4*((a_face+2) % 3);
6218 const
float* transformed_z = quads + 4*a_face;
6221 NUDGE_ALIGNED(32)
float support[16*3];
6222 NUDGE_ALIGNED(32) uint32_t support_tags[16];
6225 float* support_x = support + 0;
6226 float* support_y = support + 16;
6228 simd4_float tx = simd_float::load4(transformed_x);
6229 simd4_float ty = simd_float::load4(transformed_y);
6231 simd4_float sxycxy = simd128::unpacklo32(tx, ty);
6232 simd4_float dxy = simd128::unpackhi32(tx, ty);
6234 simd4_float sx = simd128::shuffle32<0,0,0,0>(sxycxy);
6235 simd4_float sy = simd128::shuffle32<1,1,1,1>(sxycxy);
6236 simd4_float cx = simd128::shuffle32<2,2,2,2>(sxycxy);
6237 simd4_float cy = simd128::shuffle32<3,3,3,3>(sxycxy);
6239 simd4_float sign_npnp = simd_float::make4(-0.0f, 0.0f, -0.0f, 0.0f);
6242 __m128i corner_mask;
6245 simd4_float sign_pnpn = simd_float::make4(0.0f, -0.0f, 0.0f, -0.0f);
6246 simd4_float sign_nnpp = simd_float::make4(-0.0f, -0.0f, 0.0f, 0.0f);
6248 simd4_float corner0x = simd::bitwise_xor(sx, sign_pnpn);
6249 simd4_float corner0y = simd::bitwise_xor(sy, sign_nnpp);
6251 simd4_float corner1x = cx + simd::bitwise_xor(simd128::shuffle32<0,0,0,0>(dxy), sign_npnp) + simd::bitwise_xor(simd128::shuffle32<2,2,2,2>(dxy), sign_nnpp);
6252 simd4_float corner1y = cy + simd::bitwise_xor(simd128::shuffle32<1,1,1,1>(dxy), sign_npnp) + simd::bitwise_xor(simd128::shuffle32<3,3,3,3>(dxy), sign_nnpp);
6254 simd4_float k = (simd128::concat2x32<2,2,0,0>(sxycxy, dxy) * simd128::shuffle32<3,1,3,1>(dxy) -
6255 simd128::concat2x32<3,3,1,1>(sxycxy, dxy) * simd128::shuffle32<2,0,2,0>(dxy));
6257 simd4_float ox = simd128::shuffle32<0,0,0,0>(k);
6258 simd4_float oy = simd128::shuffle32<1,1,1,1>(k);
6259 simd4_float delta_max = simd_float::abs(simd128::shuffle32<2,2,2,2>(k));
6261 simd4_float sdxy = dxy * simd128::shuffle32<1,0,1,0>(sxycxy);
6263 simd4_float delta_x = ox + simd::bitwise_xor(simd128::shuffle32<2,2,2,2>(sdxy), sign_nnpp) + simd::bitwise_xor(simd128::shuffle32<3,3,3,3>(sdxy), sign_npnp);
6264 simd4_float delta_y = oy + simd::bitwise_xor(simd128::shuffle32<0,0,0,0>(sdxy), sign_nnpp) + simd::bitwise_xor(simd128::shuffle32<1,1,1,1>(sdxy), sign_npnp);
6266 simd4_float inside_x = simd_float::cmp_le(simd_float::abs(corner1x), sx);
6267 simd4_float inside_y = simd_float::cmp_le(simd_float::abs(corner1y), sy);
6269 simd4_float mask0 = simd_float::cmp_le(simd_float::max(simd_float::abs(delta_x), simd_float::abs(delta_y)), delta_max);
6270 simd4_float mask1 = simd::bitwise_and(inside_x, inside_y);
6272 corner_mask = _mm_packs_epi32(simd_float::asint(mask0), simd_float::asint(mask1));
6275 edge_mask = _mm_packs_epi32(simd_float::asint(simd::bitwise_and(simd128::shuffle32<3,2,0,2>(mask0), simd128::shuffle32<1,0,1,3>(mask0))),
6276 simd_float::asint(simd::bitwise_and(simd128::shuffle32<1,3,2,3>(mask1), simd128::shuffle32<0,2,0,1>(mask1))));
6278 simd_float::store4(support_x + 0, corner0x);
6279 simd_float::store4(support_y + 0, corner0y);
6280 simd_float::store4(support_x + 4, corner1x);
6281 simd_float::store4(support_y + 4, corner1y);
6285 unsigned edge_axis_near;
6286 unsigned edge_axis_far;
6288 simd4_float one = simd_float::make4(1.0f);
6289 simd4_float rdxy = one/dxy;
6291 simd4_float offset_x = simd128::shuffle32<0,0,2,2>(dxy);
6292 simd4_float offset_y = simd128::shuffle32<1,1,3,3>(dxy);
6294 simd4_float pivot_x = cx + simd::bitwise_xor(simd128::shuffle32<2,2,0,0>(dxy), sign_npnp);
6295 simd4_float pivot_y = cy + simd::bitwise_xor(simd128::shuffle32<3,3,1,1>(dxy), sign_npnp);
6297 simd4_float sign_mask = simd_float::make4(-0.0f);
6298 simd4_float pos_x = simd::bitwise_or(simd::bitwise_and(offset_x, sign_mask), sx);
6299 simd4_float pos_y = simd::bitwise_or(simd::bitwise_and(offset_y, sign_mask), sy);
6301 simd4_float rx = simd128::shuffle32<0,0,2,2>(rdxy);
6302 simd4_float ry = simd128::shuffle32<1,1,3,3>(rdxy);
6304 simd4_float near_x = (pos_x + pivot_x) * rx;
6305 simd4_float far_x = (pos_x - pivot_x) * rx;
6307 simd4_float near_y = (pos_y + pivot_y) * ry;
6308 simd4_float far_y = (pos_y - pivot_y) * ry;
6310 simd4_float a = simd_float::min(one, near_x);
6311 simd4_float b = simd_float::min(one, far_x);
6313 edge_axis_near = simd::signmask32(simd_float::cmp_gt(a, near_y));
6314 edge_axis_far = simd::signmask32(simd_float::cmp_gt(b, far_y));
6316 a = simd_float::min(a, near_y);
6317 b = simd_float::min(b, far_y);
6319 simd4_float ax = pivot_x - offset_x * a;
6320 simd4_float ay = pivot_y - offset_y * a;
6321 simd4_float bx = pivot_x + offset_x * b;
6322 simd4_float by = pivot_y + offset_y * b;
6324 simd4_float mask = simd_float::cmp_gt(a + b, simd_float::zero4());
6326 simd4_float mask_a = simd_float::cmp_neq(a, one);
6327 simd4_float mask_b = simd_float::cmp_neq(b, one);
6329 mask_a = simd::bitwise_and(mask_a, mask);
6330 mask_b = simd::bitwise_and(mask_b, mask);
6332 edge_mask = simd::bitwise_notand(edge_mask, _mm_packs_epi32(simd_float::asint(mask_a), simd_float::asint(mask_b)));
6334 simd_float::store4(support_x + 8, ax);
6335 simd_float::store4(support_y + 8, ay);
6336 simd_float::store4(support_x + 12, bx);
6337 simd_float::store4(support_y + 12, by);
6340 mask = _mm_movemask_epi8(_mm_packs_epi16(corner_mask, edge_mask));
6345 unsigned a_sign_face_bit = b_offset_neg ? (1 << a_face) : 0;
6346 unsigned b_sign_face_bit = b_positive_face_bit ? 0 : (1 << b_face);
6348 unsigned a_vertices = 0x12003624 >> (3 - a_face);
6349 unsigned b_vertices = 0x00122436 >> (3 - b_face);
6351 unsigned a_face_bits = 0xffff0000 | a_sign_face_bit;
6352 unsigned b_face_bits = 0x0000ffff | (b_sign_face_bit << 16);
6354 support_tags[0] = ((a_vertices >> 0) & 0x7) | a_face_bits;
6355 support_tags[1] = ((a_vertices >> 8) & 0x7) | a_face_bits;
6356 support_tags[2] = ((a_vertices >> 16) & 0x7) | a_face_bits;
6357 support_tags[3] = ((a_vertices >> 24) & 0x7) | a_face_bits;
6359 support_tags[4] = ((b_vertices << 16) & 0x70000) | b_face_bits;
6360 support_tags[5] = ((b_vertices << 8) & 0x70000) | b_face_bits;
6361 support_tags[6] = ((b_vertices >> 0) & 0x70000) | b_face_bits;
6362 support_tags[7] = ((b_vertices >> 8) & 0x70000) | b_face_bits;
6365 unsigned edge_axis_winding = simd::signmask32(dxy);
6367 unsigned y_near0 = (edge_axis_near >> 0) & 1;
6368 unsigned y_near1 = (edge_axis_near >> 1) & 1;
6369 unsigned y_near2 = (edge_axis_near >> 2) & 1;
6370 unsigned y_near3 = (edge_axis_near >> 3) & 1;
6372 unsigned y_far0 = (edge_axis_far >> 0) & 1;
6373 unsigned y_far1 = (edge_axis_far >> 1) & 1;
6374 unsigned y_far2 = (edge_axis_far >> 2) & 1;
6375 unsigned y_far3 = (edge_axis_far >> 3) & 1;
6377 unsigned a_near_edge0 = y_near0*2 + ((edge_axis_winding >> (0 + y_near0)) & 1);
6378 unsigned a_near_edge1 = y_near1*2 + ((edge_axis_winding >> (0 + y_near1)) & 1);
6379 unsigned a_near_edge2 = y_near2*2 + ((edge_axis_winding >> (2 + y_near2)) & 1);
6380 unsigned a_near_edge3 = y_near3*2 + ((edge_axis_winding >> (2 + y_near3)) & 1);
6382 edge_axis_winding ^= 0xf;
6384 unsigned a_far_edge0 = y_far0*2 + ((edge_axis_winding >> (0 + y_far0)) & 1);
6385 unsigned a_far_edge1 = y_far1*2 + ((edge_axis_winding >> (0 + y_far1)) & 1);
6386 unsigned a_far_edge2 = y_far2*2 + ((edge_axis_winding >> (2 + y_far2)) & 1);
6387 unsigned a_far_edge3 = y_far3*2 + ((edge_axis_winding >> (2 + y_far3)) & 1);
6392 uint64_t a_edge_map = 0x1200362424003612llu >> (3 - a_face);
6393 uint64_t b_edge_map = 0x2400361212003624llu >> (3 - b_face);
6395 unsigned face_bits = a_sign_face_bit | (a_sign_face_bit << 8) | (b_sign_face_bit << 16) | (b_sign_face_bit << 24);
6397 unsigned b_edge0 = ((unsigned)((b_edge_map >> (0<<4)) & 0x0707) << 16) | face_bits;
6398 unsigned b_edge1 = ((unsigned)((b_edge_map >> (1<<4)) & 0x0707) << 16) | face_bits;
6399 unsigned b_edge2 = ((unsigned)((b_edge_map >> (2<<4)) & 0x0707) << 16) | face_bits;
6400 unsigned b_edge3 = ((unsigned)((b_edge_map >> (3<<4)) & 0x0707) << 16) | face_bits;
6402 support_tags[ 8] = (unsigned)((a_edge_map >> (a_near_edge0<<4)) & 0x0707) | b_edge0;
6403 support_tags[ 9] = (unsigned)((a_edge_map >> (a_near_edge1<<4)) & 0x0707) | b_edge1;
6404 support_tags[10] = (unsigned)((a_edge_map >> (a_near_edge2<<4)) & 0x0707) | b_edge2;
6405 support_tags[11] = (unsigned)((a_edge_map >> (a_near_edge3<<4)) & 0x0707) | b_edge3;
6407 support_tags[12] = (unsigned)((a_edge_map >> (a_far_edge0<<4)) & 0x0707) | b_edge0;
6408 support_tags[13] = (unsigned)((a_edge_map >> (a_far_edge1<<4)) & 0x0707) | b_edge1;
6409 support_tags[14] = (unsigned)((a_edge_map >> (a_far_edge2<<4)) & 0x0707) | b_edge2;
6410 support_tags[15] = (unsigned)((a_edge_map >> (a_far_edge3<<4)) & 0x0707) | b_edge3;
6414 simd4_float a_size_transformed = simd_float::load4(transformed_x);
6415 simd4_float c_transformed = simd_float::load4(transformed_y);
6416 simd4_float dx_transformed = simd_float::load4(transformed_z);
6417 simd4_float dy_transformed = simd_float::zero4();
6419 simd128::transpose32(a_size_transformed, c_transformed, dx_transformed, dy_transformed);
6421 simd4_float zn = simd_aos::cross(dx_transformed, dy_transformed);
6422 simd4_float plane = simd128::concat2x32<0,1,0,1>(simd::bitwise_xor(zn, simd_float::make4(-0.0f)), simd_aos::dot(c_transformed, zn));
6423 plane *= simd_float::make4(1.0f)/simd128::shuffle32<2,2,2,2>(zn);
6425 NUDGE_ALIGNED(32) float penetrations[16];
6427 simdv_float z_sign = simd_float::zerov();
6430 z_sign = simd_float::makev(-0.0f);
6432#if NUDGE_SIMDV_WIDTH == 256
6433 simdv_float penetration_offset = simd256::broadcast(simd128::shuffle32<2,2,2,2>(a_size_transformed));
6434 simdv_float plane256 = simd256::broadcast(plane);
6436 simdv_float penetration_offset = simd128::shuffle32<2,2,2,2>(a_size_transformed);
6438 unsigned penetration_mask = 0;
6440 for (
unsigned i = 0; i < 16; i += simdv_width32) {
6441#if NUDGE_SIMDV_WIDTH == 256
6442 simdv_float plane = plane256;
6445 simdv_float x = simd_float::loadv(support + 0 + i);
6446 simdv_float y = simd_float::loadv(support + 16 + i);
6447 simdv_float z = x*simd128::shuffle32<0,0,0,0>(plane) + y*simd128::shuffle32<1,1,1,1>(plane) + simd128::shuffle32<2,2,2,2>(plane);
6449 simdv_float penetration = penetration_offset - simd::bitwise_xor(z, z_sign);
6451 z += penetration * simd::bitwise_xor(simd_float::makev(0.5f), z_sign);
6453 penetration_mask |= simd::signmask32(simd_float::cmp_gt(penetration, simd_float::zerov())) << i;
6455 simd_float::storev(penetrations + i, penetration);
6456 simd_float::storev(support + 32 + i, z);
6459 mask &= penetration_mask;
6462 unsigned a_face_inverse = (a_face ^ 1) ^ (a_face >> 1);
6464 const float* support_x = support + 16*((a_face_inverse+1) % 3);
6465 const float* support_y = support + 16*((a_face_inverse+2) % 3);
6466 const float* support_z = support + 16*a_face_inverse;
6469 simd4_float a_to_world0, a_to_world1, a_to_world2;
6471 simd4_float qx_qy_qz_qs = simd_float::load4(transforms[a_index].rotation);
6472 simd4_float kx_ky_kz_ks = qx_qy_qz_qs + qx_qy_qz_qs;
6475 kx_ky_kz_ks = simd::bitwise_xor(kx_ky_kz_ks, simd_float::make4(0.0f, 0.0f, 0.0f, -0.0f));
6478 a_to_world0 = (simd128::shuffle32<1,0,0,3>(kx_ky_kz_ks) * simd128::shuffle32<1,1,2,3>(qx_qy_qz_qs) +
6479 simd128::shuffle32<2,2,3,3>(kx_ky_kz_ks) * simd128::shuffle32<2,3,1,3>(qx_qy_qz_qs));
6482 a_to_world1 = (simd128::shuffle32<0,2,1,3>(kx_ky_kz_ks) * simd128::shuffle32<1,2,2,3>(qx_qy_qz_qs) +
6483 simd128::shuffle32<3,0,0,3>(kx_ky_kz_ks) * simd128::shuffle32<2,0,3,3>(qx_qy_qz_qs));
6486 a_to_world2 = (simd128::shuffle32<0,1,0,3>(kx_ky_kz_ks) * simd128::shuffle32<2,2,0,3>(qx_qy_qz_qs) +
6487 simd128::shuffle32<1,3,1,3>(kx_ky_kz_ks) * simd128::shuffle32<3,0,1,3>(qx_qy_qz_qs));
6489 a_to_world0 = a_to_world0 - simd_float::make4(1.0f, 0.0f, 0.0f, 0.0f);
6490 a_to_world1 = a_to_world1 - simd_float::make4(0.0f, 1.0f, 0.0f, 0.0f);
6491 a_to_world2 = a_to_world2 - simd_float::make4(0.0f, 0.0f, 1.0f, 0.0f);
6493 a_to_world0 = simd::bitwise_xor(a_to_world0, simd_float::make4(-0.0f, 0.0f, 0.0f, 0.0f));
6494 a_to_world1 = simd::bitwise_xor(a_to_world1, simd_float::make4(0.0f, -0.0f, 0.0f, 0.0f));
6495 a_to_world2 = simd::bitwise_xor(a_to_world2, simd_float::make4(0.0f, 0.0f, -0.0f, 0.0f));
6499 simd4_float wn = a_face == 0 ? a_to_world0 : (a_face == 1 ? a_to_world1 : a_to_world2);
6502 wn = simd::bitwise_xor(wn, simd_float::make4(-0.0f));
6504 simd4_float a_position = simd_float::load4(transforms[a_index].position);
6506 uint16_t a_body = (uint16_t)transforms[a_index].body;
6507 uint16_t b_body = (uint16_t)transforms[b_index].body;
6509 a_index = transforms[a_index].
body >> 16;
6510 b_index = transforms[b_index].
body >> 16;
6512 unsigned tag_swap = 0;
6514 if (b_index > a_index) {
6515 unsigned tc = a_index;
6516 uint16_t tb = a_body;
6526 wn = simd::bitwise_xor(wn, simd_float::make4(-0.0f));;
6529 uint64_t high_tag = ((uint64_t)a_index << 32) | ((uint64_t)b_index << 48);
6532 unsigned index = first_set_bit(mask);
6535 simd4_float wp = (a_to_world0 * simd_float::broadcast_load4(support_x + index) +
6536 a_to_world1 * simd_float::broadcast_load4(support_y + index) +
6537 a_to_world2 * simd_float::broadcast_load4(support_z + index) + a_position);
6539 float penetration = penetrations[index];
6541 simd_float::store4(contacts[count].position, wp);
6542 simd_float::store4(contacts[count].normal, wn);
6545 contacts[count].
friction = NUDGE_FRICTION_MODEL(properties[a_body].friction,properties[b_body].friction);
6546 bodies[count].
a = a_body;
6547 bodies[count].
b = b_body;
6548 tags[count] = (uint32_t)(support_tags[index] >> tag_swap) | (uint32_t)(support_tags[index] << tag_swap) | high_tag;
6557 unsigned index = first_set_bit(edge);
6560 unsigned pair = pairs[pair_offset + index];
6561 unsigned edge_a = a_edge_array[index];
6562 unsigned edge_b = b_edge_array[index];
6564 unsigned a = pair & 0xffff;
6565 unsigned b = pair >> 16;
6567 a = transforms[a].
body >> 16;
6568 b = transforms[b].
body >> 16;
6570 feature_penetrations[added] = penetration_array[index];
6571 features[added] = a > b ? edge_a | (edge_b << 16) : edge_b | (edge_a << 16);
6572 pairs[added] = a > b ? pair : (pair >> 16) | (pair << 16);
6578 assert(!added || pairs[added-1]);
6585 pairs[pair_count+0] = 0;
6586 pairs[pair_count+1] = 0;
6587 pairs[pair_count+2] = 0;
6589 features[pair_count+0] = 0;
6590 features[pair_count+1] = 0;
6591 features[pair_count+2] = 0;
6593 feature_penetrations[pair_count+0] = 0.0f;
6594 feature_penetrations[pair_count+1] = 0.0f;
6595 feature_penetrations[pair_count+2] = 0.0f;
6597 for (
unsigned i = 0; i < pair_count; i += 4) {
6599 unsigned pair0 = pairs[i + 0];
6600 unsigned pair1 = pairs[i + 1];
6601 unsigned pair2 = pairs[i + 2];
6602 unsigned pair3 = pairs[i + 3];
6604 unsigned a0_index = pair0 & 0xffff;
6605 unsigned b0_index = pair0 >> 16;
6607 unsigned a1_index = pair1 & 0xffff;
6608 unsigned b1_index = pair1 >> 16;
6610 unsigned a2_index = pair2 & 0xffff;
6611 unsigned b2_index = pair2 >> 16;
6613 unsigned a3_index = pair3 & 0xffff;
6614 unsigned b3_index = pair3 >> 16;
6617 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
6618 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
6619 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
6620 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
6622 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
6623 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
6624 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
6625 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
6627 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
6628 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
6631 simd4_float a_basis_xx, a_basis_xy, a_basis_xz;
6632 simd4_float a_basis_yx, a_basis_yy, a_basis_yz;
6633 simd4_float a_basis_zx, a_basis_zy, a_basis_zz;
6635 simd4_float kx = a_rotation_x + a_rotation_x;
6636 simd4_float ky = a_rotation_y + a_rotation_y;
6637 simd4_float kz = a_rotation_z + a_rotation_z;
6639 simd4_float xx = kx*a_rotation_x;
6640 simd4_float yy = ky*a_rotation_y;
6641 simd4_float zz = kz*a_rotation_z;
6642 simd4_float xy = kx*a_rotation_y;
6643 simd4_float xz = kx*a_rotation_z;
6644 simd4_float yz = ky*a_rotation_z;
6645 simd4_float sx = kx*a_rotation_s;
6646 simd4_float sy = ky*a_rotation_s;
6647 simd4_float sz = kz*a_rotation_s;
6649 a_basis_xx = simd_float::make4(1.0f) - yy - zz;
6650 a_basis_xy = xy + sz;
6651 a_basis_xz = xz - sy;
6653 a_basis_yx = xy - sz;
6654 a_basis_yy = simd_float::make4(1.0f) - xx - zz;
6655 a_basis_yz = yz + sx;
6657 a_basis_zx = xz + sy;
6658 a_basis_zy = yz - sx;
6659 a_basis_zz = simd_float::make4(1.0f) - xx - yy;
6662 simd4_float b_basis_xx, b_basis_xy, b_basis_xz;
6663 simd4_float b_basis_yx, b_basis_yy, b_basis_yz;
6664 simd4_float b_basis_zx, b_basis_zy, b_basis_zz;
6666 simd4_float kx = b_rotation_x + b_rotation_x;
6667 simd4_float ky = b_rotation_y + b_rotation_y;
6668 simd4_float kz = b_rotation_z + b_rotation_z;
6670 simd4_float xx = kx*b_rotation_x;
6671 simd4_float yy = ky*b_rotation_y;
6672 simd4_float zz = kz*b_rotation_z;
6673 simd4_float xy = kx*b_rotation_y;
6674 simd4_float xz = kx*b_rotation_z;
6675 simd4_float yz = ky*b_rotation_z;
6676 simd4_float sx = kx*b_rotation_s;
6677 simd4_float sy = ky*b_rotation_s;
6678 simd4_float sz = kz*b_rotation_s;
6680 b_basis_xx = simd_float::make4(1.0f) - yy - zz;
6681 b_basis_xy = xy + sz;
6682 b_basis_xz = xz - sy;
6684 b_basis_yx = xy - sz;
6685 b_basis_yy = simd_float::make4(1.0f) - xx - zz;
6686 b_basis_yz = yz + sx;
6688 b_basis_zx = xz + sy;
6689 b_basis_zy = yz - sx;
6690 b_basis_zz = simd_float::make4(1.0f) - xx - yy;
6694 simd4_int32 edge = simd_int32::load4((
const int32_t*)(features + i));
6697#ifdef NUDGE_NATIVE_BLENDV32
6698 simd4_int32 a_select_y = simd_int32::shift_left<32-1>(edge);
6699 simd4_int32 a_select_z = simd_int32::shift_left<32-2>(edge);
6701 simd4_int32 b_select_y = simd_int32::shift_left<16-1>(edge);
6702 simd4_int32 b_select_z = simd_int32::shift_left<16-2>(edge);
6704 simd4_float u_x = simd::blendv32(a_basis_xx, a_basis_yx, simd_int32::asfloat(a_select_y));
6705 simd4_float u_y = simd::blendv32(a_basis_xy, a_basis_yy, simd_int32::asfloat(a_select_y));
6706 simd4_float u_z = simd::blendv32(a_basis_xz, a_basis_yz, simd_int32::asfloat(a_select_y));
6708 simd4_float v_x = simd::blendv32(b_basis_xx, b_basis_yx, simd_int32::asfloat(b_select_y));
6709 simd4_float v_y = simd::blendv32(b_basis_xy, b_basis_yy, simd_int32::asfloat(b_select_y));
6710 simd4_float v_z = simd::blendv32(b_basis_xz, b_basis_yz, simd_int32::asfloat(b_select_y));
6712 u_x = simd::blendv32(u_x, a_basis_zx, simd_int32::asfloat(a_select_z));
6713 u_y = simd::blendv32(u_y, a_basis_zy, simd_int32::asfloat(a_select_z));
6714 u_z = simd::blendv32(u_z, a_basis_zz, simd_int32::asfloat(a_select_z));
6716 v_x = simd::blendv32(v_x, b_basis_zx, simd_int32::asfloat(b_select_z));
6717 v_y = simd::blendv32(v_y, b_basis_zy, simd_int32::asfloat(b_select_z));
6718 v_z = simd::blendv32(v_z, b_basis_zz, simd_int32::asfloat(b_select_z));
6720 simd4_int32 a_edge = simd::bitwise_and(edge, simd_int32::make4(0xffff));
6721 simd4_int32 b_edge = simd_int32::shift_right<16>(edge);
6723 simd4_float a_select_x = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::zero4()));
6724 simd4_float a_select_y = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::make4(1)));
6725 simd4_float a_select_z = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::make4(2)));
6727 simd4_float b_select_x = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::zero4()));
6728 simd4_float b_select_y = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::make4(1)));
6729 simd4_float b_select_z = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::make4(2)));
6731 simd4_float u_x = simd::bitwise_and(a_basis_xx, a_select_x);
6732 simd4_float u_y = simd::bitwise_and(a_basis_xy, a_select_x);
6733 simd4_float u_z = simd::bitwise_and(a_basis_xz, a_select_x);
6735 simd4_float v_x = simd::bitwise_and(b_basis_xx, b_select_x);
6736 simd4_float v_y = simd::bitwise_and(b_basis_xy, b_select_x);
6737 simd4_float v_z = simd::bitwise_and(b_basis_xz, b_select_x);
6739 u_x = simd::bitwise_or(u_x, simd::bitwise_and(a_basis_yx, a_select_y));
6740 u_y = simd::bitwise_or(u_y, simd::bitwise_and(a_basis_yy, a_select_y));
6741 u_z = simd::bitwise_or(u_z, simd::bitwise_and(a_basis_yz, a_select_y));
6743 v_x = simd::bitwise_or(v_x, simd::bitwise_and(b_basis_yx, b_select_y));
6744 v_y = simd::bitwise_or(v_y, simd::bitwise_and(b_basis_yy, b_select_y));
6745 v_z = simd::bitwise_or(v_z, simd::bitwise_and(b_basis_yz, b_select_y));
6747 u_x = simd::bitwise_or(u_x, simd::bitwise_and(a_basis_zx, a_select_z));
6748 u_y = simd::bitwise_or(u_y, simd::bitwise_and(a_basis_zy, a_select_z));
6749 u_z = simd::bitwise_or(u_z, simd::bitwise_and(a_basis_zz, a_select_z));
6751 v_x = simd::bitwise_or(v_x, simd::bitwise_and(b_basis_zx, b_select_z));
6752 v_y = simd::bitwise_or(v_y, simd::bitwise_and(b_basis_zy, b_select_z));
6753 v_z = simd::bitwise_or(v_z, simd::bitwise_and(b_basis_zz, b_select_z));
6757 simd4_float n_x, n_y, n_z;
6758 simd_soa::cross(u_x, u_y, u_z, v_x, v_y, v_z, n_x, n_y, n_z);
6761 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
6762 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
6763 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
6764 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
6766 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
6767 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
6768 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
6769 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
6771 simd128::transpose32(a_position_x, a_position_y, a_position_z, a_position_w);
6772 simd128::transpose32(b_position_x, b_position_y, b_position_z, b_position_w);
6775 simd4_float delta_x = b_position_x - a_position_x;
6776 simd4_float delta_y = b_position_y - a_position_y;
6777 simd4_float delta_z = b_position_z - a_position_z;
6780 simd4_float sign_mask = simd_float::make4(-0.0f);
6781 simd4_float flip_sign = simd::bitwise_and(n_x*delta_x + n_y*delta_y + n_z*delta_z, sign_mask);
6783 n_x = simd::bitwise_xor(n_x, flip_sign);
6784 n_y = simd::bitwise_xor(n_y, flip_sign);
6785 n_z = simd::bitwise_xor(n_z, flip_sign);
6788 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
6789 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
6790 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
6791 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
6793 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
6794 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
6795 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
6796 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
6798 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
6799 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
6802 simd4_float a_sign_x = a_basis_xx*n_x + a_basis_xy*n_y + a_basis_xz*n_z;
6803 simd4_float a_sign_y = a_basis_yx*n_x + a_basis_yy*n_y + a_basis_yz*n_z;
6804 simd4_float a_sign_z = a_basis_zx*n_x + a_basis_zy*n_y + a_basis_zz*n_z;
6806 simd4_float b_sign_x = b_basis_xx*n_x + b_basis_xy*n_y + b_basis_xz*n_z;
6807 simd4_float b_sign_y = b_basis_yx*n_x + b_basis_yy*n_y + b_basis_yz*n_z;
6808 simd4_float b_sign_z = b_basis_zx*n_x + b_basis_zy*n_y + b_basis_zz*n_z;
6810 a_sign_x = simd::bitwise_and(a_sign_x, sign_mask);
6811 a_sign_y = simd::bitwise_and(a_sign_y, sign_mask);
6812 a_sign_z = simd::bitwise_and(a_sign_z, sign_mask);
6814 b_sign_x = simd::bitwise_and(b_sign_x, sign_mask);
6815 b_sign_y = simd::bitwise_and(b_sign_y, sign_mask);
6816 b_sign_z = simd::bitwise_and(b_sign_z, sign_mask);
6818 simd4_int32 edge_x = simd::bitwise_or(simd_int32::shift_right<31-0>(simd_float::asint(a_sign_x)), simd_int32::shift_right<31-16>(simd_float::asint(simd::bitwise_xor(b_sign_x, simd_float::make4(-0.0f)))));
6819 simd4_int32 edge_y = simd::bitwise_or(simd_int32::shift_right<31-1>(simd_float::asint(a_sign_y)), simd_int32::shift_right<31-17>(simd_float::asint(simd::bitwise_xor(b_sign_y, simd_float::make4(-0.0f)))));
6820 simd4_int32 edge_z = simd::bitwise_or(simd_int32::shift_right<31-2>(simd_float::asint(a_sign_z)), simd_int32::shift_right<31-18>(simd_float::asint(simd::bitwise_xor(b_sign_z, simd_float::make4(-0.0f)))));
6821 simd4_int32 edge_w = _mm_add_epi16(_mm_add_epi16(edge, _mm_set1_epi16(1)), _mm_srli_epi16(edge, 1));
6823 simd4_int32 edge_xy = simd::bitwise_or(edge_x, edge_y);
6824 simd4_int32 edge_zw = simd::bitwise_or(edge_z, edge_w);
6826 simd4_int32 tag_hi = simd::bitwise_or(edge_xy, edge_zw);
6827 simd4_int32 tag_lo = simd::bitwise_notand(edge_w, tag_hi);
6828 tag_hi = simd_int32::shift_left<8>(tag_hi);
6830 simd4_int32 tag = simd::bitwise_or(tag_lo, tag_hi);
6832 a_size_x = simd::bitwise_xor(a_size_x, a_sign_x);
6833 a_size_y = simd::bitwise_xor(a_size_y, a_sign_y);
6834 a_size_z = simd::bitwise_xor(a_size_z, a_sign_z);
6836 b_size_x = simd::bitwise_xor(b_size_x, b_sign_x);
6837 b_size_y = simd::bitwise_xor(b_size_y, b_sign_y);
6838 b_size_z = simd::bitwise_xor(b_size_z, b_sign_z);
6840 a_basis_xx *= a_size_x;
6841 a_basis_xy *= a_size_x;
6842 a_basis_xz *= a_size_x;
6844 a_basis_yx *= a_size_y;
6845 a_basis_yy *= a_size_y;
6846 a_basis_yz *= a_size_y;
6848 a_basis_zx *= a_size_z;
6849 a_basis_zy *= a_size_z;
6850 a_basis_zz *= a_size_z;
6852 b_basis_xx *= b_size_x;
6853 b_basis_xy *= b_size_x;
6854 b_basis_xz *= b_size_x;
6856 b_basis_yx *= b_size_y;
6857 b_basis_yy *= b_size_y;
6858 b_basis_yz *= b_size_y;
6860 b_basis_zx *= b_size_z;
6861 b_basis_zy *= b_size_z;
6862 b_basis_zz *= b_size_z;
6864 simd4_float ca_x = a_basis_xx + a_basis_yx + a_basis_zx + a_position_x;
6865 simd4_float ca_y = a_basis_xy + a_basis_yy + a_basis_zy + a_position_y;
6866 simd4_float ca_z = a_basis_xz + a_basis_yz + a_basis_zz + a_position_z;
6868 simd4_float cb_x = b_basis_xx + b_basis_yx + b_basis_zx - b_position_x;
6869 simd4_float cb_y = b_basis_xy + b_basis_yy + b_basis_zy - b_position_y;
6870 simd4_float cb_z = b_basis_xz + b_basis_yz + b_basis_zz - b_position_z;
6873 simd4_float o_x = ca_x + cb_x;
6874 simd4_float o_y = ca_y + cb_y;
6875 simd4_float o_z = ca_z + cb_z;
6877 simd4_float ia = u_x*u_x + u_y*u_y + u_z*u_z;
6878 simd4_float ib = u_x*v_x + u_y*v_y + u_z*v_z;
6879 simd4_float ic = v_x*v_x + v_y*v_y + v_z*v_z;
6880 simd4_float
id = o_x*u_x + o_y*u_y + o_z*u_z;
6881 simd4_float ie = o_x*v_x + o_y*v_y + o_z*v_z;
6883 simd4_float half = simd_float::make4(0.5f);
6884 simd4_float ir = half / (ia*ic - ib*ib);
6886 simd4_float sa = (ib*ie - ic*id) * ir;
6887 simd4_float sb = (ia*ie - ib*id) * ir;
6889 simd4_float p_x = (ca_x - cb_x)*half + u_x*sa + v_x*sb;
6890 simd4_float p_y = (ca_y - cb_y)*half + u_y*sa + v_y*sb;
6891 simd4_float p_z = (ca_z - cb_z)*half + u_z*sa + v_z*sb;
6893 simd_soa::normalize(n_x, n_y, n_z);
6895 simd4_float p_w = simd_float::load4(feature_penetrations + i);
6896 simd4_float n_w = simd_float::make4(0.5f);
6898 simd128::transpose32(p_x, p_y, p_z, p_w);
6899 simd128::transpose32(n_x, n_y, n_z, n_w);
6901 simd_float::store4(contacts[count + 0].position, p_x);
6902 simd_float::store4(contacts[count + 0].normal, n_x);
6903 simd_float::store4(contacts[count + 1].position, p_y);
6904 simd_float::store4(contacts[count + 1].normal, n_y);
6905 simd_float::store4(contacts[count + 2].position, p_z);
6906 simd_float::store4(contacts[count + 2].normal, n_z);
6907 simd_float::store4(contacts[count + 3].position, p_w);
6908 simd_float::store4(contacts[count + 3].normal, n_w);
6910 simd4_float body_pair = simd::bitwise_or(simd::bitwise_and(a_position_w, simd_int32::asfloat(simd_int32::make4(0xffff))), simd_int32::asfloat(simd_int32::shift_left<16>(simd_float::asint(b_position_w))));
6911 simd_float::storeu4((
float*)(bodies + count), body_pair);
6913 simd4_int32 pair = simd_float::asint(simd::bitwise_or(simd::bitwise_and(b_position_w, simd_int32::asfloat(simd_int32::make4(0xffff0000))), simd_int32::asfloat(simd_int32::shift_right<16>(simd_float::asint(a_position_w)))));
6915 simd_int32::storeu4((int32_t*)tags + count*2 + 0, simd128::unpacklo32(tag, pair));
6916 simd_int32::storeu4((int32_t*)tags + count*2 + 4, simd128::unpackhi32(tag, pair));
6922 while (count && bodies[count-1].a == bodies[count-1].b)
6932 float3 dp = make_float3(b_transform.
position) - make_float3(a_transform.
position);
6933 float l2 = length2(dp);
6939 float l = sqrtf(l2);
6942 n = dp * (1.0f / l);
6944 n = make_float3(1.0f, 0.0f, 0.0f);
6946 float3 p = make_float3(a_transform.
position) + n * (l - b.
radius);
6952 contacts[0].
normal[0] = n.x;
6953 contacts[0].
normal[1] = n.y;
6954 contacts[0].
normal[2] = n.z;
6957 bodies[0].
a = (uint16_t)a_transform.
body;
6958 bodies[0].
b = (uint16_t)b_transform.
body;
6964 Rotation a_to_world = make_rotation(a_transform.
rotation);
6965 Rotation world_to_a = inverse(a_to_world);
6966 float3 offset_b = world_to_a * (make_float3(b_transform.
position) - make_float3(a_transform.
position));
6968 float dx = fabsf(offset_b.x);
6969 float dy = fabsf(offset_b.y);
6970 float dz = fabsf(offset_b.z);
6976 if (dx >= w || dy >= h || dz >= d)
6984 unsigned outside_x = dx > a.
size[0];
6985 unsigned outside_y = dy > a.
size[1];
6986 unsigned outside_z = dz > a.
size[2];
6988 if (outside_x + outside_y + outside_z >= 2) {
6990 outside_x ? (offset_b.x > 0.0f ? a.
size[0] : -a.
size[0]) : offset_b.x,
6991 outside_y ? (offset_b.y > 0.0f ? a.size[1] : -a.size[1]) : offset_b.y,
6992 outside_z ? (offset_b.z > 0.0f ? a.size[2] : -a.size[2]) : offset_b.z,
6995 float3 dp = offset_b - corner;
6996 float l2 = length2(dp);
7001 float l = sqrtf(l2);
7005 penetration = r - l;
7007 else if (w - dx < h - dy && w - dx < d - dz) {
7008 n.x = offset_b.x > 0.0f ? 1.0f : -1.0f;
7011 penetration = w - dx;
7013 else if (h - dy < d - dz) {
7015 n.y = offset_b.y > 0.0f ? 1.0f : -1.0f;
7017 penetration = h - dy;
7022 n.z = offset_b.z > 0.0f ? 1.0f : -1.0f;
7023 penetration = d - dz;
7026 float3 p = offset_b - n*r;
7028 p = a_to_world * p + make_float3(a_transform.
position);
7035 contacts[0].
normal[0] = n.x;
7036 contacts[0].
normal[1] = n.y;
7037 contacts[0].
normal[2] = n.z;
7040 bodies[0].
a = (uint16_t)a_transform.
body;
7041 bodies[0].
b = (uint16_t)b_transform.
body;
// dilate_3<offset>: spreads the low 16 bits of x so that source bit k lands on
// bit 3*k + offset of a 48-bit result. The low 32 bits of that result are
// written to lo32 and the bits from 32 upwards are written to hi32.
//
// x      - input; only bits 0..15 are used (both working values are masked with 0xff).
// lo32   - out: low 32 bits of the dilated value.
// hi32   - out: remaining high bits of the dilated value.
// offset - template parameter added to every output bit position; it is also
//          used as 8-offset for a shift count below.
//
// NOTE(review): simdv_int32 is presumably a SIMD vector and every operation
// here is assumed to act independently per 32-bit element - confirm against
// the simd wrapper definitions.
7046template<
unsigned offset>
7047static inline void dilate_3(simdv_int32 x, simdv_int32& lo32, simdv_int32& hi32) {
// mask0 isolates one byte; mask1..mask3 are the keep-masks for the three
// spreading rounds below (mask3 = 0x49249249 keeps every third bit).
7048 simdv_int32 mask0 = simd_int32::makev(0xff);
7049 simdv_int32 mask1 = simd_int32::makev(0x0f00f00f);
7050 simdv_int32 mask2 = simd_int32::makev(0xc30c30c3);
7051 simdv_int32 mask3 = simd_int32::makev(0x49249249);
// Split the 16 input bits into two bytes: lo24 <- bits 0..7, hi24 <- bits 8..15.
7053 simdv_int32 lo24 = x;
7054 simdv_int32 hi24 = simd_int32::shift_right<8>(x);
7055 lo24 = simd::bitwise_and(lo24, mask0);
7056 hi24 = simd::bitwise_and(hi24, mask0);
// Round 1 (shift by 8): the upper nibble of each byte moves to bits 12..15.
7058 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<8>(lo24));
7059 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<8>(hi24));
7060 lo24 = simd::bitwise_and(lo24, mask1);
7061 hi24 = simd::bitwise_and(hi24, mask1);
// Round 2 (shift by 4): bit pairs end up at bits 0..1, 6..7, 12..13, 18..19.
7063 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<4>(lo24));
7064 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<4>(hi24));
7065 lo24 = simd::bitwise_and(lo24, mask2);
7066 hi24 = simd::bitwise_and(hi24, mask2);
// Round 3 (shift by 2): single bits end up at bits 0, 3, 6, ..., 21.
7068 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<2>(lo24));
7069 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<2>(hi24));
7070 lo24 = simd::bitwise_and(lo24, mask3);
7071 hi24 = simd::bitwise_and(hi24, mask3);
// Recombine as (hi24 << 24 | lo24) << offset. The part of hi24 << (24+offset)
// that lies at bit 32 and above equals hi24 >> (8-offset) and goes to hi32.
7073 lo32 = simd::bitwise_or(simd_int32::shift_left<offset>(lo24), simd_int32::shift_left<24+offset>(hi24));
7074 hi32 = simd_int32::shift_right<8-offset>(hi24);
// morton: builds a 3D bit-interleaved (Morton-style) code from x, y and z.
// Each coordinate is dilated with dilate_3 using a different bit offset
// (z -> offset 0, y -> offset 1, x -> offset 2) and the three results are ORed
// together, so bit k of z, y, x lands on bit 3*k, 3*k+1, 3*k+2 respectively.
//
// x, y, z - inputs passed straight to dilate_3.
// lo32    - out: OR of the three low-word results.
// hi32    - out: OR of the three high-word results.
7077static inline void morton(simdv_int32 x, simdv_int32 y, simdv_int32 z, simdv_int32& lo32, simdv_int32& hi32) {
7078 simdv_int32 lx, hx, ly, hy, lz, hz;
7079 dilate_3<2>(x, lx, hx);
7080 dilate_3<1>(y, ly, hy);
7081 dilate_3<0>(z, lz, hz);
// The dilated bit positions of the three coordinates are disjoint, so OR merges them.
7083 lo32 = simd::bitwise_or(simd::bitwise_or(lx, ly), lz);
7084 hi32 = simd::bitwise_or(simd::bitwise_or(hx, hy), hz);
// radix_sort_uint64_low48: byte-wise radix sort helper for 64-bit keys that
// examines only the low 48 bits (six 8-bit digits, least significant first).
// Bits 48..63 never take part in bucket selection.
//
// data      - keys, `count` entries; the passes for digits 0, 2 and 4 read from it.
// count     - number of keys.
// temporary - Arena received by value; the scratch array `temp` is allocated
//             from this local copy.
//
// NOTE(review): this listing omits some source lines (the scatter stores after
// each `index` computation and the closing braces). Presumably each pass
// stores d into the other buffer at `index` (data -> temp -> data ...), so the
// result ends up back in `data` after the sixth pass - confirm against the full file.
7087static inline void radix_sort_uint64_low48(uint64_t* data,
unsigned count,
Arena temporary) {
7088 uint64_t* temp = allocate_array<uint64_t>(&temporary, count, 16);
// One zero-initialised table of 257 counters per digit.
7090 unsigned buckets0[257] = {};
7091 unsigned buckets1[257] = {};
7092 unsigned buckets2[257] = {};
7093 unsigned buckets3[257] = {};
7094 unsigned buckets4[257] = {};
7095 unsigned buckets5[257] = {};
// histogramN is bucketsN shifted by one slot, so the count for digit value v
// is accumulated in bucketsN[v+1].
7097 unsigned* histogram0 = buckets0+1;
7098 unsigned* histogram1 = buckets1+1;
7099 unsigned* histogram2 = buckets2+1;
7100 unsigned* histogram3 = buckets3+1;
7101 unsigned* histogram4 = buckets4+1;
7102 unsigned* histogram5 = buckets5+1;
// Single counting sweep: histogram all six digits of every key at once.
7104 for (
unsigned i = 0; i < count; ++i) {
7105 uint64_t d = data[i];
7107 ++histogram0[(d >> (0 << 3)) & 0xff];
7108 ++histogram1[(d >> (1 << 3)) & 0xff];
7109 ++histogram2[(d >> (2 << 3)) & 0xff];
7110 ++histogram3[(d >> (3 << 3)) & 0xff];
7111 ++histogram4[(d >> (4 << 3)) & 0xff];
7112 ++histogram5[(d >> (5 << 3)) & 0xff];
// Running sum over slots 1..255: afterwards bucketsN[v] is the number of keys
// whose digit N is smaller than v, i.e. the first output slot for digit value v
// (bucketsN[0] stays 0).
7115 for (
unsigned i = 1; i < 256; ++i) {
7116 buckets0[i] += buckets0[i-1];
7117 buckets1[i] += buckets1[i-1];
7118 buckets2[i] += buckets2[i-1];
7119 buckets3[i] += buckets3[i-1];
7120 buckets4[i] += buckets4[i-1];
7121 buckets5[i] += buckets5[i-1];
// Digit 0 pass: reads data; `index` is the next free slot of the key's bucket.
7124 for (
unsigned i = 0; i < count; ++i) {
7125 uint64_t d = data[i];
7126 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
// Digit 1 pass: reads temp.
7130 for (
unsigned i = 0; i < count; ++i) {
7131 uint64_t d = temp[i];
7132 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
// Digit 2 pass: reads data.
7136 for (
unsigned i = 0; i < count; ++i) {
7137 uint64_t d = data[i];
7138 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
// Digit 3 pass: reads temp.
7142 for (
unsigned i = 0; i < count; ++i) {
7143 uint64_t d = temp[i];
7144 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
// Digit 4 pass: reads data.
7148 for (
unsigned i = 0; i < count; ++i) {
7149 uint64_t d = data[i];
7150 unsigned index = buckets4[(d >> (4 << 3)) & 0xff]++;
// Digit 5 pass (bits 40..47): reads temp.
7154 for (
unsigned i = 0; i < count; ++i) {
7155 uint64_t d = temp[i];
7156 unsigned index = buckets5[(d >> (5 << 3)) & 0xff]++;
// radix_sort_uint32_x2: byte-wise radix sort helper keyed on `data` (four
// 8-bit digits, least significant first) that reads a second array `data2`
// in lockstep with the keys.
//
// data      - 32-bit keys, `count` entries; bucket selection uses only these.
// data2     - companion values, `count` entries; element i is read together with data[i].
// count     - number of entries in each array.
// temporary - Arena received by value; scratch arrays `temp` and `temp2` are
//             allocated from this local copy.
//
// NOTE(review): this listing omits some source lines (the scatter stores after
// each `index` computation and the closing braces). Presumably each pass
// stores d and d2 at `index` in the other buffer pair, so data2 is permuted
// exactly like data - confirm against the full file.
7161static inline void radix_sort_uint32_x2(uint32_t* data, uint32_t* data2,
unsigned count,
Arena temporary) {
7162 uint32_t* temp = allocate_array<uint32_t>(&temporary, count, 16);
7163 uint32_t* temp2 = allocate_array<uint32_t>(&temporary, count, 16);
// One zero-initialised table of 257 counters per digit.
7165 unsigned buckets0[257] = {};
7166 unsigned buckets1[257] = {};
7167 unsigned buckets2[257] = {};
7168 unsigned buckets3[257] = {};
// histogramN is bucketsN shifted by one slot, so the count for digit value v
// is accumulated in bucketsN[v+1].
7170 unsigned* histogram0 = buckets0+1;
7171 unsigned* histogram1 = buckets1+1;
7172 unsigned* histogram2 = buckets2+1;
7173 unsigned* histogram3 = buckets3+1;
// Single counting sweep over the keys for all four digits.
7175 for (
unsigned i = 0; i < count; ++i) {
7176 uint32_t d = data[i];
7178 ++histogram0[(d >> (0 << 3)) & 0xff];
7179 ++histogram1[(d >> (1 << 3)) & 0xff];
7180 ++histogram2[(d >> (2 << 3)) & 0xff];
7181 ++histogram3[(d >> (3 << 3)) & 0xff];
// Running sum over slots 1..255: bucketsN[v] becomes the first output slot
// for digit value v (bucketsN[0] stays 0).
7184 for (
unsigned i = 1; i < 256; ++i) {
7185 buckets0[i] += buckets0[i-1];
7186 buckets1[i] += buckets1[i-1];
7187 buckets2[i] += buckets2[i-1];
7188 buckets3[i] += buckets3[i-1];
// Digit 0 pass: reads data/data2.
7191 for (
unsigned i = 0; i < count; ++i) {
7192 uint32_t d = data[i];
7193 uint32_t d2 = data2[i];
7194 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
// Digit 1 pass: reads temp/temp2.
7199 for (
unsigned i = 0; i < count; ++i) {
7200 uint32_t d = temp[i];
7201 uint32_t d2 = temp2[i];
7202 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
// Digit 2 pass: reads data/data2.
7207 for (
unsigned i = 0; i < count; ++i) {
7208 uint32_t d = data[i];
7209 uint32_t d2 = data2[i];
7210 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
// Digit 3 pass (bits 24..31): reads temp/temp2.
7215 for (
unsigned i = 0; i < count; ++i) {
7216 uint32_t d = temp[i];
7217 uint32_t d2 = temp2[i];
7218 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
// radix_sort_uint32: byte-wise radix sort helper for 32-bit keys (four 8-bit
// digits, least significant first).
//
// data      - keys, `count` entries; the passes for digits 0 and 2 read from it.
// count     - number of keys.
// temporary - Arena received by value; the scratch array `temp` is allocated
//             from this local copy.
//
// NOTE(review): this listing omits some source lines (the scatter stores after
// each `index` computation and the closing braces). Presumably each pass
// stores d into the other buffer at `index` (data -> temp -> data ...), so the
// result ends up back in `data` after the fourth pass - confirm against the full file.
7224static inline void radix_sort_uint32(uint32_t* data,
unsigned count,
Arena temporary) {
7225 uint32_t* temp = allocate_array<uint32_t>(&temporary, count, 16);
// One zero-initialised table of 257 counters per digit.
7227 unsigned buckets0[257] = {};
7228 unsigned buckets1[257] = {};
7229 unsigned buckets2[257] = {};
7230 unsigned buckets3[257] = {};
// histogramN is bucketsN shifted by one slot, so the count for digit value v
// is accumulated in bucketsN[v+1].
7232 unsigned* histogram0 = buckets0+1;
7233 unsigned* histogram1 = buckets1+1;
7234 unsigned* histogram2 = buckets2+1;
7235 unsigned* histogram3 = buckets3+1;
// Single counting sweep over the keys for all four digits.
7237 for (
unsigned i = 0; i < count; ++i) {
7238 uint32_t d = data[i];
7240 ++histogram0[(d >> (0 << 3)) & 0xff];
7241 ++histogram1[(d >> (1 << 3)) & 0xff];
7242 ++histogram2[(d >> (2 << 3)) & 0xff];
7243 ++histogram3[(d >> (3 << 3)) & 0xff];
// Running sum over slots 1..255: bucketsN[v] becomes the first output slot
// for digit value v (bucketsN[0] stays 0).
7246 for (
unsigned i = 1; i < 256; ++i) {
7247 buckets0[i] += buckets0[i-1];
7248 buckets1[i] += buckets1[i-1];
7249 buckets2[i] += buckets2[i-1];
7250 buckets3[i] += buckets3[i-1];
// Digit 0 pass: reads data; `index` is the next free slot of the key's bucket.
7253 for (
unsigned i = 0; i < count; ++i) {
7254 uint32_t d = data[i];
7255 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
// Digit 1 pass: reads temp.
7259 for (
unsigned i = 0; i < count; ++i) {
7260 uint32_t d = temp[i];
7261 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
// Digit 2 pass: reads data.
7265 for (
unsigned i = 0; i < count; ++i) {
7266 uint32_t d = data[i];
7267 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
// Digit 3 pass (bits 24..31): reads temp.
7271 for (
unsigned i = 0; i < count; ++i) {
7272 uint32_t d = temp[i];
7273 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
// load4: indexed load of 4-float records followed by a 4x4 transpose.
// For each index idx read from `indices`, four floats are loaded from
// data + idx*stride_in_floats; the final transpose32 then rearranges d0..d3.
//
// data_stride  - template parameter: record size in bytes (divided by
//                sizeof(float) to get the element stride).
// index_stride - template parameter: step, in elements of T, between
//                consecutive entries read from `indices`.
// data         - base pointer of the float records.
// indices      - index list; entries 0..3 are read, and entries 4..7 as well
//                in the 256-bit branch.
// d0..d3       - out: loaded and transposed values.
//
// NOTE(review): the #else/#endif lines are missing from this listing. The
// second group of i0..i3 loads is presumably the non-256-bit branch - confirm.
// After the transpose, dN presumably holds float N of each loaded record
// (per 128-bit half in the 256-bit build) - confirm against simd128::transpose32.
7278template<
unsigned data_str
ide,
unsigned index_str
ide,
class T>
7279NUDGE_FORCEINLINE
static void load4(
const float* data,
const T* indices,
7280 simdv_float& d0, simdv_float& d1, simdv_float& d2, simdv_float& d3) {
7281 static const unsigned stride_in_floats = data_stride/
sizeof(float);
// 256-bit branch: load eight records and pair record k with record k+4.
7283#if NUDGE_SIMDV_WIDTH == 256
7284 unsigned i0 = indices[0*index_stride];
7285 unsigned i1 = indices[1*index_stride];
7286 unsigned i2 = indices[2*index_stride];
7287 unsigned i3 = indices[3*index_stride];
7289 simd4_float t0 = simd_float::load4(data + i0*stride_in_floats);
7290 simd4_float t1 = simd_float::load4(data + i1*stride_in_floats);
7291 simd4_float t2 = simd_float::load4(data + i2*stride_in_floats);
7292 simd4_float t3 = simd_float::load4(data + i3*stride_in_floats);
7294 unsigned i4 = indices[4*index_stride];
7295 unsigned i5 = indices[5*index_stride];
7296 unsigned i6 = indices[6*index_stride];
7297 unsigned i7 = indices[7*index_stride];
7299 simd4_float t4 = simd_float::load4(data + i4*stride_in_floats);
7300 simd4_float t5 = simd_float::load4(data + i5*stride_in_floats);
7301 simd4_float t6 = simd_float::load4(data + i6*stride_in_floats);
7302 simd4_float t7 = simd_float::load4(data + i7*stride_in_floats);
7304 d0 = simd::concat(t0, t4);
7305 d1 = simd::concat(t1, t5);
7306 d2 = simd::concat(t2, t6);
7307 d3 = simd::concat(t3, t7);
// Four-record variant: each record is loaded straight into its output register.
7309 unsigned i0 = indices[0*index_stride];
7310 unsigned i1 = indices[1*index_stride];
7311 unsigned i2 = indices[2*index_stride];
7312 unsigned i3 = indices[3*index_stride];
7314 d0 = simd_float::load4(data + i0*stride_in_floats);
7315 d1 = simd_float::load4(data + i1*stride_in_floats);
7316 d2 = simd_float::load4(data + i2*stride_in_floats);
7317 d3 = simd_float::load4(data + i3*stride_in_floats);
// Convert from one-record-per-register to one-component-per-register.
7320 simd128::transpose32(d0, d1, d2, d3);
// load8: indexed load of 8-float records followed by two 4x4 transposes.
// For each index idx read from `indices`, eight floats starting at
// data + idx*stride_in_floats are loaded; floats 0..3 feed d0..d3 and floats
// 4..7 feed d4..d7 (explicit in the load4-based variant below, which reads
// at +0 and +4).
//
// data_stride  - template parameter: record size in bytes (divided by
//                sizeof(float) to get the element stride).
// index_stride - template parameter: step, in elements of T, between
//                consecutive entries read from `indices`.
// data         - base pointer of the float records.
// indices      - index list; entries 0..3 are read, and entries 4..7 as well
//                in the 256-bit branch.
// d0..d7       - out: loaded and transposed values.
//
// NOTE(review): the #else/#endif lines are missing from this listing. The
// second group of i0..i3 loads is presumably the non-256-bit branch - confirm.
// permute128<0,2>/<1,3> presumably pick the low/high 128-bit halves of the
// two operands - confirm against the simd256 wrapper.
7323template<
unsigned data_str
ide,
unsigned index_str
ide,
class T>
7324NUDGE_FORCEINLINE
static void load8(
const float* data,
const T* indices,
7325 simdv_float& d0, simdv_float& d1, simdv_float& d2, simdv_float& d3,
7326 simdv_float& d4, simdv_float& d5, simdv_float& d6, simdv_float& d7) {
7327 static const unsigned stride_in_floats = data_stride/
sizeof(float);
// 256-bit branch: load eight full records, then regroup record k with record k+4.
7329#if NUDGE_SIMDV_WIDTH == 256
7330 unsigned i0 = indices[0*index_stride];
7331 unsigned i1 = indices[1*index_stride];
7332 unsigned i2 = indices[2*index_stride];
7333 unsigned i3 = indices[3*index_stride];
7335 simdv_float t0 = simd_float::load8(data + i0*stride_in_floats);
7336 simdv_float t1 = simd_float::load8(data + i1*stride_in_floats);
7337 simdv_float t2 = simd_float::load8(data + i2*stride_in_floats);
7338 simdv_float t3 = simd_float::load8(data + i3*stride_in_floats);
7340 unsigned i4 = indices[4*index_stride];
7341 unsigned i5 = indices[5*index_stride];
7342 unsigned i6 = indices[6*index_stride];
7343 unsigned i7 = indices[7*index_stride];
7345 simdv_float t4 = simd_float::load8(data + i4*stride_in_floats);
7346 simdv_float t5 = simd_float::load8(data + i5*stride_in_floats);
7347 simdv_float t6 = simd_float::load8(data + i6*stride_in_floats);
7348 simdv_float t7 = simd_float::load8(data + i7*stride_in_floats);
7350 d0 = simd256::permute128<0,2>(t0, t4);
7351 d1 = simd256::permute128<0,2>(t1, t5);
7352 d2 = simd256::permute128<0,2>(t2, t6);
7353 d3 = simd256::permute128<0,2>(t3, t7);
7355 d4 = simd256::permute128<1,3>(t0, t4);
7356 d5 = simd256::permute128<1,3>(t1, t5);
7357 d6 = simd256::permute128<1,3>(t2, t6);
7358 d7 = simd256::permute128<1,3>(t3, t7);
// Four-record variant: floats 0..3 of each record go to d0..d3, floats 4..7 to d4..d7.
7360 unsigned i0 = indices[0*index_stride];
7361 unsigned i1 = indices[1*index_stride];
7362 unsigned i2 = indices[2*index_stride];
7363 unsigned i3 = indices[3*index_stride];
7365 d0 = simd_float::load4(data + i0*stride_in_floats);
7366 d1 = simd_float::load4(data + i1*stride_in_floats);
7367 d2 = simd_float::load4(data + i2*stride_in_floats);
7368 d3 = simd_float::load4(data + i3*stride_in_floats);
7370 d4 = simd_float::load4(data + i0*stride_in_floats + 4);
7371 d5 = simd_float::load4(data + i1*stride_in_floats + 4);
7372 d6 = simd_float::load4(data + i2*stride_in_floats + 4);
7373 d7 = simd_float::load4(data + i3*stride_in_floats + 4);
// Transpose each group of four registers independently.
7376 simd128::transpose32(d0, d1, d2, d3);
7377 simd128::transpose32(d4, d5, d6, d7);
// store8: scatters eight SIMD registers (d0..d7) back to rows of a float
// array, each row selected through `indices`.
//
// Template parameters:
//   data_stride  - distance between rows of `data`, in bytes (it is divided
//                  by sizeof(float) below to obtain a stride in floats).
//   index_stride - step, in elements, between consecutive entries read from
//                  `indices`.
//   T            - element type of `indices`; each entry is converted to
//                  unsigned.
//
// NOTE(review): this excerpt omits some original lines (the directive between
// the two code paths and the closing lines are not visible), so the comments
// below describe only the statements that are shown.
7380template<
unsigned data_str
ide,
unsigned index_str
ide,
class T>
7381NUDGE_FORCEINLINE
static void store8(
float* data,
const T* indices,
7382 simdv_float d0, simdv_float d1, simdv_float d2, simdv_float d3,
7383 simdv_float d4, simdv_float d5, simdv_float d6, simdv_float d7) {
7384 static const unsigned stride_in_floats = data_stride/
sizeof(float);
// 256-bit path: recombine the 128-bit halves of (d0,d4) .. (d3,d7), transpose
// each group of four registers, then write one 8-float row for each of the
// eight indices i0..i7.
7386#if NUDGE_SIMDV_WIDTH == 256
7387 simdv_float t0 = simd256::permute128<0,2>(d0, d4);
7388 simdv_float t1 = simd256::permute128<0,2>(d1, d5);
7389 simdv_float t2 = simd256::permute128<0,2>(d2, d6);
7390 simdv_float t3 = simd256::permute128<0,2>(d3, d7);
7392 simdv_float t4 = simd256::permute128<1,3>(d0, d4);
7393 simdv_float t5 = simd256::permute128<1,3>(d1, d5);
7394 simdv_float t6 = simd256::permute128<1,3>(d2, d6);
7395 simdv_float t7 = simd256::permute128<1,3>(d3, d7);
7397 simd128::transpose32(t0, t1, t2, t3);
7398 simd128::transpose32(t4, t5, t6, t7);
7400 unsigned i0 = indices[0*index_stride];
7401 unsigned i1 = indices[1*index_stride];
7402 unsigned i2 = indices[2*index_stride];
7403 unsigned i3 = indices[3*index_stride];
7405 simd_float::store8(data + i0*stride_in_floats, t0);
7406 simd_float::store8(data + i1*stride_in_floats, t1);
7407 simd_float::store8(data + i2*stride_in_floats, t2);
7408 simd_float::store8(data + i3*stride_in_floats, t3);
7410 unsigned i4 = indices[4*index_stride];
7411 unsigned i5 = indices[5*index_stride];
7412 unsigned i6 = indices[6*index_stride];
7413 unsigned i7 = indices[7*index_stride];
7415 simd_float::store8(data + i4*stride_in_floats, t4);
7416 simd_float::store8(data + i5*stride_in_floats, t5);
7417 simd_float::store8(data + i6*stride_in_floats, t6);
7418 simd_float::store8(data + i7*stride_in_floats, t7);
// Other path (presumably the branch taken when the SIMD width is not 256; the
// separating directive is not visible here): transpose d0..d3 and d4..d7 in
// place, then write each of the four rows i0..i3 as two 4-float stores, at
// float offsets 0 and 4 within the row.
7420 simd128::transpose32(d0, d1, d2, d3);
7421 simd128::transpose32(d4, d5, d6, d7);
7423 unsigned i0 = indices[0*index_stride];
7424 unsigned i1 = indices[1*index_stride];
7425 unsigned i2 = indices[2*index_stride];
7426 unsigned i3 = indices[3*index_stride];
7428 simd_float::store4(data + i0*stride_in_floats, d0);
7429 simd_float::store4(data + i1*stride_in_floats, d1);
7430 simd_float::store4(data + i2*stride_in_floats, d2);
7431 simd_float::store4(data + i3*stride_in_floats, d3);
7433 simd_float::store4(data + i0*stride_in_floats + 4, d4);
7434 simd_float::store4(data + i1*stride_in_floats + 4, d5);
7435 simd_float::store4(data + i2*stride_in_floats + 4, d6);
7436 simd_float::store4(data + i3*stride_in_floats + 4, d7);
// NUDGE_INTERNAL_CSBM is the logical operator that joins the two directional
// group/mask tests inside NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO:
//   &&  - the pair is kept only when both (a.group & b.mask) and
//         (b.group & a.mask) are non-zero;
//   ||  - the pair is kept when either of them is non-zero.
// The && form is chosen when NUDGE_COLLISION_MASKS_CONSISTENT is NOT defined.
7440#ifndef NUDGE_COLLISION_MASKS_CONSISTENT
7441# define NUDGE_INTERNAL_CSBM &&
// NOTE(review): the directive separating the two definitions (presumably
// #else) and the closing #endif are not visible in this excerpt.
7443# define NUDGE_INTERNAL_CSBM ||
// Default body-pair filter; it is only defined here when the user has not
// already provided NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO.
//
// a, b: pointers to per-body filter records exposing `flags`,
//       `collision_group` and `collision_mask`.
// The expression is true when the pair (a, b) must be SKIPPED (callers use it
// as `if (MACRO(a,b)) continue;`). From the visible terms, a pair is skipped
// when:
//   1. both bodies have a bit of BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED set, or
//   2. either body has a bit of BF_IS_DISABLED_OR_REMOVED set, or
//   3. the group/mask test, joined with NUDGE_INTERNAL_CSBM, fails.
// Both arguments are expanded several times, so they must be free of side
// effects.
// NOTE(review): some lines of the macro (its opening and closing lines) are
// not visible in this excerpt; no comment is placed between the continuation
// lines because that would break the backslash line splicing.
7446#ifndef NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO
7460#define NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a,b) \
7462 ((a)->flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED && (b)->flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) \
7463 || (((a)->flags&BF_IS_DISABLED_OR_REMOVED) || ((b)->flags&BF_IS_DISABLED_OR_REMOVED)) \
7464 || (!(((a)->collision_group&(b)->collision_mask) NUDGE_INTERNAL_CSBM ((b)->collision_group&(a)->collision_mask))) \
7482 contacts->
count = 0;
7484 active_bodies->
count = 0;
// Broadphase setup: both output counts were reset above; the statements below
// build one world-space AABB and one world transform per collider.
// NOTE(review): the enclosing function header and the definition of `count`
// are not visible in this excerpt; `count` appears to be the total number of
// colliders (boxes followed by spheres) - confirm.
//
// aligned_count rounds count up to a multiple of 8 so later stages can work
// on whole groups of 8.
7489 unsigned aligned_count = (count + 7) & (~7);
// Upper limit of 8192 colliders. Later in this function a collider slot is
// stored in the bits above bit 19 of a 32-bit word, which leaves 13 bits -
// presumably the reason for this limit.
7491 assert(count <= (1 << 13));
7493 AABB* aos_bounds = allocate_array<AABB>(&temporary, aligned_count, 32);
// Boxes occupy slots [0, boxes.count); spheres follow directly after them.
7495 unsigned box_bounds_offset = 0;
7496 unsigned sphere_bounds_offset = colliders.
boxes.
count;
7498 Transform* transforms = allocate_array<Transform>(&temporary, count, 32);
7499 uint16_t* collider_tags = allocate_array<uint16_t>(&temporary, count, 32);
7500 uint16_t* collider_bodies = allocate_array<uint16_t>(&temporary, count, 32);
// Boxes: compose the collider transform with its body's transform, then derive
// a half-extent vector `size` from sums of absolute rotation-matrix entries.
// NOTE(review): the lines that declare `transform`, build `size` and `aabb`
// are not visible here.
7503 for (
unsigned i = 0; i < colliders.
boxes.
count; ++i) {
7505 transform = body_transforms[transform.
body] * transform;
7508 float3x3 m = matrix(make_rotation(transform.rotation));
7515 fabsf(m.c0.x) + fabsf(m.c1.x) + fabsf(m.c2.x),
7516 fabsf(m.c0.y) + fabsf(m.c1.y) + fabsf(m.c2.y),
7517 fabsf(m.c0.z) + fabsf(m.c1.z) + fabsf(m.c2.z),
// The box AABB is centred on the world position and extends `size` each way.
7520 float3 min = make_float3(transform.position) - size;
7521 float3 max = make_float3(transform.position) + size;
7528 transforms[i + box_bounds_offset] = transform;
7529 aos_bounds[i + box_bounds_offset] = aabb;
7530 collider_tags[i + box_bounds_offset] = colliders.
boxes.
tags[i];
// Spheres: same composition; the AABB extends `radius` in every direction.
7538 for (
unsigned i = 0; i < colliders.
spheres.
count; ++i) {
7540 transform = body_transforms[transform.
body] * transform;
7545 float3 min = make_float3(transform.position) - make_float3(radius);
7546 float3 max = make_float3(transform.position) + make_float3(radius);
7553 transforms[i + sphere_bounds_offset] = transform;
7554 aos_bounds[i + sphere_bounds_offset] = aabb;
7555 collider_tags[i + sphere_bounds_offset] = colliders.
spheres.
tags[i];
// Fill the padding slots [count, aligned_count) with `zero` (its definition is
// not visible here) so whole-vector loads past `count` read initialized data.
7562 for (
unsigned i = count; i < aligned_count; ++i) {
7564 aos_bounds[i] = zero;
// Compute scene bounds, quantize each collider's minimum corner, and build one
// 64-bit sort key per collider: Morton code in the low bits, collider index in
// the top 16 bits.
//
// Both the scene minimum and the scene maximum are accumulated from the same
// 4-float load starting at each AABB's min.x, i.e. from the minimum corners.
7568 simd4_float scene_min128 = simd_float::load4(&aos_bounds[0].min.x);
7569 simd4_float scene_max128 = scene_min128;
7571 for (
unsigned i = 1; i < count; ++i) {
7572 simd4_float p = simd_float::load4(&aos_bounds[i].min.x);
7573 scene_min128 = simd_float::min(scene_min128, p);
7574 scene_max128 = simd_float::max(scene_max128, p);
// Per-axis scale that maps the scene extent onto [0, 2^16 - 1].
7577 simd4_float scene_scale128 = simd_float::make4((1<<16)-1) * simd_float::recip(scene_max128 - scene_min128);
// The two shuffle/min steps appear to leave the smallest of the x, y and z
// scales in every lane, so one uniform scale is used for all axes.
// (Assumes shuffle32<a,b,c,d> selects source lanes in that order - confirm.)
7579 scene_scale128 = simd_float::min(simd128::shuffle32<0,1,2,2>(scene_scale128), simd128::shuffle32<2,2,0,1>(scene_scale128));
7580 scene_scale128 = simd_float::min(scene_scale128, simd128::shuffle32<1,0,3,2>(scene_scale128));
// Pre-scale the minimum so the later msub(pos, scale, min) needs no extra
// multiply (presumably msub computes pos*scale - min; confirm).
7581 scene_min128 = scene_min128 * scene_scale128;
// A scale below 2 means the scene spans more than roughly 32k world units.
7584 if (simd_float::extract_first_float(scene_scale128) < 2.0f)
7585 log(
"Warning: World bounds are very large, which may decrease performance. Perhaps there's a body in free fall?\n");
// `index` holds, per lane, the collider index shifted left by 16. It is OR-ed
// into the high 32-bit word of each key, which puts the index in bits 48..63
// of the 64-bit key.
// NOTE(review): the directive between the 256-bit and the other variant is not
// visible in this excerpt.
7588#if NUDGE_SIMDV_WIDTH == 256
7589 simdv_float scene_min = simd256::broadcast(scene_min128);
7590 simdv_float scene_scale = simd256::broadcast(scene_scale128);
7591 simdv_int32 index = simd_int32::make8(0 << 16, 1 << 16, 2 << 16, 3 << 16, 4 << 16, 5 << 16, 6 << 16, 7 << 16);
7593 simdv_float scene_min = scene_min128;
7594 simdv_float scene_scale = scene_scale128;
7595 simdv_int32 index = simd_int32::make4(0 << 16, 1 << 16, 2 << 16, 3 << 16);
7598 simdv_float scene_min_x = simd128::shuffle32<0,0,0,0>(scene_min);
7599 simdv_float scene_min_y = simd128::shuffle32<1,1,1,1>(scene_min);
7600 simdv_float scene_min_z = simd128::shuffle32<2,2,2,2>(scene_min);
7602 uint64_t* morton_codes = allocate_array<uint64_t>(&temporary, aligned_count, 32);
// Process simdv_width32 colliders per iteration: load their minimum corners,
// transpose to per-axis vectors, quantize, and interleave into Morton codes.
7604 for (
unsigned i = 0; i < count; i += simdv_width32) {
7605#if NUDGE_SIMDV_WIDTH == 256
7606 simd4_float pos_xl = simd_float::load4(&aos_bounds[i+0].min.x);
7607 simd4_float pos_yl = simd_float::load4(&aos_bounds[i+1].min.x);
7608 simd4_float pos_zl = simd_float::load4(&aos_bounds[i+2].min.x);
7609 simd4_float pos_wl = simd_float::load4(&aos_bounds[i+3].min.x);
7611 simdv_float pos_x = simd::concat(pos_xl, simd_float::load4(&aos_bounds[i+4].min.x));
7612 simdv_float pos_y = simd::concat(pos_yl, simd_float::load4(&aos_bounds[i+5].min.x));
7613 simdv_float pos_z = simd::concat(pos_zl, simd_float::load4(&aos_bounds[i+6].min.x));
7614 simdv_float pos_w = simd::concat(pos_wl, simd_float::load4(&aos_bounds[i+7].min.x));
7616 simd4_float pos_x = simd_float::load4(&aos_bounds[i+0].min.x);
7617 simd4_float pos_y = simd_float::load4(&aos_bounds[i+1].min.x);
7618 simd4_float pos_z = simd_float::load4(&aos_bounds[i+2].min.x);
7619 simd4_float pos_w = simd_float::load4(&aos_bounds[i+3].min.x);
7622 simd128::transpose32(pos_x, pos_y, pos_z, pos_w);
7624 pos_x = simd_float::msub(pos_x, scene_scale, scene_min_x);
7625 pos_y = simd_float::msub(pos_y, scene_scale, scene_min_y);
7626 pos_z = simd_float::msub(pos_z, scene_scale, scene_min_z);
// lm / hm receive the low and high 32-bit words of each code (their
// declarations are not visible here).
7629 morton(simd_float::toint(pos_x), simd_float::toint(pos_y), simd_float::toint(pos_z), lm, hm);
7630 hm = simd::bitwise_or(hm, index);
// Interleave low and high words so each (lm, hm) pair forms one 64-bit key in
// memory (low word first).
7632 simdv_int32 mi0 = simd128::unpacklo32(lm, hm);
7633 simdv_int32 mi1 = simd128::unpackhi32(lm, hm);
7635#if NUDGE_SIMDV_WIDTH == 256
7636 simd_int32::store8((int32_t*)(morton_codes + i) + 0, simd256::permute128<0,2>(mi0, mi1));
7637 simd_int32::store8((int32_t*)(morton_codes + i) + 8, simd256::permute128<1,3>(mi0, mi1));
7639 simd_int32::store4((int32_t*)(morton_codes + i) + 0, mi0);
7640 simd_int32::store4((int32_t*)(morton_codes + i) + 4, mi1);
// Advance every lane's collider index by the number of lanes processed.
7643 index = simd_int32::add(index, simd_int32::makev(simdv_width32 << 16));
// Going by its name, this sorts on the low 48 bits only, so the index stored
// in the top 16 bits rides along as payload (confirm against the helper).
7646 radix_sort_uint64_low48(morton_codes, count, temporary);
// Recover the sorted collider order from the keys and gather the AABBs into
// per-axis (structure-of-arrays) vectors in that order.
7647 uint16_t* sorted_indices = allocate_array<uint16_t>(&temporary, aligned_count, 32);
// The collider index was stored in the top 16 bits of each 64-bit key.
7649 for (
unsigned i = 0; i < count; ++i)
7650 sorted_indices[i] = (uint16_t)(morton_codes[i] >> 48);
// Padding entries point at collider 0 so the gather below reads valid memory.
7652 for (
unsigned i = count; i < aligned_count; ++i)
7653 sorted_indices[i] = 0;
// One AABBV holds simdv_width32 boxes, one lane per box.
7655 unsigned bounds_count = aligned_count >> simdv_width32_log2;
7656 AABBV* bounds = allocate_array<AABBV>(&temporary, bounds_count, 32);
// Gather simdv_width32 AABBs through sorted_indices and store them transposed.
// The min_w / max_w outputs of the gather are not stored.
7658 for (
unsigned i = 0; i < count; i += simdv_width32) {
7659 simdv_float min_x, min_y, min_z, min_w;
7660 simdv_float max_x, max_y, max_z, max_w;
7661 load8<sizeof(aos_bounds[0]), 1>(&aos_bounds[0].min.x, sorted_indices + i,
7662 min_x, min_y, min_z, min_w,
7663 max_x, max_y, max_z, max_w);
7665 simd_float::storev(bounds[i >> simdv_width32_log2].min_x, min_x);
7666 simd_float::storev(bounds[i >> simdv_width32_log2].max_x, max_x);
7667 simd_float::storev(bounds[i >> simdv_width32_log2].min_y, min_y);
7668 simd_float::storev(bounds[i >> simdv_width32_log2].max_y, max_y);
7669 simd_float::storev(bounds[i >> simdv_width32_log2].min_z, min_z);
7670 simd_float::storev(bounds[i >> simdv_width32_log2].max_z, max_z);
// Overwrite the padding lanes with NaN. Presumably this makes every later
// cmp_gt overlap test involving a padding lane fail, since ordered
// comparisons with NaN are false (confirm cmp_gt is an ordered compare).
7673 for (
unsigned i = count; i < aligned_count; ++i) {
7674 unsigned bounds_group = i >> simdv_width32_log2;
7675 unsigned bounds_lane = i & (simdv_width32-1);
7677 bounds[bounds_group].min_x[bounds_lane] = NAN;
7678 bounds[bounds_group].max_x[bounds_lane] = NAN;
7679 bounds[bounds_group].min_y[bounds_lane] = NAN;
7680 bounds[bounds_group].max_y[bounds_lane] = NAN;
7681 bounds[bounds_group].min_z[bounds_lane] = NAN;
7682 bounds[bounds_group].max_z[bounds_lane] = NAN;
// Build a coarser level: one bounding box per run of 8 consecutive sorted
// colliders, again stored one box per SIMD lane.
7686 unsigned coarse_count = aligned_count >> 3;
// Round the coarse count up to a whole number of SIMD vectors.
7687 unsigned aligned_coarse_count = (coarse_count + (simdv_width32-1)) & (~(simdv_width32-1));
7689 unsigned coarse_bounds_count = aligned_coarse_count >> simdv_width32_log2;
7690 AABBV* coarse_bounds = allocate_array<AABBV>(&temporary, coarse_bounds_count, 32);
7692 for (
unsigned i = 0; i < coarse_count; ++i) {
// Index of the first fine AABBV that belongs to coarse cell i.
7693 unsigned start = i << (3 - simdv_width32_log2);
7695 simd4_float coarse_min_x = simd_float::load4(bounds[start].min_x);
7696 simd4_float coarse_max_x = simd_float::load4(bounds[start].max_x);
7697 simd4_float coarse_min_y = simd_float::load4(bounds[start].min_y);
7698 simd4_float coarse_max_y = simd_float::load4(bounds[start].max_y);
7699 simd4_float coarse_min_z = simd_float::load4(bounds[start].min_z);
7700 simd4_float coarse_max_z = simd_float::load4(bounds[start].max_z);
// Fold in the second group of four boxes: the upper half of the same AABBV in
// the 256-bit variant, or the next AABBV in the other variant (the directive
// between the two variants is not visible in this excerpt).
7703#if NUDGE_SIMDV_WIDTH == 256
7704 coarse_min_x = simd_float::min(coarse_min_x, simd_float::load4(bounds[start].min_x + 4));
7705 coarse_max_x = simd_float::max(coarse_max_x, simd_float::load4(bounds[start].max_x + 4));
7706 coarse_min_y = simd_float::min(coarse_min_y, simd_float::load4(bounds[start].min_y + 4));
7707 coarse_max_y = simd_float::max(coarse_max_y, simd_float::load4(bounds[start].max_y + 4));
7708 coarse_min_z = simd_float::min(coarse_min_z, simd_float::load4(bounds[start].min_z + 4));
7709 coarse_max_z = simd_float::max(coarse_max_z, simd_float::load4(bounds[start].max_z + 4));
7711 coarse_min_x = simd_float::min(coarse_min_x, simd_float::load4(bounds[start+1].min_x));
7712 coarse_max_x = simd_float::max(coarse_max_x, simd_float::load4(bounds[start+1].max_x));
7713 coarse_min_y = simd_float::min(coarse_min_y, simd_float::load4(bounds[start+1].min_y));
7714 coarse_max_y = simd_float::max(coarse_max_y, simd_float::load4(bounds[start+1].max_y));
7715 coarse_min_z = simd_float::min(coarse_min_z, simd_float::load4(bounds[start+1].min_z));
7716 coarse_max_z = simd_float::max(coarse_max_z, simd_float::load4(bounds[start+1].max_z));
// Horizontal reduction: after the two shuffle/min (or max) steps every lane
// holds the min (or max) over all four lanes.
7719 coarse_min_x = simd_float::min(coarse_min_x, simd128::shuffle32<2,3,0,1>(coarse_min_x));
7720 coarse_max_x = simd_float::max(coarse_max_x, simd128::shuffle32<2,3,0,1>(coarse_max_x));
7721 coarse_min_y = simd_float::min(coarse_min_y, simd128::shuffle32<2,3,0,1>(coarse_min_y));
7722 coarse_max_y = simd_float::max(coarse_max_y, simd128::shuffle32<2,3,0,1>(coarse_max_y));
7723 coarse_min_z = simd_float::min(coarse_min_z, simd128::shuffle32<2,3,0,1>(coarse_min_z));
7724 coarse_max_z = simd_float::max(coarse_max_z, simd128::shuffle32<2,3,0,1>(coarse_max_z));
7726 coarse_min_x = simd_float::min(coarse_min_x, simd128::shuffle32<1,0,3,2>(coarse_min_x));
7727 coarse_max_x = simd_float::max(coarse_max_x, simd128::shuffle32<1,0,3,2>(coarse_max_x));
7728 coarse_min_y = simd_float::min(coarse_min_y, simd128::shuffle32<1,0,3,2>(coarse_min_y));
7729 coarse_max_y = simd_float::max(coarse_max_y, simd128::shuffle32<1,0,3,2>(coarse_max_y));
7730 coarse_min_z = simd_float::min(coarse_min_z, simd128::shuffle32<1,0,3,2>(coarse_min_z));
7731 coarse_max_z = simd_float::max(coarse_max_z, simd128::shuffle32<1,0,3,2>(coarse_max_z));
// Write the reduced box into lane (i mod width) of coarse vector (i / width).
7733 unsigned bounds_group = i >> simdv_width32_log2;
7734 unsigned bounds_lane = i & (simdv_width32-1);
7736 coarse_bounds[bounds_group].min_x[bounds_lane] = simd_float::extract_first_float(coarse_min_x);
7737 coarse_bounds[bounds_group].max_x[bounds_lane] = simd_float::extract_first_float(coarse_max_x);
7738 coarse_bounds[bounds_group].min_y[bounds_lane] = simd_float::extract_first_float(coarse_min_y);
7739 coarse_bounds[bounds_group].max_y[bounds_lane] = simd_float::extract_first_float(coarse_max_y);
7740 coarse_bounds[bounds_group].min_z[bounds_lane] = simd_float::extract_first_float(coarse_min_z);
7741 coarse_bounds[bounds_group].max_z[bounds_lane] = simd_float::extract_first_float(coarse_max_z);
// Padding lanes of the last coarse vector are set to NaN, as for the fine
// bounds.
7744 for (
unsigned i = coarse_count; i < aligned_coarse_count; ++i) {
7745 unsigned bounds_group = i >> simdv_width32_log2;
7746 unsigned bounds_lane = i & (simdv_width32-1);
7748 coarse_bounds[bounds_group].min_x[bounds_lane] = NAN;
7749 coarse_bounds[bounds_group].max_x[bounds_lane] = NAN;
7750 coarse_bounds[bounds_group].min_y[bounds_lane] = NAN;
7751 coarse_bounds[bounds_group].max_y[bounds_lane] = NAN;
7752 coarse_bounds[bounds_group].min_z[bounds_lane] = NAN;
7753 coarse_bounds[bounds_group].max_z[bounds_lane] = NAN;
// Coarse overlap pass: test every coarse cell against all coarse vectors from
// its own vector onward, then expand the resulting lane masks into explicit
// (cell, cell) pairs.
//
// Space is reserved for the worst case (coarse_count squared) and trimmed by
// commit_array once the real number of groups is known.
7757 uint32_t* coarse_groups = reserve_array<uint32_t>(&temporary, coarse_count*coarse_count, 32);
7758 unsigned coarse_group_count = 0;
7760 for (
unsigned i = 0; i < coarse_count; ++i) {
7761 unsigned bounds_group = i >> simdv_width32_log2;
7762 unsigned bounds_lane = i & (simdv_width32-1);
// Broadcast cell i's box to every lane.
7764 simdv_float min_a_x = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_x + bounds_lane);
7765 simdv_float max_a_x = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_x + bounds_lane);
7766 simdv_float min_a_y = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_y + bounds_lane);
7767 simdv_float max_a_y = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_y + bounds_lane);
7768 simdv_float min_a_z = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_z + bounds_lane);
7769 simdv_float max_a_z = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_z + bounds_lane);
// Remember where this cell's groups start so the first one can be masked below.
7771 unsigned first = coarse_group_count;
// Group word layout: bits 0..7 lane mask, bits 8..15 index of the tested
// vector, bits 16..31 coarse cell i.
// NOTE(review): the line that advances ij_bits per j iteration is not visible
// in this excerpt.
7777 unsigned ij_bits = (bounds_group << 8) | (i << 16);
7779 for (
unsigned j = bounds_group; j < coarse_bounds_count; ++j) {
7780 simdv_float min_b_x = simd_float::loadv(coarse_bounds[j].min_x);
7781 simdv_float max_b_x = simd_float::loadv(coarse_bounds[j].max_x);
7782 simdv_float min_b_y = simd_float::loadv(coarse_bounds[j].min_y);
7783 simdv_float max_b_y = simd_float::loadv(coarse_bounds[j].max_y);
7784 simdv_float min_b_z = simd_float::loadv(coarse_bounds[j].min_z);
7785 simdv_float max_b_z = simd_float::loadv(coarse_bounds[j].max_z);
// Strict interval overlap on each axis; a lane overlaps only if all three do.
7787 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7788 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7789 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7791 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
// Branch-free append: the slot is always written, but only kept (the count
// advances) when at least one lane overlapped.
7793 coarse_groups[coarse_group_count] = mask | ij_bits;
7794 coarse_group_count += mask != 0;
// Clear the bits of lanes below bounds_lane in the first group, so that within
// the cell's own vector only lanes >= bounds_lane remain.
// NOTE(review): any guard around this statement is not visible here.
7800 coarse_groups[first] &= ~((1 << bounds_lane) - 1);
7803 commit_array<uint32_t>(&temporary, coarse_group_count);
// Expand every set mask bit into one coarse pair word:
// upper 16 bits = cell i, lower 16 bits = other cell (vector * width + lane).
7805 uint32_t* coarse_pairs = reserve_array<uint32_t>(&temporary, coarse_group_count*simdv_width32, 32);
7806 unsigned coarse_pair_count = 0;
7808 for (
unsigned i = 0; i < coarse_group_count; ++i) {
7809 unsigned group = coarse_groups[i];
7810 unsigned mask = group & 0xff;
// Shifting by (8 - log2 width) turns the vector index into the index of that
// vector's first lane.
7812 unsigned batch = (group & 0xff00) >> (8 - simdv_width32_log2);
7813 unsigned other = group & 0xffff0000;
// NOTE(review): the loop that iterates over the set bits of `mask` is not
// visible in this excerpt; only its body lines are shown.
7816 unsigned index = first_set_bit(mask);
7819 coarse_pairs[coarse_pair_count++] = other | (batch + index);
7823 commit_array<uint32_t>(&temporary, coarse_pair_count);
// Fine overlap pass: for every coarse pair (a, b), test the individual boxes
// of cell a against the boxes of cell b and record lane masks in `groups`.
//
// Group word layout used by both variants (see the decoding further down):
// bits 0..7 lane mask, bits 8..18 index of the tested AABBV, bits 19..31
// sorted slot of the box from cell a.
// Sixteen entries are reserved per coarse pair.
7826 uint32_t* groups = reserve_array<uint32_t>(&temporary, coarse_pair_count*16, 32);
7827 unsigned group_count = 0;
// 256-bit variant: one AABBV holds a complete coarse cell (8 boxes).
7829#if NUDGE_SIMDV_WIDTH == 256
7830 for (
unsigned n = 0; n < coarse_pair_count; ++n) {
7831 unsigned pair = coarse_pairs[n];
7833 unsigned a = pair >> 16;
7834 unsigned b = pair & 0xffff;
7836 unsigned lane_count = 8;
// Clamp so lanes of cell a that lie beyond `count` are not visited.
7841 if (lane_count + (a << 3) > count)
7842 lane_count = count - (a << 3);
// a << 22 equals (a * 8) << 19, i.e. the sorted slot of cell a's first box.
7848 unsigned ij_bits = (b << 8) | (a << 22);
// When a cell is tested against itself, only lanes above the current lane are
// kept so each unordered pair is reported once and self-pairs are dropped. The
// mask is shifted left by one per lane and read through `>> 8`.
7850 unsigned lower_lane_mask = a == b ? 0xfe00 : 0xffff;
7852 simdv_float min_b_x = simd_float::loadv(bounds[b].min_x);
7853 simdv_float max_b_x = simd_float::loadv(bounds[b].max_x);
7854 simdv_float min_b_y = simd_float::loadv(bounds[b].min_y);
7855 simdv_float max_b_y = simd_float::loadv(bounds[b].max_y);
7856 simdv_float min_b_z = simd_float::loadv(bounds[b].min_z);
7857 simdv_float max_b_z = simd_float::loadv(bounds[b].max_z);
// One iteration per box of cell a; ij_bits advances the slot field by one.
7859 for (
unsigned i = 0; i < lane_count; ++i, ij_bits += (1 << 19)) {
7860 simdv_float min_a_x = simd_float::broadcast_loadv(bounds[a].min_x + i);
7861 simdv_float max_a_x = simd_float::broadcast_loadv(bounds[a].max_x + i);
7862 simdv_float min_a_y = simd_float::broadcast_loadv(bounds[a].min_y + i);
7863 simdv_float max_a_y = simd_float::broadcast_loadv(bounds[a].max_y + i);
7864 simdv_float min_a_z = simd_float::broadcast_loadv(bounds[a].min_z + i);
7865 simdv_float max_a_z = simd_float::broadcast_loadv(bounds[a].max_z + i);
7867 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7868 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7869 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7871 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
7874 mask &= lower_lane_mask >> 8;
7875 lower_lane_mask <<= 1;
// Branch-free append: keep the entry only when some lane overlapped.
7877 groups[group_count] = mask | ij_bits;
7878 group_count += mask != 0;
// Other variant (presumably for narrower SIMD; the separating directive is not
// visible here): a coarse cell spans several AABBVs, so slots and vector
// ranges are computed explicitly.
7883 for (
unsigned n = 0; n < coarse_pair_count; ++n) {
7884 unsigned pair = coarse_pairs[n];
7886 unsigned a = pair >> 16;
7887 unsigned b = pair & 0xffff;
// Sorted slots [a_start, a_end) are the 8 boxes of cell a.
7889 unsigned a_start = a << 3;
7890 unsigned a_end = a_start + (1 << 3);
// AABBV indices [b_start, b_end) cover cell b, clamped to the allocated range.
7895 unsigned b_start = b << (3 - simdv_width32_log2);
7896 unsigned b_end = b_start + (1 << (3 - simdv_width32_log2));
7898 if (b_end > bounds_count)
7899 b_end = bounds_count;
7901 for (
unsigned i = a_start; i < a_end; ++i) {
7902 unsigned bounds_group = i >> simdv_width32_log2;
7903 unsigned bounds_lane = i & (simdv_width32-1);
7905 simdv_float min_a_x = simd_float::broadcast_loadv(bounds[bounds_group].min_x + bounds_lane);
7906 simdv_float max_a_x = simd_float::broadcast_loadv(bounds[bounds_group].max_x + bounds_lane);
7907 simdv_float min_a_y = simd_float::broadcast_loadv(bounds[bounds_group].min_y + bounds_lane);
7908 simdv_float max_a_y = simd_float::broadcast_loadv(bounds[bounds_group].max_y + bounds_lane);
7909 simdv_float min_a_z = simd_float::broadcast_loadv(bounds[bounds_group].min_z + bounds_lane);
7910 simdv_float max_a_z = simd_float::broadcast_loadv(bounds[bounds_group].max_z + bounds_lane);
7912 unsigned first = group_count;
// Start at the vector containing slot i+1 so earlier slots are not revisited.
// NOTE(review): the statement controlled by the `if` below and the per-j
// update of ij_bits are not visible in this excerpt.
7914 unsigned start = (i+1) >> simdv_width32_log2;
7916 if (start < b_start)
7923 unsigned ij_bits = (start << 8) | (i << 19);
7925 for (
unsigned j = start; j < b_end; ++j) {
7926 simdv_float min_b_x = simd_float::loadv(bounds[j].min_x);
7927 simdv_float max_b_x = simd_float::loadv(bounds[j].max_x);
7928 simdv_float min_b_y = simd_float::loadv(bounds[j].min_y);
7929 simdv_float max_b_y = simd_float::loadv(bounds[j].max_y);
7930 simdv_float min_b_z = simd_float::loadv(bounds[j].min_z);
7931 simdv_float max_b_z = simd_float::loadv(bounds[j].max_z);
7933 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7934 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7935 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7937 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
7939 groups[group_count] = mask | ij_bits;
7940 group_count += mask != 0;
// If the first recorded group is box i's own vector, clear the bits for lanes
// up to and including bounds_lane: this drops the self-pair and the pairs
// with earlier lanes.
7946 if (first < group_count && (groups[first] & 0x7ff00) == (bounds_group << 8))
7947 groups[first] &= ~((2 << bounds_lane) - 1);
7952 commit_array<uint32_t>(&temporary, group_count);
// Expand the fine lane masks into explicit pairs of sorted slots, translate
// the slots back to original collider indices, and sort the pair list.
7954 uint32_t* pairs = reserve_array<uint32_t>(&temporary, group_count*simdv_width32, 32);
7955 unsigned pair_count = 0;
7957 for (
unsigned i = 0; i < group_count; ++i) {
7958 unsigned group = groups[i];
7959 unsigned mask = group & 0xff;
// Bits 8..18 hold an AABBV index; the shift converts it to the slot of that
// vector's first lane. Bits 19..31 hold the other box's slot, moved to the
// upper 16 bits of the pair word.
7961 unsigned batch = (group & 0x7ff00) >> (8 - simdv_width32_log2);
7962 unsigned base = ((uint32_t)(group >> 19) << 16) | batch;
// NOTE(review): the loop that walks the set bits of `mask` is not visible in
// this excerpt; only its body lines are shown.
7965 unsigned index = first_set_bit(mask);
7968 pairs[pair_count++] = base + index;
7972 commit_array<uint32_t>(&temporary, pair_count);
// Replace both 16-bit sorted slots by the original collider indices.
7974 for (
unsigned i = 0; i < pair_count; ++i) {
7975 unsigned pair = pairs[i];
7976 pairs[i] = sorted_indices[pair & 0xffff] | ((uint32_t)sorted_indices[pair >> 16] << 16);
7979 radix_sort_uint32(pairs, pair_count, temporary);
// Group bodies into connected sets with a union-find structure (first over
// explicit body connections, then over broadphase pairs), mark active sets,
// and drop pairs in place.
// NOTE(review): many lines of this region are missing from the excerpt (loop
// bodies, `continue` statements, the conditions that mark a set active or
// remove a pair). Comments describe only what the visible statements show.
7983 NUDGE_ARENA_SCOPE(temporary);
7986 uint16_t* heights = allocate_array<uint16_t>(&temporary, bodies.
count, 16);
7987 uint16_t* parents = allocate_array<uint16_t>(&temporary, bodies.
count, 16);
// heights start at 0; parents are filled with 0xff bytes, i.e. 0xffff per
// entry, which the loops below treat as "no parent" (the body is a root).
7989 memset(heights, 0,
sizeof(heights[0])*bodies.
count);
7990 memset(parents, 0xff,
sizeof(parents[0])*bodies.
count);
// Pass 1: union the two bodies of every body connection.
7992 for (
unsigned i = 0; i < body_connections.
count; ++i) {
7995 unsigned a = pair.
a;
7996 unsigned b = pair.
b;
// Filtered pairs do not link their bodies.
8000 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
continue;
// Find the root of a by following parent links until the sentinel.
8005 unsigned a_root = a;
8006 unsigned a_parent = parents[a];
8008 for (
unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
// Same for b.
8011 unsigned b_root = b;
8012 unsigned b_parent = parents[b];
8014 for (
unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
// Already in the same set (the action taken is not visible here).
8017 if (a_root == b_root)
8021 unsigned a_height = heights[a_root];
8022 unsigned b_height = heights[b_root];
// Union by height: attach the shorter tree under the taller one; on a tie the
// surviving root a_root grows by one.
8026 if (a_height < b_height) {
8027 parents[a_root] = b_root;
8031 parents[b_root] = a_root;
8035 if (a_height == b_height)
8036 heights[a_root] = a_height+1;
// Walk both original chains up to their roots; presumably this re-points the
// visited nodes (path compression) - the loop bodies are not fully visible.
8040 while (a_parent != a_root) {
8041 unsigned next = parents[a_parent];
8050 while (b_parent != b_root) {
8051 unsigned next = parents[b_parent];
// Pass 2: the same union procedure for every broadphase pair, using the
// bodies that own the two colliders.
8060 for (
unsigned i = 0; i < pair_count; ++i) {
8061 unsigned pair = pairs[i];
8063 unsigned a = collider_bodies[pair & 0xffff];
8064 unsigned b = collider_bodies[pair >> 16];
8068 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
continue;
8073 unsigned a_root = a;
8074 unsigned a_parent = parents[a];
8076 for (
unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8079 unsigned b_root = b;
8080 unsigned b_parent = parents[b];
8082 for (
unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8085 if (a_root == b_root)
8089 unsigned a_height = heights[a_root];
8090 unsigned b_height = heights[b_root];
8094 if (a_height < b_height) {
8095 parents[a_root] = b_root;
8099 parents[b_root] = a_root;
8103 if (a_height == b_height)
8104 heights[a_root] = a_height+1;
8108 while (a_parent != a_root) {
8109 unsigned next = parents[a_parent];
8118 while (b_parent != b_root) {
8119 unsigned next = parents[b_parent];
// Assign a dense set id to every body. `sets` reuses the storage of `heights`,
// which is no longer needed; 0xffff marks "no id assigned yet".
8129 unsigned set_count = 0;
8130 uint16_t* sets = heights;
8131 memset(sets, 0xff,
sizeof(sets[0])*bodies.
count);
8133 for (
unsigned i = 0 ; i < bodies.
count; ++i) {
8136 unsigned root = parents[i];
8138 for (
unsigned parent = root; parent != 0xffff; parent = parents[root])
// A root receives the next free id the first time it is reached; every body
// then copies its root's id.
8144 if (sets[root] == 0xffff)
8145 sets[root] = set_count++;
8147 sets[i] = sets[root];
// One flag byte per set, initially 0. The condition under which a body marks
// its set active is not visible in this excerpt.
8153 uint8_t* active = allocate_array<uint8_t>(&temporary, set_count, 16);
8154 memset(active, 0,
sizeof(active[0])*set_count);
8156 for (
unsigned i = 0 ; i < bodies.
count; ++i) {
8161 active[sets[i]] = 1;
// Compact `pairs` in place: surviving pairs are moved down by the number of
// pairs removed so far.
8165 unsigned removed = 0;
8167 for (
unsigned i = 0; i < pair_count; ++i) {
8168 unsigned pair = pairs[i];
8170 unsigned a = collider_bodies[pair & 0xffff];
8171 unsigned b = collider_bodies[pair >> 16];
// NOTE(review): the two set ids are combined with bitwise OR; the reasoning
// and the test applied to `set` are not visible here - confirm intent.
8180 unsigned set = sets[a] | sets[b];
8183 pairs[i-removed] = pair;
8186 unsigned a = collider_tags[pair & 0xffff];
8187 unsigned b = collider_tags[pair >> 16];
8194 pair_count -= removed;
// Partition the surviving pairs into four buckets, indexed by `ab = a | b`,
// then process the buckets.
// NOTE(review): the lines that derive the per-collider values a and b used in
// `a | b`, the first entry of bucket_offsets, and the per-pair processing code
// are not visible in this excerpt.
8197 uint32_t bucket_sizes[4] = {};
// Counting pass (the statement that increments bucket_sizes is not visible).
8199 for (
unsigned i = 0; i < pair_count; ++i) {
8200 unsigned pair = pairs[i];
8202 unsigned a = pair & 0xffff;
8203 unsigned b = pair >> 16;
8208 unsigned ab = a | b;
// Start offsets of the buckets. Bucket 1 starts at bucket 0's size plus 7,
// with the low two bits cleared (a multiple of 4 that leaves at least 4
// entries of slack after bucket 0).
8213 uint32_t bucket_offsets[4] = {
8215 ((bucket_sizes[0] + 7) & ~3),
8216 ((bucket_sizes[0] + 7) & ~3) + bucket_sizes[1],
8217 ((bucket_sizes[0] + 7) & ~3) + bucket_sizes[1] + bucket_sizes[2],
// Running write cursor per bucket, initialised to the bucket's start offset.
8220 uint32_t written_per_bucket[4] = { bucket_offsets[0], bucket_offsets[1], bucket_offsets[2], bucket_offsets[3] };
// pair_count + 7 entries leave room for the padding added after bucket 0.
8222 uint32_t* partitioned_pairs = allocate_array<uint32_t>(&temporary, pair_count + 7, 16);
// Distribution pass: append each pair to its bucket.
8224 for (
unsigned i = 0; i < pair_count; ++i) {
8225 unsigned pair = pairs[i];
8227 unsigned a = pair & 0xffff;
8228 unsigned b = pair >> 16;
8233 unsigned ab = a | b;
8235 partitioned_pairs[written_per_bucket[ab]++] = pair;
// Swap the two 16-bit halves of every pair in bucket 2, presumably so buckets
// 1 and 2 share one operand order and can be handled by the same loop below.
8238 for (
unsigned i = 0; i < bucket_sizes[2]; ++i) {
8239 unsigned index = bucket_offsets[2] + i;
8240 unsigned pair = partitioned_pairs[index];
8242 partitioned_pairs[index] = (pair >> 16) | (pair << 16);
// Buckets 1 and 2 are contiguous and processed together.
8248 for (
unsigned i = 0; i < bucket_sizes[1] + bucket_sizes[2]; ++i) {
8249 unsigned pair = partitioned_pairs[bucket_offsets[1] + i];
8251 unsigned a = pair >> 16;
8252 unsigned b = pair & 0xffff;
// Combined friction of the two bodies, as defined by NUDGE_FRICTION_MODEL.
8266 const float friction = NUDGE_FRICTION_MODEL(properties[bodyA].friction,properties[bodyB].friction);
// Bucket 3 is processed by its own loop.
8275 for (
unsigned i = 0; i < bucket_sizes[3]; ++i) {
8276 unsigned pair = partitioned_pairs[bucket_offsets[3] + i];
8278 unsigned a = pair >> 16;
8279 unsigned b = pair & 0xffff;
8294 const float friction = NUDGE_FRICTION_MODEL(properties[bodyA].friction,properties[bodyB].friction);
// Second connectivity pass, this time driven by the generated contacts:
// rebuild the union-find sets from body connections and contact body pairs,
// mark active sets, and compact the contact arrays in place.
// NOTE(review): many lines of this region are missing from the excerpt (loop
// bodies, `continue` statements, the activity and removal conditions).
// Comments describe only what the visible statements show.
8304 NUDGE_ARENA_SCOPE(temporary);
8307 uint16_t* heights = allocate_array<uint16_t>(&temporary, bodies.
count, 16);
8308 uint16_t* parents = allocate_array<uint16_t>(&temporary, bodies.
count, 16);
// heights start at 0; parents start at 0xffff, the "no parent" sentinel.
8310 memset(heights, 0,
sizeof(heights[0])*bodies.
count);
8311 memset(parents, 0xff,
sizeof(parents[0])*bodies.
count);
// Pass 1: union the two bodies of every body connection.
8313 for (
unsigned i = 0; i < body_connections.
count; ++i) {
8316 unsigned a = pair.
a;
8317 unsigned b = pair.
b;
8321 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
continue;
// Find both roots by following parent links until the sentinel.
8326 unsigned a_root = a;
8327 unsigned a_parent = parents[a];
8329 for (
unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8332 unsigned b_root = b;
8333 unsigned b_parent = parents[b];
8335 for (
unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8338 if (a_root == b_root)
8342 unsigned a_height = heights[a_root];
8343 unsigned b_height = heights[b_root];
// Union by height; on a tie a_root becomes the parent and grows by one.
8347 if (a_height < b_height) {
8348 parents[a_root] = b_root;
8352 parents[b_root] = a_root;
8356 if (a_height == b_height)
8357 heights[a_root] = a_height+1;
// Presumably path compression along both chains (bodies not fully visible).
8361 while (a_parent != a_root) {
8362 unsigned next = parents[a_parent];
8371 while (b_parent != b_root) {
8372 unsigned next = parents[b_parent];
// Pass 2: one union per distinct (a, b) body pair among the contacts. The loop
// header has no increment; `i` is advanced by the do/while whose tail is shown
// below, skipping the run of consecutive contacts with the same body pair.
8381 for (
unsigned i = 0; i < contacts->
count; ) {
8382 unsigned a = contacts->
bodies[i].
a;
8383 unsigned b = contacts->
bodies[i].
b;
8388 while (i < contacts->count && contacts->
bodies[i].
a == a && contacts->
bodies[i].
b == b);
8392 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
continue;
8397 unsigned a_root = a;
8398 unsigned a_parent = parents[a];
8400 for (
unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8403 unsigned b_root = b;
8404 unsigned b_parent = parents[b];
8406 for (
unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8409 if (a_root == b_root)
8413 unsigned a_height = heights[a_root];
8414 unsigned b_height = heights[b_root];
8418 if (a_height < b_height) {
8419 parents[a_root] = b_root;
8423 parents[b_root] = a_root;
8427 if (a_height == b_height)
8428 heights[a_root] = a_height+1;
8432 while (a_parent != a_root) {
8433 unsigned next = parents[a_parent];
8442 while (b_parent != b_root) {
8443 unsigned next = parents[b_parent];
// Assign dense set ids; `sets` reuses the `heights` storage and 0xffff means
// "no id assigned yet".
8453 unsigned set_count = 0;
8454 uint16_t* sets = heights;
8455 memset(sets, 0xff,
sizeof(sets[0])*bodies.
count);
8457 for (
unsigned i = 0 ; i < bodies.
count; ++i) {
8460 unsigned root = parents[i];
8462 for (
unsigned parent = root; parent != 0xffff; parent = parents[root])
8468 if (sets[root] == 0xffff)
8469 sets[root] = set_count++;
8471 sets[i] = sets[root];
// One activity flag per set; the condition that sets it is not visible here.
8477 uint8_t* active = allocate_array<uint8_t>(&temporary, set_count, 16);
8478 memset(active, 0,
sizeof(active[0])*set_count);
8480 for (
unsigned i = 0 ; i < bodies.
count; ++i) {
8485 active[sets[i]] = 1;
// Per-body pass that reads each body's set id (its effect is not visible).
8489 for (
unsigned i = 0 ; i < bodies.
count; ++i) {
8492 unsigned set = sets[i];
// Compact the contact arrays in place. Contacts are handled in spans that
// share the upper 32 bits of their tag; kept spans are moved down by the
// number of contacts removed so far.
8499 unsigned removed = 0;
8501 for (
unsigned i = 0; i < contacts->
count; ) {
8502 unsigned a = contacts->
bodies[i].
a;
8503 unsigned b = contacts->
bodies[i].
b;
8504 unsigned tag = contacts->
tags[i] >> 32;
// Tail of the do/while that measures the span length.
8511 while (i+span < contacts->count && (contacts->
tags[i+span] >> 32) == tag);
// NOTE(review): the two set ids are combined with bitwise OR; the test
// applied to `set` is not visible here - confirm intent.
8513 unsigned set = sets[a] | sets[b];
8516 for (
unsigned j = 0; j < span; ++j) {
8517 contacts->
tags[i+j-removed] = contacts->
tags[i+j];
8518 contacts->
data[i+j-removed] = contacts->
data[i+j];
8530 contacts->
count -= removed;
// Working data that links the current contact set to cached impulses.
// NOTE(review): only three of the fields are visible in this excerpt; other
// code also accesses `data` and `culled_data` members of this struct.
8536struct ContactImpulseData {
// Contact indices ordered by tag (filled by the radix sorts in the reader).
8537 uint32_t* sorted_contacts;
// Tags of cache entries that were carried over without a matching contact,
// and how many of them there are.
8540 uint64_t* culled_tags;
8541 unsigned culled_count;
// Body of the routine that matches current contacts against the contact cache
// and fills a ContactImpulseData.
// NOTE(review): the function header and several statements (declarations of
// contact_impulses, culled_data and cached_impulse, and the branch conditions
// inside the loops) are not visible in this excerpt.
8547 ContactImpulseData* data = allocate_struct<ContactImpulseData>(memory, 64);
8550 uint32_t* sorted_contacts = allocate_array<uint32_t>(memory, contacts.
count, 16);
8551 data->sorted_contacts = sorted_contacts;
// `temporary` is a by-value copy of the arena, so the scratch key array is
// presumably released implicitly when the copy goes out of use (confirm Arena
// semantics).
8553 Arena temporary = *memory;
8554 uint32_t* contact_keys = allocate_array<uint32_t>(&temporary, contacts.
count, 16);
// Sort contact indices by their 64-bit tag in two 32-bit passes: first by the
// low word ...
8556 for (
unsigned i = 0; i < contacts.
count; ++i) {
8557 sorted_contacts[i] = i;
8558 contact_keys[i] = (uint32_t)contacts.
tags[i];
8561 radix_sort_uint32_x2(contact_keys, sorted_contacts, contacts.
count, temporary);
// ... then by the high word. This yields full 64-bit order provided the sort
// is stable (assumed - confirm against radix_sort_uint32_x2).
8563 for (
unsigned i = 0; i < contacts.
count; ++i) {
8564 unsigned index = sorted_contacts[i];
8565 contact_keys[i] = (uint32_t)(contacts.
tags[index] >> 32);
8568 radix_sort_uint32_x2(contact_keys, sorted_contacts, contacts.
count, temporary);
// At most every cache entry can end up in the culled list.
8573 uint64_t* culled_tags = allocate_array<uint64_t>(memory, contact_cache.
count, 16);
8574 unsigned culled_count = 0;
8577 data->data = contact_impulses;
// Merge walk over contacts (in tag order) and the cache, which the
// comparisons below assume to be ordered by tag as well.
8579 unsigned cached_contact_offset = 0;
8580 unsigned sleeping_pair_offset = 0;
8582 for (
unsigned i = 0; i < contacts.
count; ++i) {
8583 unsigned index = sorted_contacts[i];
8584 uint64_t tag = contacts.
tags[index];
// Consume cache entries whose tag is smaller than the current contact's tag.
// The visible statements show that such an entry may be copied to the culled
// list; the condition deciding that is not visible.
8588 uint64_t cached_tag;
8589 while (cached_contact_offset < contact_cache.
count && (cached_tag = contact_cache.
tags[cached_contact_offset]) < tag) {
8590 unsigned cached_pair = cached_tag >> 32;
8593 ++sleeping_pair_offset;
8596 culled_data[culled_count] = contact_cache.
data[cached_contact_offset];
8597 culled_tags[culled_count] = contact_cache.
tags[cached_contact_offset];
8601 ++cached_contact_offset;
// Exact tag match: reuse the cached impulse for this contact.
8604 if (cached_contact_offset < contact_cache.
count && contact_cache.
tags[cached_contact_offset] == tag)
8605 cached_impulse = contact_cache.
data[cached_contact_offset];
// Results are stored by original contact index, not by sorted position.
8607 contact_impulses[index] = cached_impulse;
// Tail: walk the remaining cache entries against the sleeping-pair list until
// either runs out. `a` is the upper 32 bits of the cached tag; the comparison
// that selects among the three advances below is not visible here.
8610 for (; cached_contact_offset < contact_cache.
count && sleeping_pair_offset < contacts.
sleeping_count; ) {
8611 unsigned a = contact_cache.
tags[cached_contact_offset] >> 32;
8615 ++cached_contact_offset;
8618 culled_data[culled_count] = contact_cache.
data[cached_contact_offset];
8619 culled_tags[culled_count] = contact_cache.
tags[cached_contact_offset];
8621 ++cached_contact_offset;
8624 ++sleeping_pair_offset;
// Publish the culled list through the result struct.
8628 data->culled_data = culled_data;
8629 data->culled_tags = culled_tags;
8630 data->culled_count = culled_count;
// Rebuilds the contact cache from two sources: the current contacts (visited through
// contact_impulses->sorted_contacts) and the culled entries stored in contact_impulses.
//   contact_cache    - cache to overwrite; its capacity must cover contacts.count + culled_count.
//   contacts         - current contacts; only `count` and `tags` are read in the visible lines.
//   contact_impulses - supplies sorted_contacts, data, culled_tags, culled_count (and culled_data).
// NOTE(review): the tag comparison that selects a branch in the first loop and the statements
// that advance i, j and k are not visible in this excerpt.
8635void write_cached_impulses(
ContactCache* contact_cache,
ContactData contacts, ContactImpulseData* contact_impulses) {
8636 uint32_t* sorted_contacts = contact_impulses->sorted_contacts;
8639 uint64_t* culled_tags = contact_impulses->culled_tags;
8640 unsigned culled_count = contact_impulses->culled_count;
// The new cache holds every current contact plus every culled entry.
8643 assert(contact_cache->
capacity >= contacts.
count + culled_count);
8644 contact_cache->
count = contacts.
count + culled_count;
// i: position in sorted_contacts, j: position in the culled arrays, k: output position.
8647 unsigned i = 0, j = 0, k = 0;
// Two-way merge while both inputs still have entries; `a` and `b` are the two candidate tags.
8649 while (i < contacts.
count && j < culled_count) {
8650 unsigned index = sorted_contacts[i];
8652 uint64_t a = contacts.
tags[index];
8653 uint64_t b = culled_tags[j];
// Output taken from the current contact.
8656 contact_cache->
tags[k] = contacts.
tags[index];
8657 contact_cache->
data[k] = contact_impulses->data[index];
// Output taken from the culled entry.
8661 contact_cache->
tags[k] = culled_tags[j];
8662 contact_cache->
data[k] = culled_data[j];
// Drain whatever is left of the current contacts.
8669 for (; i < contacts.
count; ++i) {
8670 unsigned index = sorted_contacts[i];
8672 contact_cache->
tags[k] = contacts.
tags[index];
8673 contact_cache->
data[k] = contact_impulses->data[index];
// Drain whatever is left of the culled entries.
8677 for (; j < culled_count; ++j) {
8678 contact_cache->
tags[k] = culled_tags[j];
8679 contact_cache->
data[k] = culled_data[j];
// Output of setup_contact_constraints: the batched (SIMD-wide) contact constraints plus
// the lookup tables needed to relate them to contacts and bodies.
// NOTE(review): the closing brace and possibly other members are not visible in this excerpt.
8685struct ContactConstraintData {
// Number of contacts at setup time (copied from contacts.count).
8686 unsigned contact_count;
// One InertiaTransform per body (array of bodies.count entries).
8687 InertiaTransform* momentum_to_velocity;
// For constraint batch i and lane j, entry [i*simdv_width32 + j] is the contact index in that lane.
8688 uint32_t* constraint_to_contact;
// One entry per batch; both arrays have constraint_batches elements.
8690 ContactConstraintV* constraints;
8691 ContactConstraintStateV* constraint_states;
// Number of batches that were produced (the final contact_slot_count).
8692 unsigned constraint_batches;
// Builds the batched contact constraints for the solver.
// Visible steps: (1) per-body inertia transforms, (2) grouping of contacts into SIMD-wide
// slots such that no body index appears twice within one slot, (3) per-slot constraint setup,
// (4) applying the cached impulses to the body momentum ("warm start").
//   c                - context; the locals `contacts`, `bodies`, `impulses`, ... used below are
//                      presumably derived from it in lines that are not visible here.
//   contact_impulses - provides the contact visiting order (sorted_contacts).
//   memory           - arena that receives all persistent allocations.
// Returns the ContactConstraintData allocated from `memory` (return statement not visible).
// NOTE(review): many original lines (declarations, #else/#endif, closing braces) are elided
// in this excerpt; comments describe only what the visible lines do.
8695ContactConstraintData* setup_contact_constraints(
context_t* c, ContactImpulseData* contact_impulses,
Arena* memory) {
8704 uint32_t* contact_order = contact_impulses->sorted_contacts;
8706 ContactConstraintData* data = allocate_struct<ContactConstraintData>(memory, 64);
8707 data->contact_count = contacts.
count;
8709 InertiaTransform* momentum_to_velocity = allocate_array<InertiaTransform>(memory, bodies.
count, 32);
8710 data->momentum_to_velocity = momentum_to_velocity;
// Step 1: per body, combine the rotation matrix with the diagonal inverse inertia.
// Each entry is sum_k inertia_inverse[k] * m.ck[row] * m.ck[col]; the result is symmetric,
// so only xx, yy, zz, xy, xz, yz are stored.
8714 for (
unsigned i = 0; i < bodies.
count; ++i) {
8722 float3x3 m = matrix(rotation);
8724 InertiaTransform transform = {};
8726 transform.xx = inertia_inverse.x*m.c0.x*m.c0.x + inertia_inverse.y*m.c1.x*m.c1.x + inertia_inverse.z*m.c2.x*m.c2.x;
8727 transform.yy = inertia_inverse.x*m.c0.y*m.c0.y + inertia_inverse.y*m.c1.y*m.c1.y + inertia_inverse.z*m.c2.y*m.c2.y;
8728 transform.zz = inertia_inverse.x*m.c0.z*m.c0.z + inertia_inverse.y*m.c1.z*m.c1.z + inertia_inverse.z*m.c2.z*m.c2.z;
8729 transform.xy = inertia_inverse.x*m.c0.x*m.c0.y + inertia_inverse.y*m.c1.x*m.c1.y + inertia_inverse.z*m.c2.x*m.c2.y;
8730 transform.xz = inertia_inverse.x*m.c0.x*m.c0.z + inertia_inverse.y*m.c1.x*m.c1.z + inertia_inverse.z*m.c2.x*m.c2.z;
8731 transform.yz = inertia_inverse.x*m.c0.y*m.c0.z + inertia_inverse.y*m.c1.y*m.c1.z + inertia_inverse.z*m.c2.y*m.c2.z;
8733 momentum_to_velocity[i] = transform;
// Alternative branch (its condition is not visible): all-zero transform and unused0 cleared.
8736 else {memset(&momentum_to_velocity[i],0,
sizeof(InertiaTransform));bodies.
momentum[i].
unused0 = 0;}
8742 uint32_t* constraint_to_contact = allocate_array<uint32_t>(memory, contacts.
count*simdv_width32, 32);
8743 data->constraint_to_contact = constraint_to_contact;
// Step 2: contact_slots is only reserved here; it is committed further down with the number
// of slots that were actually produced.
8746 ContactSlotV* contact_slots = reserve_array<ContactSlotV>(memory, contacts.
count, 32);
8747 unsigned contact_slot_count = 0;
// Scratch allocations go through a by-value copy of the arena; committing contacts.count
// slots on the copy presumably keeps the scratch data clear of the reserved range - confirm.
8749 Arena temporary = *memory;
8750 commit_array<ContactSlotV>(&temporary, contacts.
count);
8751# ifndef NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT
8752# define NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT (16)
8754 static const unsigned bucket_count = NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT;
// Per bucket: partially filled pair vectors, the matching slot vectors, and how many exist.
8756 ContactPairV* vacant_pair_buckets[bucket_count];
8757 ContactSlotV* vacant_slot_buckets[bucket_count];
8758 unsigned bucket_vacancy_count[bucket_count] = {};
// All-ones marker used for lanes that have not been assigned a contact yet.
8760 simdv_int32 invalid_index = simd_int32::makev(~0u);
// Same bucket term as in get_required_arena_size_for_setup_contact_constraints.
8762 assert(temporary.
size>bucket_count*((2*contacts.
count+1)+31)*
sizeof(ContactPairV));
// Variant selected by ORIGINAL_CODE: two separate allocations per bucket.
8768# ifdef ORIGINAL_CODE
8769 for (
unsigned i = 0; i < bucket_count; ++i) {
8770 vacant_pair_buckets[i] = allocate_array<ContactPairV>(&temporary, contacts.
count+1, 32);
8771 vacant_slot_buckets[i] = allocate_array<ContactSlotV>(&temporary, contacts.
count, 32);
8774 simd_int32::storev((int32_t*)vacant_pair_buckets[i]->ab, invalid_index);
// Other variant (the #else line is not visible): one allocation shared by all buckets.
// Each bucket gets `stride` elements: contacts.count+1 pairs followed by the slots, which
// is why ContactPairV and ContactSlotV must have the same size.
8777 assert(
sizeof(ContactPairV)==
sizeof(ContactSlotV));
8778 const unsigned stride = (2*contacts.
count+1);
8779 ContactPairV* unified_alloc = allocate_array<ContactPairV>(&temporary, bucket_count*stride, 32);
8780 for (
unsigned i = 0; i < bucket_count; ++i) {
8781 vacant_pair_buckets[i] = unified_alloc;
8782 vacant_slot_buckets[i] = (ContactSlotV*) &unified_alloc[contacts.
count+1];
8783 unified_alloc+=stride;
// The first pair vector of every bucket starts out with all lanes marked invalid.
8785 simd_int32::storev((int32_t*)vacant_pair_buckets[i]->ab, invalid_index);
// Assign every contact (in contact_order) to a lane of some slot in bucket i % bucket_count.
8789 for (
unsigned i = 0; i < contacts.
count; ++i) {
8790 unsigned index = contact_order[i];
8793 unsigned bucket = i % bucket_count;
8794 ContactPairV* vacant_pairs = vacant_pair_buckets[bucket];
8795 ContactSlotV* vacant_slots = vacant_slot_buckets[bucket];
8796 unsigned vacancy_count = bucket_vacancy_count[bucket];
8800 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
8809 unsigned cb = b_filter->flags&
BF_IS_DYNAMIC ? active_bodies.
b : active_bodies.
a;
// 256-bit variant of the search (the selecting preprocessor lines are not visible).
// Body indices are broadcast as 16-bit values and compared against every 16-bit half of
// the already scheduled pairs; a zero movemask means neither body is present in that vector.
8821 __m256i a = _mm256_set1_epi16(ca);
8822 __m256i b = _mm256_set1_epi16(cb);
8824 __m256i scheduled_a_b;
8829 scheduled_a_b = _mm256_load_si256((
const __m256i*)vacant_pairs[j].ab);
8831 __m256i conflict = _mm256_packs_epi16(_mm256_cmpeq_epi16(a, scheduled_a_b), _mm256_cmpeq_epi16(b, scheduled_a_b));
8833 if (!_mm256_movemask_epi8(conflict))
// Lane to use: first lane that still holds the invalid marker.
8837 unsigned lane = first_set_bit((
unsigned)_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(scheduled_a_b, invalid_index))));
// 128-bit variant of the same search.
8839 __m128i a = _mm_set1_epi16(ca);
8840 __m128i b = _mm_set1_epi16(cb);
8842 __m128i scheduled_a_b;
8847 scheduled_a_b = _mm_load_si128((
const __m128i*)vacant_pairs[j].ab);
8849 __m128i conflict = _mm_packs_epi16(_mm_cmpeq_epi16(a, scheduled_a_b), _mm_cmpeq_epi16(b, scheduled_a_b));
8851 if (!_mm_movemask_epi8(conflict))
8855 unsigned lane = first_set_bit((
unsigned)_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(scheduled_a_b, invalid_index))));
// Record the contact index in the slot and the interleaved 16-bit (a, b) pair in the same lane.
8858 ContactSlotV* slot = vacant_slots + j;
8859 ContactPairV* pair = vacant_pairs + j;
8861 slot->indices[lane] = index;
8864 _mm_store_ss((
float*)pair->ab + lane, _mm_castsi128_ps(_mm_unpacklo_epi16(simd::extract_low(a), simd::extract_low(b))));
8866 _mm_store_ss((
float*)pair->ab + lane, _mm_castsi128_ps(_mm_unpacklo_epi16(a, b)));
// j == vacancy_count: the contact went into the trailing, previously empty vector.
8869 if (j == vacancy_count) {
// Last lane written: the slot is full, so it is emitted to contact_slots.
8872 else if (lane == simdv_width32-1) {
8873 simdv_int32 indices = simd_int32::loadv((
const int32_t*)slot->indices);
8877 ContactPairV* last_pair = vacant_pairs + vacancy_count;
8878 ContactSlotV* last_slot = vacant_slots + vacancy_count;
8880 simd_int32::storev((int32_t*)contact_slots[contact_slot_count++].indices, indices);
// Write the bucket state back and keep an all-invalid vector at position vacancy_count.
8890 bucket_vacancy_count[bucket] = vacancy_count;
8891 simd_int32::storev((int32_t*)vacant_pairs[vacancy_count].ab, invalid_index);
// Flush the slots that are still only partially filled.
8894 for (
unsigned i = 0; i < bucket_count; ++i) {
8895 ContactPairV* vacant_pairs = vacant_pair_buckets[i];
8896 ContactSlotV* vacant_slots = vacant_slot_buckets[i];
8897 unsigned vacancy_count = bucket_vacancy_count[i];
8901 for (
unsigned i = 0; i < vacancy_count; ++i) {
8902 simdv_int32 ab = simd_int32::loadv((int32_t*)vacant_pairs[i].ab);
8903 simdv_int32 indices = simd_int32::loadv((
const int32_t*)vacant_slots[i].indices);
// Lanes still marked invalid are blended with a broadcast of lane 0's contact index
// (presumably so unused lanes reference a valid contact - confirm blendv32 operand order).
8905 simdv_int32 mask = simd_int32::cmp_eq(ab, invalid_index);
8906 simdv_int32 first_index = simd128::shuffle32<0, 0, 0, 0>(indices);
8908#if NUDGE_SIMDV_WIDTH == 256
8909 first_index = simd256::shuffle128<0,0>(first_index);
8912 indices = simd::blendv32(indices, first_index, mask);
8914 simd_int32::storev((int32_t*)contact_slots[contact_slot_count++].indices, indices);
// Make the reservation permanent for exactly the slots that were produced.
8918 commit_array<ContactSlotV>(memory, contact_slot_count);
8920 ContactConstraintV* constraints = allocate_array<ContactConstraintV>(memory, contact_slot_count, 32);
8921 ContactConstraintStateV* constraint_states = allocate_array<ContactConstraintStateV>(memory, contact_slot_count, 32);
8923 data->constraints = constraints;
8924 data->constraint_states = constraint_states;
8926 memset(constraint_states, 0,
sizeof(ContactConstraintStateV)*contact_slot_count);
// Step 3: build one SIMD-wide constraint per slot.
8928 for (
unsigned i = 0; i < contact_slot_count; ++i) {
8929 ContactSlotV slot = contact_slots[i];
8931 for (
unsigned j = 0; j < simdv_width32; ++j)
8932 constraint_to_contact[i*simdv_width32 + j] = slot.indices[j];
// Gather the contact records of all lanes into structure-of-arrays registers.
8934 simdv_float position_x, position_y, position_z, penetration;
8935 simdv_float normal_x, normal_y, normal_z, friction;
8936 load8<
sizeof(contacts.
data[0]), 1>((
const float*)contacts.
data, slot.indices,
8937 position_x, position_y, position_z, penetration,
8938 normal_x, normal_y, normal_z, friction);
// Interleaved body indices per lane: even entries are body a, odd entries are body b.
8941 NUDGE_SIMDV_ALIGNED uint16_t ab_array[simdv_width32*2];
8943 for (
unsigned j = 0; j < simdv_width32; ++j) {
8945 ab_array[j*2 + 0] = pair.
a;
8946 ab_array[j*2 + 1] = pair.
b;
8949 unsigned a0 = ab_array[0];
unsigned a1 = ab_array[2];
unsigned a2 = ab_array[4];
unsigned a3 = ab_array[6];
8950 unsigned b0 = ab_array[1];
unsigned b1 = ab_array[3];
unsigned b2 = ab_array[5];
unsigned b3 = ab_array[7];
8952#if NUDGE_SIMDV_WIDTH == 256
8953 unsigned a4 = ab_array[8];
unsigned a5 = ab_array[10];
unsigned a6 = ab_array[12];
unsigned a7 = ab_array[14];
8954 unsigned b4 = ab_array[9];
unsigned b5 = ab_array[11];
unsigned b6 = ab_array[13];
unsigned b7 = ab_array[15];
8965 simdv_float a_position_x, a_position_y, a_position_z, a_position_w;
8966 simdv_float b_position_x, b_position_y, b_position_z, b_position_w;
8968 a_position_x, a_position_y, a_position_z, a_position_w);
8970 b_position_x, b_position_y, b_position_z, b_position_w);
// Contact point relative to each body's position.
8972 simdv_float pa_x = position_x - a_position_x;
8973 simdv_float pa_y = position_y - a_position_y;
8974 simdv_float pa_z = position_z - a_position_z;
8976 simdv_float pb_x = position_x - b_position_x;
8977 simdv_float pb_y = position_y - b_position_y;
8978 simdv_float pb_z = position_z - b_position_z;
// Inertia transform of body a (stride 2 over ab_array selects the even entries).
8980 simdv_float a_momentum_to_velocity_xx, a_momentum_to_velocity_yy, a_momentum_to_velocity_zz, a_momentum_to_velocity_u0;
8981 simdv_float a_momentum_to_velocity_xy, a_momentum_to_velocity_xz, a_momentum_to_velocity_yz, a_momentum_to_velocity_u1;
8982 load8<sizeof(momentum_to_velocity[0]), 2>((
const float*)momentum_to_velocity, ab_array,
8983 a_momentum_to_velocity_xx, a_momentum_to_velocity_yy, a_momentum_to_velocity_zz, a_momentum_to_velocity_u0,
8984 a_momentum_to_velocity_xy, a_momentum_to_velocity_xz, a_momentum_to_velocity_yz, a_momentum_to_velocity_u1);
// na = M_a * (pa x n): the symmetric inertia transform applied to the cross product.
8986 simdv_float na_xt, na_yt, na_zt;
8987 simd_soa::cross(pa_x, pa_y, pa_z, normal_x, normal_y, normal_z, na_xt, na_yt, na_zt);
8989 simdv_float na_x = a_momentum_to_velocity_xx*na_xt + a_momentum_to_velocity_xy*na_yt + a_momentum_to_velocity_xz*na_zt;
8990 simdv_float na_y = a_momentum_to_velocity_xy*na_xt + a_momentum_to_velocity_yy*na_yt + a_momentum_to_velocity_yz*na_zt;
8991 simdv_float na_z = a_momentum_to_velocity_xz*na_xt + a_momentum_to_velocity_yz*na_yt + a_momentum_to_velocity_zz*na_zt;
// Same for body b (ab_array + 1 selects the odd entries).
8993 simdv_float b_momentum_to_velocity_xx, b_momentum_to_velocity_yy, b_momentum_to_velocity_zz, b_momentum_to_velocity_u0;
8994 simdv_float b_momentum_to_velocity_xy, b_momentum_to_velocity_xz, b_momentum_to_velocity_yz, b_momentum_to_velocity_u1;
8995 load8<sizeof(momentum_to_velocity[0]), 2>((
const float*)momentum_to_velocity, ab_array + 1,
8996 b_momentum_to_velocity_xx, b_momentum_to_velocity_yy, b_momentum_to_velocity_zz, b_momentum_to_velocity_u0,
8997 b_momentum_to_velocity_xy, b_momentum_to_velocity_xz, b_momentum_to_velocity_yz, b_momentum_to_velocity_u1);
8999 simdv_float nb_xt, nb_yt, nb_zt;
9000 simd_soa::cross(pb_x, pb_y, pb_z, normal_x, normal_y, normal_z, nb_xt, nb_yt, nb_zt);
9002 simdv_float nb_x = b_momentum_to_velocity_xx*nb_xt + b_momentum_to_velocity_xy*nb_yt + b_momentum_to_velocity_xz*nb_zt;
9003 simdv_float nb_y = b_momentum_to_velocity_xy*nb_xt + b_momentum_to_velocity_yy*nb_yt + b_momentum_to_velocity_yz*nb_zt;
9004 simdv_float nb_z = b_momentum_to_velocity_xz*nb_xt + b_momentum_to_velocity_yz*nb_yt + b_momentum_to_velocity_zz*nb_zt;
// The *_t temporaries are reused for (na x pa) and (nb x pb); their sum projected on the
// normal is the rotational part of the denominator below.
9006 simd_soa::cross(na_x, na_y, na_z, pa_x, pa_y, pa_z, na_xt, na_yt, na_zt);
9007 simd_soa::cross(nb_x, nb_y, nb_z, pb_x, pb_y, pb_z, nb_xt, nb_yt, nb_zt);
9009 simdv_float normal_impulse_to_rotational_velocity_x = na_xt + nb_xt;
9010 simdv_float normal_impulse_to_rotational_velocity_y = na_yt + nb_yt;
9011 simdv_float normal_impulse_to_rotational_velocity_z = na_zt + nb_zt;
9013 simdv_float r_dot_n = normal_impulse_to_rotational_velocity_x*normal_x + normal_impulse_to_rotational_velocity_y*normal_y + normal_impulse_to_rotational_velocity_z*normal_z;
9015 simdv_float mass_inverse = a_mass_inverse + b_mass_inverse;
9016 simdv_float normal_velocity_to_normal_impulse = mass_inverse + r_dot_n;
// -1 / denominator, forced to zero in lanes where the denominator is exactly zero.
9018 simdv_float nonzero = simd_float::cmp_neq(normal_velocity_to_normal_impulse, simd_float::zerov());
9019 normal_velocity_to_normal_impulse = simd::bitwise_and(simd_float::makev(-1.0f) / normal_velocity_to_normal_impulse, nonzero);
// Bias is non-zero only for penetration beyond allowed_penetration.
9021 simdv_float bias = simd_float::makev(-bias_factor) * simd_float::max(penetration - simd_float::makev(allowed_penetration), simd_float::zerov()) * normal_velocity_to_normal_impulse;
// Build the first tangent direction u from the normal's components, then normalize it.
9024 simdv_float s = simd_float::abs(normal_x);
9026 simdv_float u_x = normal_z*s;
9027 simdv_float u_y = u_x - normal_z;
9028 simdv_float u_z = simd_float::madd(normal_x - normal_y, s, normal_y);
// XOR with -0.0f flips the sign bit, i.e. negates u_x.
9030 u_x = simd::bitwise_xor(u_x, simd_float::makev(-0.0f));
9031 simd_soa::normalize(u_x, u_y, u_z);
// Second tangent direction: v = u x n.
9034 simdv_float v_x, v_y, v_z;
9035 simd_soa::cross(u_x, u_y, u_z, normal_x, normal_y, normal_z, v_x, v_y, v_z);
// Angular arms of both tangents for each body: p x u and p x v.
9037 simdv_float ua_x, ua_y, ua_z, va_x, va_y, va_z;
9038 simd_soa::cross(pa_x, pa_y, pa_z, u_x, u_y, u_z, ua_x, ua_y, ua_z);
9039 simd_soa::cross(pa_x, pa_y, pa_z, v_x, v_y, v_z, va_x, va_y, va_z);
9041 simdv_float ub_x, ub_y, ub_z, vb_x, vb_y, vb_z;
9042 simd_soa::cross(pb_x, pb_y, pb_z, u_x, u_y, u_z, ub_x, ub_y, ub_z);
9043 simd_soa::cross(pb_x, pb_y, pb_z, v_x, v_y, v_z, vb_x, vb_y, vb_z);
// Quadratic forms with the symmetric inertia transform, split into the diagonal part (d*)
// and the off-diagonal part (s*); the off-diagonal part is counted twice further below.
9045 simdv_float a_duu = a_momentum_to_velocity_xx*ua_x*ua_x + a_momentum_to_velocity_yy*ua_y*ua_y + a_momentum_to_velocity_zz*ua_z*ua_z;
9046 simdv_float a_dvv = a_momentum_to_velocity_xx*va_x*va_x + a_momentum_to_velocity_yy*va_y*va_y + a_momentum_to_velocity_zz*va_z*va_z;
9047 simdv_float a_duv = a_momentum_to_velocity_xx*ua_x*va_x + a_momentum_to_velocity_yy*ua_y*va_y + a_momentum_to_velocity_zz*ua_z*va_z;
9049 simdv_float a_suu = a_momentum_to_velocity_xy*ua_x*ua_y + a_momentum_to_velocity_xz*ua_x*ua_z + a_momentum_to_velocity_yz*ua_y*ua_z;
9050 simdv_float a_svv = a_momentum_to_velocity_xy*va_x*va_y + a_momentum_to_velocity_xz*va_x*va_z + a_momentum_to_velocity_yz*va_y*va_z;
9051 simdv_float a_suv = a_momentum_to_velocity_xy*(ua_x*va_y + ua_y*va_x) + a_momentum_to_velocity_xz*(ua_x*va_z + ua_z*va_x) + a_momentum_to_velocity_yz*(ua_y*va_z + ua_z*va_y);
9053 simdv_float b_duu = b_momentum_to_velocity_xx*ub_x*ub_x + b_momentum_to_velocity_yy*ub_y*ub_y + b_momentum_to_velocity_zz*ub_z*ub_z;
9054 simdv_float b_dvv = b_momentum_to_velocity_xx*vb_x*vb_x + b_momentum_to_velocity_yy*vb_y*vb_y + b_momentum_to_velocity_zz*vb_z*vb_z;
9055 simdv_float b_duv = b_momentum_to_velocity_xx*ub_x*vb_x + b_momentum_to_velocity_yy*ub_y*vb_y + b_momentum_to_velocity_zz*ub_z*vb_z;
9057 simdv_float b_suu = b_momentum_to_velocity_xy*ub_x*ub_y + b_momentum_to_velocity_xz*ub_x*ub_z + b_momentum_to_velocity_yz*ub_y*ub_z;
9058 simdv_float b_svv = b_momentum_to_velocity_xy*vb_x*vb_y + b_momentum_to_velocity_xz*vb_x*vb_z + b_momentum_to_velocity_yz*vb_y*vb_z;
9059 simdv_float b_suv = b_momentum_to_velocity_xy*(ub_x*vb_y + ub_y*vb_x) + b_momentum_to_velocity_xz*(ub_x*vb_z + ub_z*vb_x) + b_momentum_to_velocity_yz*(ub_y*vb_z + ub_z*vb_y);
// friction_x / friction_y: uu and vv terms including the linear mass_inverse;
// friction_z: twice the mixed uv term of both bodies.
9061 simdv_float friction_x = mass_inverse + a_duu + a_suu + a_suu + b_duu + b_suu + b_suu;
9062 simdv_float friction_y = mass_inverse + a_dvv + a_svv + a_svv + b_dvv + b_svv + b_svv;
9063 simdv_float friction_z = a_duv + a_duv + a_suv + a_suv + b_duv + b_duv + b_suv + b_suv;
// Tangent arms multiplied by the inertia transform.
9065 simdv_float ua_xt = a_momentum_to_velocity_xx*ua_x + a_momentum_to_velocity_xy*ua_y + a_momentum_to_velocity_xz*ua_z;
9066 simdv_float ua_yt = a_momentum_to_velocity_xy*ua_x + a_momentum_to_velocity_yy*ua_y + a_momentum_to_velocity_yz*ua_z;
9067 simdv_float ua_zt = a_momentum_to_velocity_xz*ua_x + a_momentum_to_velocity_yz*ua_y + a_momentum_to_velocity_zz*ua_z;
9069 simdv_float va_xt = a_momentum_to_velocity_xx*va_x + a_momentum_to_velocity_xy*va_y + a_momentum_to_velocity_xz*va_z;
9070 simdv_float va_yt = a_momentum_to_velocity_xy*va_x + a_momentum_to_velocity_yy*va_y + a_momentum_to_velocity_yz*va_z;
9071 simdv_float va_zt = a_momentum_to_velocity_xz*va_x + a_momentum_to_velocity_yz*va_y + a_momentum_to_velocity_zz*va_z;
9073 simdv_float ub_xt = b_momentum_to_velocity_xx*ub_x + b_momentum_to_velocity_xy*ub_y + b_momentum_to_velocity_xz*ub_z;
9074 simdv_float ub_yt = b_momentum_to_velocity_xy*ub_x + b_momentum_to_velocity_yy*ub_y + b_momentum_to_velocity_yz*ub_z;
9075 simdv_float ub_zt = b_momentum_to_velocity_xz*ub_x + b_momentum_to_velocity_yz*ub_y + b_momentum_to_velocity_zz*ub_z;
9077 simdv_float vb_xt = b_momentum_to_velocity_xx*vb_x + b_momentum_to_velocity_xy*vb_y + b_momentum_to_velocity_xz*vb_z;
9078 simdv_float vb_yt = b_momentum_to_velocity_xy*vb_x + b_momentum_to_velocity_yy*vb_y + b_momentum_to_velocity_yz*vb_z;
9079 simdv_float vb_zt = b_momentum_to_velocity_xz*vb_x + b_momentum_to_velocity_yz*vb_y + b_momentum_to_velocity_zz*vb_z;
// Store the per-lane body indices and all precomputed vectors into the constraint.
9081 constraints[i].a[0] = a0; constraints[i].a[1] = a1; constraints[i].a[2] = a2; constraints[i].a[3] = a3;
9082 constraints[i].b[0] = b0; constraints[i].b[1] = b1; constraints[i].b[2] = b2; constraints[i].b[3] = b3;
9084#if NUDGE_SIMDV_WIDTH == 256
9085 constraints[i].a[4] = a4; constraints[i].a[5] = a5; constraints[i].a[6] = a6; constraints[i].a[7] = a7;
9086 constraints[i].b[4] = b4; constraints[i].b[5] = b5; constraints[i].b[6] = b6; constraints[i].b[7] = b7;
9089 simd_float::storev(constraints[i].n_x, normal_x);
9090 simd_float::storev(constraints[i].n_y, normal_y);
9091 simd_float::storev(constraints[i].n_z, normal_z);
9093 simd_float::storev(constraints[i].pa_x, pa_x);
9094 simd_float::storev(constraints[i].pa_y, pa_y);
9095 simd_float::storev(constraints[i].pa_z, pa_z);
9097 simd_float::storev(constraints[i].pb_x, pb_x);
9098 simd_float::storev(constraints[i].pb_y, pb_y);
9099 simd_float::storev(constraints[i].pb_z, pb_z);
9101 simd_float::storev(constraints[i].normal_velocity_to_normal_impulse, normal_velocity_to_normal_impulse);
9103 simd_float::storev(constraints[i].bias, bias);
9104 simd_float::storev(constraints[i].friction, friction);
9106 simd_float::storev(constraints[i].u_x, u_x);
9107 simd_float::storev(constraints[i].u_y, u_y);
9108 simd_float::storev(constraints[i].u_z, u_z);
9110 simd_float::storev(constraints[i].v_x, v_x);
9111 simd_float::storev(constraints[i].v_y, v_y);
9112 simd_float::storev(constraints[i].v_z, v_z);
9114 simd_float::storev(constraints[i].friction_coefficient_x, friction_x);
9115 simd_float::storev(constraints[i].friction_coefficient_y, friction_y);
9116 simd_float::storev(constraints[i].friction_coefficient_z, friction_z);
// The angular terms of body a are stored negated; those of body b are stored as computed.
9118 simd_float::storev(constraints[i].ua_x, -ua_xt);
9119 simd_float::storev(constraints[i].ua_y, -ua_yt);
9120 simd_float::storev(constraints[i].ua_z, -ua_zt);
9122 simd_float::storev(constraints[i].va_x, -va_xt);
9123 simd_float::storev(constraints[i].va_y, -va_yt);
9124 simd_float::storev(constraints[i].va_z, -va_zt);
9126 simd_float::storev(constraints[i].na_x, -na_x);
9127 simd_float::storev(constraints[i].na_y, -na_y);
9128 simd_float::storev(constraints[i].na_z, -na_z);
9130 simd_float::storev(constraints[i].ub_x, ub_xt);
9131 simd_float::storev(constraints[i].ub_y, ub_yt);
9132 simd_float::storev(constraints[i].ub_z, ub_zt);
9134 simd_float::storev(constraints[i].vb_x, vb_xt);
9135 simd_float::storev(constraints[i].vb_y, vb_yt);
9136 simd_float::storev(constraints[i].vb_z, vb_zt);
9138 simd_float::storev(constraints[i].nb_x, nb_x);
9139 simd_float::storev(constraints[i].nb_y, nb_y);
9140 simd_float::storev(constraints[i].nb_z, nb_z);
// Step 4: apply the cached impulse of each lane's contact to the bodies right away.
9142 simdv_float cached_impulse_x, cached_impulse_y, cached_impulse_z, unused0;
9143 load4<sizeof(impulses[0]), 1>((
const float*)impulses, slot.indices,
9144 cached_impulse_x, cached_impulse_y, cached_impulse_z, unused0);
// The fourth value of the first group is loaded into a_mass_inverse / b_mass_inverse.
9146 simdv_float a_velocity_x, a_velocity_y, a_velocity_z;
9147 simdv_float a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w;
9148 load8<
sizeof(bodies.
momentum[0]), 1>((
const float*)bodies.
momentum, constraints[i].a,
9149 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9150 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9152 simdv_float b_velocity_x, b_velocity_y, b_velocity_z;
9153 simdv_float b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w;
9154 load8<
sizeof(bodies.
momentum[0]), 1>((
const float*)bodies.
momentum, constraints[i].b,
9155 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9156 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
// Normal part of the cached impulse, clamped to be non-negative; it also bounds the friction.
9158 simdv_float normal_impulse = simd_float::max(normal_x*cached_impulse_x + normal_y*cached_impulse_y + normal_z*cached_impulse_z, simd_float::zerov());
9159 simdv_float max_friction_impulse = normal_impulse * friction;
// Tangential parts along u and v.
9161 simdv_float friction_impulse_x = u_x*cached_impulse_x + u_y*cached_impulse_y + u_z*cached_impulse_z;
9162 simdv_float friction_impulse_y = v_x*cached_impulse_x + v_y*cached_impulse_y + v_z*cached_impulse_z;
// scale = min(1, max_friction_impulse / |friction impulse|), via rsqrt of the squared length.
9164 simdv_float friction_clamp_scale = friction_impulse_x*friction_impulse_x + friction_impulse_y*friction_impulse_y;
9166 friction_clamp_scale = simd_float::rsqrt(friction_clamp_scale);
9167 friction_clamp_scale = friction_clamp_scale * max_friction_impulse;
9168 friction_clamp_scale = simd_float::min(simd_float::makev(1.0f), friction_clamp_scale);
9170 friction_impulse_x = friction_impulse_x * friction_clamp_scale;
9171 friction_impulse_y = friction_impulse_y * friction_clamp_scale;
// Recompose the clamped impulse from its u, v and normal components.
9173 simdv_float linear_impulse_x = friction_impulse_x*u_x + friction_impulse_y*v_x + normal_x * normal_impulse;
9174 simdv_float linear_impulse_y = friction_impulse_x*u_y + friction_impulse_y*v_y + normal_y * normal_impulse;
9175 simdv_float linear_impulse_z = friction_impulse_x*u_z + friction_impulse_y*v_z + normal_z * normal_impulse;
// Angular contributions use the vectors stored in the constraint above (body a's are negated).
9177 simdv_float a_angular_impulse_x = friction_impulse_x*simd_float::loadv(constraints[i].ua_x) + friction_impulse_y*simd_float::loadv(constraints[i].va_x) + normal_impulse*simd_float::loadv(constraints[i].na_x);
9178 simdv_float a_angular_impulse_y = friction_impulse_x*simd_float::loadv(constraints[i].ua_y) + friction_impulse_y*simd_float::loadv(constraints[i].va_y) + normal_impulse*simd_float::loadv(constraints[i].na_y);
9179 simdv_float a_angular_impulse_z = friction_impulse_x*simd_float::loadv(constraints[i].ua_z) + friction_impulse_y*simd_float::loadv(constraints[i].va_z) + normal_impulse*simd_float::loadv(constraints[i].na_z);
9181 simdv_float b_angular_impulse_x = friction_impulse_x*simd_float::loadv(constraints[i].ub_x) + friction_impulse_y*simd_float::loadv(constraints[i].vb_x) + normal_impulse*simd_float::loadv(constraints[i].nb_x);
9182 simdv_float b_angular_impulse_y = friction_impulse_x*simd_float::loadv(constraints[i].ub_y) + friction_impulse_y*simd_float::loadv(constraints[i].vb_y) + normal_impulse*simd_float::loadv(constraints[i].nb_y);
9183 simdv_float b_angular_impulse_z = friction_impulse_x*simd_float::loadv(constraints[i].ub_z) + friction_impulse_y*simd_float::loadv(constraints[i].vb_z) + normal_impulse*simd_float::loadv(constraints[i].nb_z);
// Body a receives the linear impulse with opposite sign to body b.
9185 a_velocity_x -= linear_impulse_x * a_mass_inverse;
9186 a_velocity_y -= linear_impulse_y * a_mass_inverse;
9187 a_velocity_z -= linear_impulse_z * a_mass_inverse;
9189 a_angular_velocity_x += a_angular_impulse_x;
9190 a_angular_velocity_y += a_angular_impulse_y;
9191 a_angular_velocity_z += a_angular_impulse_z;
9193 b_velocity_x += linear_impulse_x * b_mass_inverse;
9194 b_velocity_y += linear_impulse_y * b_mass_inverse;
9195 b_velocity_z += linear_impulse_z * b_mass_inverse;
9197 b_angular_velocity_x += b_angular_impulse_x;
9198 b_angular_velocity_y += b_angular_impulse_y;
9199 b_angular_velocity_z += b_angular_impulse_z;
// Remember what was applied so far, then write the updated momentum back to the bodies.
9201 simd_float::storev(constraint_states[i].applied_normal_impulse, normal_impulse);
9202 simd_float::storev(constraint_states[i].applied_friction_impulse_x, friction_impulse_x);
9203 simd_float::storev(constraint_states[i].applied_friction_impulse_y, friction_impulse_y);
9205 store8<
sizeof(bodies.
momentum[0]), 1>((
float*)bodies.
momentum, constraints[i].a,
9206 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9207 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9209 store8<
sizeof(bodies.
momentum[0]), 1>((
float*)bodies.
momentum, constraints[i].b,
9210 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9211 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9214 data->constraint_batches = contact_slot_count;
// Computes an arena size (in bytes, as uintptr_t) for setup_contact_constraints, based on
// the counts stored in the context `c`.
// NOTE(review): only the first and the last term of the sum are visible in this excerpt.
9219uintptr_t get_required_arena_size_for_setup_contact_constraints(
context_t* c) {
// Result struct plus 63 bytes (presumably slack for its 64-byte alignment - confirm).
9221 sizeof(ContactConstraintData)+63+
// Scratch term for the vacancy buckets; same expression as the assert on temporary.size
// in setup_contact_constraints.
9225 NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT*((2*c->
contact_data.
count+1)+31)*
sizeof(ContactPairV);
// Runs one solver pass over every contact-constraint batch in `data`.
//
// For each batch (one SIMD vector of constraints) it:
//   1. gathers the momentum of bodies a and b with load8,
//   2. computes the relative velocity at the contact point and projects it onto
//      the normal n and the two tangent directions u and v,
//   3. updates the accumulated normal impulse (clamped to be non-negative) and
//      the accumulated friction impulses (clamped against
//      normal_impulse * constraint.friction),
//   4. writes the accumulated impulses back to data->constraint_states and the
//      updated velocities back to bodies.momentum with store8.
//
// Parameters:
//   data   - constraint batches plus their per-batch accumulated impulse state;
//            constraint_states is modified in place.
//   bodies - passed by value, but bodies.momentum is a pointer, so the bodies'
//            momentum entries are modified in place.
//
// NOTE(review): loads and arithmetic are deliberately interleaved in the original;
// keep the statement order as is when editing.
9228void apply_impulses(ContactConstraintData* data,
BodyData bodies) {
9229 ContactConstraintV* constraints = data->constraints;
9230 ContactConstraintStateV* constraint_states = data->constraint_states;
9232 unsigned constraint_batches = data->constraint_batches;
9234 for (
unsigned i = 0; i < constraint_batches; ++i) {
9235 const ContactConstraintV& constraint = constraints[i];
// --- Gather body a (indexed by constraint.a) and start its contact-point velocity.
9237 simdv_float a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse;
9238 simdv_float a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w;
9239 load8<
sizeof(bodies.
momentum[0]), 1>((
const float*)bodies.
momentum, constraint.a,
9240 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9241 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9243 simdv_float pa_z = simd_float::loadv(constraint.pa_z);
9244 simdv_float pa_x = simd_float::loadv(constraint.pa_x);
9245 simdv_float pa_y = simd_float::loadv(constraint.pa_y);
// v_*a holds a's linear velocity plus the positive half of (angular_velocity_a x pa).
9247 simdv_float v_xa = simd_float::madd(a_angular_velocity_y, pa_z, a_velocity_x);
9248 simdv_float v_ya = simd_float::madd(a_angular_velocity_z, pa_x, a_velocity_y);
9249 simdv_float v_za = simd_float::madd(a_angular_velocity_x, pa_y, a_velocity_z);
// --- Gather body b (indexed by constraint.b) and start its contact-point velocity.
9251 simdv_float b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse;
9252 simdv_float b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w;
9253 load8<
sizeof(bodies.
momentum[0]), 1>((
const float*)bodies.
momentum, constraint.b,
9254 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9255 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9257 simdv_float pb_z = simd_float::loadv(constraint.pb_z);
9258 simdv_float pb_x = simd_float::loadv(constraint.pb_x);
9259 simdv_float pb_y = simd_float::loadv(constraint.pb_y);
9261 simdv_float v_xb = simd_float::madd(b_angular_velocity_y, pb_z, b_velocity_x);
9262 simdv_float v_yb = simd_float::madd(b_angular_velocity_z, pb_x, b_velocity_y);
9263 simdv_float v_zb = simd_float::madd(b_angular_velocity_x, pb_y, b_velocity_z);
// The subtracted half of each cross product is added to the *other* body's
// accumulator, so that (v_*b - v_*a) below yields
// (vel_b + w_b x pb) - (vel_a + w_a x pa) using only multiply-adds.
9265 v_xa = simd_float::madd(b_angular_velocity_z, pb_y, v_xa);
9266 v_ya = simd_float::madd(b_angular_velocity_x, pb_z, v_ya);
9267 v_za = simd_float::madd(b_angular_velocity_y, pb_x, v_za);
9269 simdv_float n_x = simd_float::loadv(constraint.n_x);
9270 simdv_float fu_x = simd_float::loadv(constraint.u_x);
9271 simdv_float fv_x = simd_float::loadv(constraint.v_x);
9273 v_xb = simd_float::madd(a_angular_velocity_z, pa_y, v_xb);
9274 v_yb = simd_float::madd(a_angular_velocity_x, pa_z, v_yb);
9275 v_zb = simd_float::madd(a_angular_velocity_y, pa_x, v_zb);
9277 simdv_float n_y = simd_float::loadv(constraint.n_y);
9278 simdv_float fu_y = simd_float::loadv(constraint.u_y);
9279 simdv_float fv_y = simd_float::loadv(constraint.v_y);
// Relative velocity of b with respect to a at the contact point.
9281 simdv_float v_x = v_xb - v_xa;
9282 simdv_float v_y = v_yb - v_ya;
9283 simdv_float v_z = v_zb - v_za;
// Dot products accumulated component by component:
// t_z = n . v, t_x = u . v, t_y = v_dir . v.
9285 simdv_float t_z = n_x * v_x;
9286 simdv_float t_x = v_x * fu_x;
9287 simdv_float t_y = v_x * fv_x;
9289 simdv_float n_z = simd_float::loadv(constraint.n_z);
9290 simdv_float fu_z = simd_float::loadv(constraint.u_z);
9291 simdv_float fv_z = simd_float::loadv(constraint.v_z);
9293 simdv_float normal_bias = simd_float::loadv(constraint.bias);
9294 simdv_float old_normal_impulse = simd_float::loadv(constraint_states[i].applied_normal_impulse);
9295 simdv_float normal_factor = simd_float::loadv(constraint.normal_velocity_to_normal_impulse);
9297 t_z = simd_float::madd(n_y, v_y, t_z);
9298 t_x = simd_float::madd(v_y, fu_y, t_x);
9299 t_y = simd_float::madd(v_y, fv_y, t_y);
9301 normal_bias = normal_bias + old_normal_impulse;
9303 t_z = simd_float::madd(n_z, v_z, t_z);
9304 t_x = simd_float::madd(v_z, fu_z, t_x);
9305 t_y = simd_float::madd(v_z, fv_z, t_y);
// --- Normal impulse: new accumulated value = factor * (n . v) + bias + old.
9307 simdv_float normal_impulse = simd_float::madd(normal_factor, t_z, normal_bias);
9309 simdv_float t_xx = t_x*t_x;
9310 simdv_float t_yy = t_y*t_y;
9311 simdv_float t_xy = t_x*t_y;
9312 simdv_float tl2 = t_xx + t_yy;
// Clamp the accumulated normal impulse so it never becomes negative.
9314 normal_impulse = simd_float::max(normal_impulse, simd_float::zerov());
9319 simd_float::storev(constraint_states[i].applied_normal_impulse, normal_impulse);
// The friction limit uses the clamped accumulated normal impulse; after this point
// normal_impulse holds only the delta applied in this pass.
9321 simdv_float max_friction_impulse = normal_impulse * simd_float::loadv(constraint.friction);
9322 normal_impulse = normal_impulse - old_normal_impulse;
// --- Friction factor = 1 / (t_x^2 * fc_x + t_y^2 * fc_y + t_x*t_y * fc_z),
// interleaved with the normal part of the linear impulse (n * delta normal impulse).
9324 simdv_float friction_x = simd_float::loadv(constraint.friction_coefficient_x);
9325 simdv_float friction_factor = t_xx * friction_x;
9326 simdv_float linear_impulse_x = n_x * normal_impulse;
9328 simdv_float friction_y = simd_float::loadv(constraint.friction_coefficient_y);
9329 friction_factor = simd_float::madd(t_yy, friction_y, friction_factor);
9330 simdv_float linear_impulse_y = n_y * normal_impulse;
9332 simdv_float friction_z = simd_float::loadv(constraint.friction_coefficient_z);
9333 friction_factor = simd_float::madd(t_xy, friction_z, friction_factor);
9334 simdv_float linear_impulse_z = n_z * normal_impulse;
9336 friction_factor = simd_float::recip(friction_factor);
// Angular response of body a to the normal impulse delta.
9338 simdv_float na_x = simd_float::loadv(constraint.na_x);
9339 simdv_float na_y = simd_float::loadv(constraint.na_y);
9340 simdv_float na_z = simd_float::loadv(constraint.na_z);
9342 a_angular_velocity_x = simd_float::madd(na_x, normal_impulse, a_angular_velocity_x);
9343 a_angular_velocity_y = simd_float::madd(na_y, normal_impulse, a_angular_velocity_y);
9344 a_angular_velocity_z = simd_float::madd(na_z, normal_impulse, a_angular_velocity_z);
9346 simdv_float old_friction_impulse_x = simd_float::loadv(constraint_states[i].applied_friction_impulse_x);
9347 simdv_float old_friction_impulse_y = simd_float::loadv(constraint_states[i].applied_friction_impulse_y);
// Cap the reciprocal at 1e+6 (presumably to guard against a near-zero
// denominator -- confirm intent).
9349 friction_factor = simd_float::min(simd_float::makev(1e+6f), friction_factor);
9351 simdv_float friction_impulse_x = t_x*friction_factor;
9352 simdv_float friction_impulse_y = t_y*friction_factor;
// New accumulated friction impulse = old - (tangential velocity * factor).
9354 friction_impulse_x = old_friction_impulse_x - friction_impulse_x;
9355 friction_impulse_y = old_friction_impulse_y - friction_impulse_y;
// Squared length of the accumulated friction impulse; turned into
// min(1, max_friction_impulse / length) below.
9357 simdv_float friction_clamp_scale = friction_impulse_x*friction_impulse_x + friction_impulse_y*friction_impulse_y;
9359 simdv_float nb_x = simd_float::loadv(constraint.nb_x);
9360 simdv_float nb_y = simd_float::loadv(constraint.nb_y);
9361 simdv_float nb_z = simd_float::loadv(constraint.nb_z);
9363 friction_clamp_scale = simd_float::rsqrt(friction_clamp_scale);
// Angular response of body b to the normal impulse delta.
9365 b_angular_velocity_x = simd_float::madd(nb_x, normal_impulse, b_angular_velocity_x);
9366 b_angular_velocity_y = simd_float::madd(nb_y, normal_impulse, b_angular_velocity_y);
9367 b_angular_velocity_z = simd_float::madd(nb_z, normal_impulse, b_angular_velocity_z);
9369 friction_clamp_scale = friction_clamp_scale * max_friction_impulse;
9370 friction_clamp_scale = simd_float::min(simd_float::makev(1.0f), friction_clamp_scale);
9372 friction_impulse_x = friction_impulse_x * friction_clamp_scale;
9373 friction_impulse_y = friction_impulse_y * friction_clamp_scale;
// Persist the clamped accumulated friction impulses, then reduce to this pass's delta.
9375 simd_float::storev(constraint_states[i].applied_friction_impulse_x, friction_impulse_x);
9376 simd_float::storev(constraint_states[i].applied_friction_impulse_y, friction_impulse_y);
9378 friction_impulse_x -= old_friction_impulse_x;
9379 friction_impulse_y -= old_friction_impulse_y;
// --- Total linear impulse delta = n * dN + u * dFx + v * dFy.
9381 linear_impulse_x = simd_float::madd(fu_x, friction_impulse_x, linear_impulse_x);
9382 linear_impulse_y = simd_float::madd(fu_y, friction_impulse_x, linear_impulse_y);
9383 linear_impulse_z = simd_float::madd(fu_z, friction_impulse_x, linear_impulse_z);
9385 linear_impulse_x = simd_float::madd(fv_x, friction_impulse_y, linear_impulse_x);
9386 linear_impulse_y = simd_float::madd(fv_y, friction_impulse_y, linear_impulse_y);
9387 linear_impulse_z = simd_float::madd(fv_z, friction_impulse_y, linear_impulse_z);
// XOR with -0.0f flips the sign bit: body a receives the opposite linear impulse to b.
9389 simdv_float a_mass_inverse_neg = simd::bitwise_xor(a_mass_inverse, simd_float::makev(-0.0f));
9391 a_velocity_x = simd_float::madd(linear_impulse_x, a_mass_inverse_neg, a_velocity_x);
9392 a_velocity_y = simd_float::madd(linear_impulse_y, a_mass_inverse_neg, a_velocity_y);
9393 a_velocity_z = simd_float::madd(linear_impulse_z, a_mass_inverse_neg, a_velocity_z);
// Angular response of body a to the two friction impulse deltas.
9395 simdv_float ua_x = simd_float::loadv(constraint.ua_x);
9396 simdv_float ua_y = simd_float::loadv(constraint.ua_y);
9397 simdv_float ua_z = simd_float::loadv(constraint.ua_z);
9399 a_angular_velocity_x = simd_float::madd(ua_x, friction_impulse_x, a_angular_velocity_x);
9400 a_angular_velocity_y = simd_float::madd(ua_y, friction_impulse_x, a_angular_velocity_y);
9401 a_angular_velocity_z = simd_float::madd(ua_z, friction_impulse_x, a_angular_velocity_z);
9403 simdv_float va_x = simd_float::loadv(constraint.va_x);
9404 simdv_float va_y = simd_float::loadv(constraint.va_y);
9405 simdv_float va_z = simd_float::loadv(constraint.va_z);
9407 a_angular_velocity_x = simd_float::madd(va_x, friction_impulse_y, a_angular_velocity_x);
9408 a_angular_velocity_y = simd_float::madd(va_y, friction_impulse_y, a_angular_velocity_y);
9409 a_angular_velocity_z = simd_float::madd(va_z, friction_impulse_y, a_angular_velocity_z);
// The eighth gathered float is written back as zero.
9411 a_angular_velocity_w = simd_float::zerov();
// --- Scatter body a back to bodies.momentum.
9413 store8<
sizeof(bodies.
momentum[0]), 1>((
float*)bodies.
momentum, constraint.a,
9414 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9415 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
// --- Body b: linear and angular response, then scatter back.
9417 b_velocity_x = simd_float::madd(linear_impulse_x, b_mass_inverse, b_velocity_x);
9418 b_velocity_y = simd_float::madd(linear_impulse_y, b_mass_inverse, b_velocity_y);
9419 b_velocity_z = simd_float::madd(linear_impulse_z, b_mass_inverse, b_velocity_z);
9421 simdv_float ub_x = simd_float::loadv(constraint.ub_x);
9422 simdv_float ub_y = simd_float::loadv(constraint.ub_y);
9423 simdv_float ub_z = simd_float::loadv(constraint.ub_z);
9425 b_angular_velocity_x = simd_float::madd(ub_x, friction_impulse_x, b_angular_velocity_x);
9426 b_angular_velocity_y = simd_float::madd(ub_y, friction_impulse_x, b_angular_velocity_y);
9427 b_angular_velocity_z = simd_float::madd(ub_z, friction_impulse_x, b_angular_velocity_z);
9429 simdv_float vb_x = simd_float::loadv(constraint.vb_x);
9430 simdv_float vb_y = simd_float::loadv(constraint.vb_y);
9431 simdv_float vb_z = simd_float::loadv(constraint.vb_z);
9433 b_angular_velocity_x = simd_float::madd(vb_x, friction_impulse_y, b_angular_velocity_x);
9434 b_angular_velocity_y = simd_float::madd(vb_y, friction_impulse_y, b_angular_velocity_y);
9435 b_angular_velocity_z = simd_float::madd(vb_z, friction_impulse_y, b_angular_velocity_z);
9437 b_angular_velocity_w = simd_float::zerov();
9439 store8<
sizeof(bodies.
momentum[0]), 1>((
float*)bodies.
momentum, constraint.b,
9440 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9441 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
// Copies the accumulated solver impulses out of the SIMD constraint batches into
// the per-contact impulse array.
//
// Iterates over every lane of every batch (constraint_batches * simdv_width32
// lanes). For lane i, constraint_to_contact[i] gives the destination contact
// index, and the written 3-float impulse is
//   applied_normal_impulse * n + applied_friction_impulse_x * u
//                              + applied_friction_impulse_y * v.
//
// Parameters:
//   data             - constraint batches, their accumulated state and the
//                      lane-to-contact mapping (read only here).
//   contact_impulses - destination; data[contact].impulse[0..2] is overwritten.
9445void update_cached_impulses(ContactConstraintData* data, ContactImpulseData* contact_impulses) {
9446 uint32_t* constraint_to_contact = data->constraint_to_contact;
9448 ContactConstraintV* constraints = data->constraints;
9449 ContactConstraintStateV* constraint_states = data->constraint_states;
9450 unsigned constraint_count = data->constraint_batches * simdv_width32;
9452 for (
unsigned i = 0; i < constraint_count; ++i) {
9453 unsigned contact = constraint_to_contact[i];
// Split the flat lane index into batch index b and lane-within-batch l.
9455 unsigned b = i >> simdv_width32_log2;
9456 unsigned l = i & (simdv_width32-1);
9458 float* impulse = contact_impulses->data[contact].impulse;
9460 impulse[0] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_x[l] +
9461 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_x[l] +
9462 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_x[l]);
9464 impulse[1] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_y[l] +
9465 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_y[l] +
9466 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_y[l]);
9468 impulse[2] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_z[l] +
9469 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_z[l] +
9470 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_z[l]);
// Integrates the transforms of the active bodies forward by time_step.
//
// NOTE(review): this listing omits many source lines of the function (the local
// declarations, the sleeping branch bodies and the closing braces), so the comments
// below describe only the statements that are visible here.
//
// Visible behaviour per active body:
//   - compares the squared linear and angular speed against the two sleeping
//     thresholds and, inside that branch, tests the idle counter against 0xff,
//   - adds velocity * time_step to the position,
//   - adds dr (built from the angular velocity and scaled by half_time_step) to
//     the rotation quaternion, then renormalizes and writes it back.
9474void advance(
context_t* c,
float time_step) {
9477 float half_time_step = 0.5f * time_step;
9483 for (
unsigned n = 0; n < active_bodies->
count; ++n) {
9484 unsigned i = active_bodies->
indices[n];
9493 if (length2(velocity) < sleeping_threshold_linear_velocity_squared && length2(angular_velocity) < sleeping_threshold_angular_velocity_squared) {
9494 if (*idle_counter < 0xff) {
// dr starts as the angular velocity with a zero scalar part; source lines 9514-9515
// are not shown in this listing -- TODO confirm what happens to dr there.
9513 Rotation dr = { angular_velocity, 0.f };
9516 dr.v *= half_time_step;
9517 dr.s *= half_time_step;
9521 float* bodyPosition3 = bodyTransform->
position;
9522 float* bodyRotation4 = bodyTransform->
rotation;
9524 bodyPosition3[0] += velocity.x * time_step;
9525 bodyPosition3[1] += velocity.y * time_step;
9526 bodyPosition3[2] += velocity.z * time_step;
9528 bodyRotation4[0] += dr.v.x;
9529 bodyRotation4[1] += dr.v.y;
9530 bodyRotation4[2] += dr.v.z;
9531 bodyRotation4[3] += dr.s;
// Renormalize after the additive update before writing the quaternion back.
9533 Rotation rotation = normalize(make_rotation(bodyRotation4));
9535 bodyRotation4[0] = rotation.v.x;
9536 bodyRotation4[1] = rotation.v.y;
9537 bodyRotation4[2] = rotation.v.z;
9538 bodyRotation4[3] = rotation.s;
int can_add_box(context_t *c)
Definition nudge.h:978
void body_recalculate_bounding_box(context_t *c, uint32_t body)
Recalculates the bounding box of the body (BodyInfo::aabb_center and BodyInfo::aabb_extents)
uint32_t colliders_get_num_remaining_spheres(context_t *c)
Returns the number of sphere colliders that can still be added to the physics world.
int can_add_compound(context_t *c, unsigned num_boxes, unsigned num_spheres)
Definition nudge.h:986
int can_add_clone(context_t *c, unsigned body_to_clone)
Definition nudge.h:990
void remove_body(context_t *c, unsigned body)
Removes a body from the simulation.
unsigned get_next_add_body_index(context_t *c)
Allows peeking at the body index that will be returned by the next add_xxx(...) call.
Definition nudge.h:999
uint32_t colliders_get_num_remaining_boxes(context_t *c)
Returns the number of box colliders that can still be added to the physics world.
unsigned add_sphere(context_t *c, float mass, float radius, const Transform *T=NULL, const float comOffset[3]=NULL)
Adds a new body to the simulation with a single sphere collider.
void body_scale(nudge::context_t *c, unsigned body, float scale_factor, float mass_scale_factor=0.f)
[Experimental] Uniformly scales the specified body incrementally
unsigned add_clone(context_t *c, unsigned body_to_clone, float mass, const Transform *T=NULL, float scale_factor=1.f, const float newComOffsetInPreScaledUnits[3]=NULL)
[Experimental] Adds a new body to the simulation cloning an existing body
unsigned add_compound(context_t *c, float mass, float inertia[3], unsigned num_boxes, const float *hsizeTriplets, const Transform *boxOffsetTransforms, unsigned num_spheres, const float *radii, const Transform *sphereOffsetTransforms, const Transform *T=NULL, const float comOffset[3]=NULL, float *centerMeshAndRetrieveOldCenter3Out=NULL)
Adds a new body to the simulation with a compound collider made up of num_boxes box colliders and num...
void body_change_motion_state(nudge::context_t *c, unsigned body, nudge::FlagMask new_motion_state, float mass_fallback=1.f)
[Experimental] Changes the body motion state (i.e. the BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC group of ...
unsigned add_box(context_t *c, float mass, float hsizex, float hsizey, float hsizez, const Transform *T=NULL, const float comOffset[3]=NULL)
Adds a new body to the simulation with a single box collider.
int can_add_sphere(context_t *c)
Definition nudge.h:982
void init_context(context_t *c)
Mandatory function to be called at program startup.
void show_info()
Displays basic info at program startup; very important call to detect the SIMD configuration of the p...
void load_context(FILE *f, context_t *c)
Loads a saved nudge context.
void save_context(FILE *f, const context_t *c)
Saves the nudge context.
void restart_context(context_t *c)
Optional function that restarts a valid context, preserving the simulation settings and the allocated...
void init_context_with(context_t *c, unsigned MAX_NUM_BOXES, unsigned MAX_NUM_SPHERES)
Mandatory function to be called at program startup.
void destroy_context(context_t *c)
Mandatory function to be called at program exit.
void body_set_collision_group_and_mask(context_t *c, uint32_t body, CollisionMask single_collision_group_body_belongs_to, CollisionMask collision_group_mask_body_can_collide_with=COLLISION_GROUP_ALL)
Sets the body collision group (a single value of COLLISION_GROUP_) and mask (a combination of COLLISI...
Definition nudge.h:1242
FlagMask * body_get_flags(context_t *c, uint32_t body)
Shortcut that returns a pointer to the body flags (a combination of BF_ enums)
Definition nudge.h:1263
CollisionMask * body_get_collision_mask(context_t *c, uint32_t body)
Gets the body collision mask (a combination of COLLISION_GROUP_ values)
Definition nudge.h:1258
CollisionMask * body_get_collision_group(context_t *c, uint32_t body)
Gets the body collision group (a single value of COLLISION_GROUP_)
Definition nudge.h:1252
void calculate_capsule_inertia_inverse(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_cylinder_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_box_inertia_inverse(float result[3], float mass, float hsizex, float hsizey, float hsizez, const float comOffset[3]=NULL)
void calculate_torus_inertia_inverse(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_hollow_cylinder_inertia(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_sphere_inertia_inverse(float result[3], float mass, float radius, const float comOffset[3]=NULL, bool hollow=false)
void calculate_cone_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_sphere_inertia(float result[3], float mass, float radius, const float comOffset[3]=NULL, bool hollow=false)
void calculate_cylinder_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_box_inertia(float result[3], float mass, float hsizex, float hsizey, float hsizez, const float comOffset[3]=NULL)
void calculate_capsule_inertia(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_torus_inertia(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_cone_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_hollow_cylinder_inertia_inverse(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void kinematic_data_reserve_animations(KinematicData *kd, size_t new_size)
Reserves additional space for KinematicData animations.
void kinematic_data_reserve_key_frames(KinematicData *kd, size_t new_size)
Reserves additional space for KinematicData key frames.
int flush(void)
Flushes the log. It defaults to fflush(NUDGE_LOG_FILE_PTR)
int log(const char *format,...)
Logging function used by the library. It defaults to printf.
float * calculate_graphic_transform_for_body(context_t *c, unsigned body, float *pModelMatrix16Out)
Function that can be used to calculate the smoothed 16-float column-major model matrix of a single bo...
void simulation_step(context_t *c)
Mandatory function that must be called once per frame.
unsigned pre_simulation_step(context_t *c, double elapsedSecondsFromLastCall)
Mandatory function that must be called once per frame.
void calculate_graphic_transforms(context_t *c, float *pModelMatricesOut, unsigned modelMatrixStrideInFloatUnits, int loopActiveBodiesOnly=0)
Function that can be used to calculate the smoothed 16-float column-major model matrices of all the b...
float * nm_QuatFromMat4(float *__restrict result4, const float *__restrict m16)
Turns the 3x3 submatrix of a 16-floats column-major matrix (without scaling applied) into a quaternio...
float * nm_QuatFromMat3(float *__restrict result4, const float *__restrict m9)
Turns the 3x3 9-floats column-major rotation matrix (without scaling applied) into a quaternion.
void TransformAssignToBody(context_t *c, unsigned body, Transform newT, float deltaTime, int16_t aux_body=-1)
Assigns a new Transform to a body, and sets its linear and angular velocities based on the difference...
Transform TransformSlerp(Transform T0, Transform T1, float time)
Applies (spherical) lerp between T0 and T1.
void nm_QuatGetAngularVelocity(float *__restrict angVel3, const float *newQuat4, const float *oldQuat4, float halfTimeStep)
Given an old and a new quaternion and a small time step, it calculates the angular velocities.
float * nm_QuatMulVec3(float *__restrict vOut3, const float *__restrict q4, const float *__restrict vIn3)
Transforms a 3-floats vector by a 4-floats quaternion.
float * nm_Mat4SetRotationFromQuat(float *__restrict result16, const float *__restrict q4)
Replaces the 3x3 submatrix of a 16-floats column-major matrix with the 3x3 matrix representing the gi...
float * nm_QuatRotate(float *__restrict qInOut4, float angle, float axisX, float axisY, float axisZ)
Rotates an input 4-floats unit quaternion by an angle in radians around a specified axis.
float * TransformToMat4(float *matrix16Out, const Transform *T)
Converts a nudge::Transform to a column-major 16-floats matrix.
float * nm_QuatGetAxis(float *__restrict vOut3, const float *__restrict q4, float axisX, float axisY, float axisZ)
Transforms a particular axis from the input quaternion space to world space.
void TransformAdvanceBodyFromVelocities(context_t *c, unsigned body, float deltaTime)
Advances the body's transform based on its linear and angular velocities.
float nm_Vec3Normalized(float *__restrict v3Out, const float *__restrict v3)
Gets a normalized copy of an input 3-floats vector.
float * nm_QuatGetAxisZ(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1537
Transform * Mat4WithoutScalingToTransform(Transform *Tout, const float *matrix16WithoutScaling)
Converts a column-major 16-floats matrix without any scaling applied to a nudge::Transform.
float * nm_QuatGetAxisY(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1536
float * nm_QuatFromAngleAxis(float *__restrict qOut4, float rfAngle, float rkAxisX, float rkAxisY, float rkAxisZ)
Generates a 4-floats quaternion based on an orientation around an axis.
float * nm_Mat4Mul(float *result16, const float *ml16, const float *mr16)
Multiplies two column-major 16-floats 4x4 matrices.
float * nm_QuatMul(float *qOut4, const float *a4, const float *b4)
Multiplies two 4-floats quaternions.
float * nm_QuatSlerp(float *__restrict result4, const float *__restrict a4, const float *__restrict b4, float slerpTime_In_0_1, int normalizeResult4AfterLerp)
Performs a spherical lerp between two quaternions (in 4-floats format)
Transform TransformMul(Transform T0, Transform T1)
Multiplies two transforms.
float nm_Vec3Normalize(float *__restrict v3)
Normalizes a 3-floats vector in place.
float * nm_Vec3Cross(float *__restrict vOut3, const float *__restrict a3, const float *__restrict b3)
Computes the cross product between two 3-floats vectors.
float * nm_Mat3FromQuat(float *__restrict result9, const float *__restrict q4)
Converts the given quaternion to a 3x3 9-floats column-major rotation matrix.
void nm_QuatToAngleAxis(const float *__restrict q4, float *__restrict rfAngleOut1, float *__restrict rkAxisOut3)
Calculates the angle-axis representation of the given 4-float quaternion.
void nm_QuatNormalize(float *__restrict q4)
Normalizes a 4-floats quaternion in place.
float nm_Vec3Dot(const float *__restrict a3, const float *__restrict b3)
Computes the dot product between two 3-floats vectors.
void nm_QuatAdvance(float *__restrict qOut4, const float *__restrict q4, const float *__restrict angVel3, float halfTimeStep)
Advances a quaternion given an angular velocity and a (small) time step.
float * nm_QuatGetAxisX(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1535
NUDGE_FLAG_MASK_TYPE FlagMask
The unsigned type used for the BF_ flags; it defaults to uint16_t (i.e. 16 flags available) if C++11 ...
Definition nudge.h:386
AxisEnum
The AxisEnum enum.
Definition nudge.h:688
@ AXIS_Z
Definition nudge.h:688
@ AXIS_X
Definition nudge.h:688
@ AXIS_Y
Definition nudge.h:688
float * body_get_position(context_t *c, uint32_t body)
Gets the pointer to the body position (3-floats vector)
Definition nudge.h:1062
NUDGE_COLLISION_MASK_TYPE CollisionMask
The unsigned type used for the COLLISION_GROUP_ flags; it defaults to uint8_t (i.e....
Definition nudge.h:359
GlobalDataMaskEnum
The GlobalDataMaskEnum enum.
Definition nudge.h:633
@ GF_DONT_RESET_AUX_BODIES
Definition nudge.h:635
@ GF_USE_GLOBAL_GRAVITY
Definition nudge.h:634
float * body_get_angular_velocity(context_t *c, uint32_t body)
Gets the pointer to the body angular velocity (3-floats)
Definition nudge.h:1053
CollisionMaskEnum
The CollisionMaskEnum enum.
Definition nudge.h:369
@ COLLISION_GROUP_A
Definition nudge.h:371
@ COLLISION_GROUP_DEFAULT
Definition nudge.h:370
@ COLLISION_GROUP_C
Definition nudge.h:373
@ COLLISION_GROUP_ALL
Definition nudge.h:378
@ COLLISION_GROUP_F
Definition nudge.h:376
@ COLLISION_GROUP_B
Definition nudge.h:372
@ COLLISION_GROUP_E
Definition nudge.h:375
@ COLLISION_GROUP_D
Definition nudge.h:374
@ COLLISION_GROUP_G
Definition nudge.h:377
BodyFlagEnum
The BodyFlagEnum enum.
Definition nudge.h:398
@ BF_NEVER_SLEEPING
Definition nudge.h:406
@ BF_IS_DYNAMIC
Definition nudge.h:405
@ BF_IS_KINEMATIC_OR_DYNAMIC
Definition nudge.h:418
@ BF_HAS_COM_OFFSET
Definition nudge.h:399
@ BF_HAS_DIFFERENT_GRAVITY_MODE
Definition nudge.h:407
@ BF_IS_PLATFORM
Definition nudge.h:411
@ BF_IS_STATIC_OR_DYNAMIC
Definition nudge.h:417
@ BF_IS_STATIC
Definition nudge.h:403
@ BF_IS_FRUSTUM_CULLED
Definition nudge.h:413
@ BF_IS_DISABLED_OR_REMOVED_OR_FRUSTUM_CULLED
Definition nudge.h:414
@ BF_HAS_DIFFERENT_AUX_BODIES_RESET_MODE
Definition nudge.h:408
@ BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED
Definition nudge.h:420
@ BF_IS_DISABLED_OR_REMOVED
Definition nudge.h:402
@ BF_IS_KINEMATIC
Definition nudge.h:404
@ BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC
Definition nudge.h:419
@ BF_IS_SENSOR
Definition nudge.h:412
@ BF_IS_REMOVED
Definition nudge.h:401
@ BF_IS_DISABLED
Definition nudge.h:400
@ BF_IS_CHARACTER
Definition nudge.h:410
@ BF_IS_STATIC_OR_KINEMATIC
Definition nudge.h:416
float * body_get_orientation(context_t *c, uint32_t body)
Gets the pointer to the body orientation (4-floats quaternion in {x,y,z,w} format)
Definition nudge.h:1069
float * body_get_velocity(context_t *c, uint32_t body)
Gets the pointer to the body linear velocity (3-floats)
Definition nudge.h:1045
#define NUDGE_DEFAULT_SIMULATION_TIMESTEP
Definition nudge.h:1609
#define NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED
Definition nudge.h:1627
#define NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS
Definition nudge.h:1612
#define NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
Definition nudge.h:465
#define NUDGE_DEFAULT_DAMPING_LINEAR
Definition nudge.h:1618
#define NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS
Definition nudge.h:1615
#define NUDGE_COLLISION_MASK_TYPE
Definition nudge.h:205
#define NUDGE_FLAG_MASK_TYPE
Definition nudge.h:208
#define NUDGE_DEFAULT_DAMPING_ANGULAR
Definition nudge.h:1621
#define NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT
Definition nudge.h:1630
#define NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR
Definition nudge.h:1633
#define NUDGE_INVALID_BODY_ID
Definition nudge.h:552
#define NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED
Definition nudge.h:1624
The ActiveBodies struct.
Definition nudge.h:539
uint32_t capacity
Definition nudge.h:541
uint32_t count
Definition nudge.h:542
uint16_t * indices
Definition nudge.h:540
The Arena struct used internally.
Definition nudge.h:218
uintptr_t size
Definition nudge.h:220
void * data
Definition nudge.h:219
[unused] The BodyConnections struct is actually just sketched in nudge (it was intended to add custom...
Definition nudge.h:514
uint32_t count
Definition nudge.h:516
BodyPair * data
Definition nudge.h:515
The main struct contained in context_t: it exposes every per-body data in the simulation,...
Definition nudge.h:500
BodyMomentum * momentum
Definition nudge.h:503
BodyLayout * layouts
Definition nudge.h:505
uint8_t * idle_counters
Definition nudge.h:507
BodyInfo * infos
Definition nudge.h:506
BodyFilter * filters
Definition nudge.h:504
uint32_t count
Definition nudge.h:508
BodyProperties * properties
Definition nudge.h:502
Transform * transforms
Definition nudge.h:501
The BodyFilter struct.
Definition nudge.h:428
CollisionMask collision_mask
Definition nudge.h:431
FlagMask flags
Definition nudge.h:429
CollisionMask collision_group
Definition nudge.h:430
The BodyInfo struct contains some read-only graphic properties of the body (e.g. axis aligned boundin...
Definition nudge.h:453
union nudge::BodyInfo::@16::@18 sk_user
int16_t aux_bodies[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:469
uint32_t u32[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES/2]
Definition nudge.h:483
int8_t i8[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES *2]
Definition nudge.h:487
float com_offset[3]
Definition nudge.h:462
float aabb_center[3]
Definition nudge.h:460
float aabb_enlarged_radius
Definition nudge.h:463
float aabb_half_extents[3]
Definition nudge.h:461
uint8_t u8[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES *2]
Definition nudge.h:488
uint16_t u16[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:486
UserData32Bit user
Definition nudge.h:455
int16_t i16[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:485
int32_t i32[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES/2]
Definition nudge.h:482
Per-body struct that contains the indices of the body colliders inside ColliderData::boxes and Collid...
Definition nudge.h:438
uint16_t num_boxes
Definition nudge.h:439
uint16_t num_spheres
Definition nudge.h:442
int16_t first_sphere_index
Definition nudge.h:443
int16_t first_box_index
Definition nudge.h:440
The BodyMomentum struct.
Definition nudge.h:285
float unused0
Definition nudge.h:287
float unused1
Definition nudge.h:289
float angular_velocity[3]
Definition nudge.h:288
float velocity[3]
Definition nudge.h:286
The BodyPair struct.
Definition nudge.h:317
uint16_t b
Definition nudge.h:319
uint16_t a
Definition nudge.h:318
The BodyProperties struct.
Definition nudge.h:276
float gravity[3]
Definition nudge.h:279
float friction
Definition nudge.h:280
float inertia_inverse[3]
Definition nudge.h:277
float mass_inverse
Definition nudge.h:278
The BoxCollider struct.
Definition nudge.h:301
float unused
Definition nudge.h:303
float size[3]
Definition nudge.h:302
This struct is used to access all the colliders in the physics world.
Definition nudge.h:338
Transform * transforms
Definition nudge.h:342
struct nudge::ColliderData::@14 boxes
uint32_t count
Definition nudge.h:343
SphereCollider * data
Definition nudge.h:348
BoxCollider * data
Definition nudge.h:341
struct nudge::ColliderData::@15 spheres
uint16_t * tags
Definition nudge.h:340
The GlobalData struct groups global fields that could not fit in the SimulationParams struct.
Definition nudge.h:643
float gravity[3]
Definition nudge.h:644
uint32_t flags
Definition nudge.h:645
uint32_t removed_bodies_count
Definition nudge.h:648
const uint32_t removed_bodies_capacity
Definition nudge.h:650
FlagMask exclude_smoothing_graphic_transform_flags
Definition nudge.h:646
uint32_t * removed_bodies
Definition nudge.h:647
uint32_t finalized_removed_bodies_count
Definition nudge.h:649
The Animation class. Each animation owns a (kinematic) body index and a range of key frames.
Definition nudge.h:585
float play_time
Definition nudge.h:586
float speed
Definition nudge.h:588
LoopMode
Definition nudge.h:596
@ LM_LOOP_NORMAL
Definition nudge.h:598
@ LM_NO_LOOP
Definition nudge.h:597
@ LM_LOOP_PING_PONG
Definition nudge.h:599
float offset_time
Definition nudge.h:587
uint32_t body
Definition nudge.h:593
bool use_baseT
Definition nudge.h:595
Transform baseT
Definition nudge.h:590
uint32_t key_frame_start
Definition nudge.h:591
float total_time
Definition nudge.h:589
enum nudge::KinematicData::Animation::LoopMode loop_mode
uint32_t key_frame_count
Definition nudge.h:592
bool playing
Definition nudge.h:594
The KinematicData is composed of two arrays: an array of global key frames and an array of animations...
Definition nudge.h:565
uint32_t key_frame_capacity
Definition nudge.h:576
uint32_t animations_capacity
Definition nudge.h:602
Transform * key_frame_transforms
Definition nudge.h:567
struct nudge::KinematicData::Animation * animations
uint32_t animations_count
Definition nudge.h:603
TimeMode
TimeMode enum is an optional experimental flag.
Definition nudge.h:571
@ TM_NORMAL
Definition nudge.h:572
@ TM_DECELERATE
Definition nudge.h:574
@ TM_ACCELERATE
Definition nudge.h:573
uint32_t key_frame_count
Definition nudge.h:577
enum nudge::KinematicData::TimeMode * key_frame_modes
The SimulationParams struct.
Definition nudge.h:609
float linear_damping
Definition nudge.h:616
float sleeping_threshold_linear_velocity_squared
Definition nudge.h:614
unsigned numsubsteps_overflow_in_last_frame
Definition nudge.h:628
unsigned num_iterations_per_substep
Definition nudge.h:613
float penetration_allowed_amount
Definition nudge.h:618
float sleeping_threshold_angular_velocity_squared
Definition nudge.h:615
unsigned num_substeps_in_last_frame
Definition nudge.h:627
unsigned long long num_frames
Definition nudge.h:623
unsigned long long num_total_substeps
Definition nudge.h:624
double time_step
Definition nudge.h:611
unsigned numsubsteps_overflow_warning_mode
Definition nudge.h:620
float penetration_bias_factor
Definition nudge.h:619
float angular_damping
Definition nudge.h:617
double remaining_time_in_seconds
Definition nudge.h:625
float time_step_minus_remaining_time
Definition nudge.h:626
unsigned max_num_substeps
Definition nudge.h:612
The SphereCollider struct.
Definition nudge.h:295
float radius
Definition nudge.h:296
Main struct of the library.
Definition nudge.h:657
ColliderData colliders
Definition nudge.h:662
BodyData bodies
Definition nudge.h:661
ContactData contact_data
Definition nudge.h:663
Arena arena
Definition nudge.h:659
const unsigned MAX_NUM_SPHERES
Definition nudge.h:674
ActiveBodies active_bodies
Definition nudge.h:666
GlobalData global_data
Definition nudge.h:670
SimulationParams simulation_params
Definition nudge.h:671
ContactCache contact_cache
Definition nudge.h:665
const unsigned MAX_NUM_BODIES
Definition nudge.h:675
UserData64Bit user
Definition nudge.h:681
const unsigned MAX_NUM_BOXES
Definition nudge.h:673
KinematicData kinematic_data
Definition nudge.h:669
Storage struct for user data (by default used inside BodyInfo): a per-body 32-bit user space in 7 dif...
Definition nudge.h:240
uint32_t u32
Definition nudge.h:240
int32_t i32
Definition nudge.h:240
uint8_t u8[4]
Definition nudge.h:240
int8_t i8[4]
Definition nudge.h:240
uint16_t u16[2]
Definition nudge.h:240
float f32
Definition nudge.h:240
int16_t i16[2]
Definition nudge.h:240
Storage struct for user data (by default used inside context_t): a per-context 64-bit user space in 1...
Definition nudge.h:226
uint32_t u32[2]
Definition nudge.h:234
int64_t i64
Definition nudge.h:230
int16_t i16[4]
Definition nudge.h:235
float f32[2]
Definition nudge.h:234
double f64
Definition nudge.h:230
uint8_t u8[8]
Definition nudge.h:235
int8_t i8[8]
Definition nudge.h:235
int32_t i32[2]
Definition nudge.h:234
uint16_t u16[4]
Definition nudge.h:235
uint64_t u64
Definition nudge.h:230