Nudge Physics
A single file, header-only 3D physics library
Loading...
Searching...
No Matches
nudge.h
Go to the documentation of this file.
1//
2// Copyright (c) 2017 Rasmus Barringer
3//
4// Permission is hereby granted, free of charge, to any person obtaining a copy
5// of this software and associated documentation files (the "Software"), to deal
6// in the Software without restriction, including without limitation the rights
7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8// copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all
12// copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20// SOFTWARE.
21//
22
23// Modified, refactored and documented by Flix01 (https://github.com/Flix01/nudge/tree/master) in 2024
24// [I'm not a physics engine expert at all, the mods I've made are just to ease my usage scenario]
25// [I've also probably decreased performance and broken something, so if you're a
26// physics engine expert, I suggest you base your mods on the original version]
27// Further info can be read in the doxygen comment below.
28
29
157#ifndef NUDGE_H
158#define NUDGE_H
159
160#include <stddef.h>
161#include <stdint.h>
162#include <stdarg.h> // log function declaration
163#ifdef NUDGE_USER_CFG_FILE_NAME
164# include NUDGE_USER_CFG_FILE_NAME // optional definition for advanced users (to be placed in the project options or in the compiler commandline, not inside code files). Remember to escape quotes (e.g. -DNUDGE_USER_CFG_FILE_NAME="\"nudge_user_cfg.h\"" on the commandline)
165#endif
166#ifndef NUDGE_NO_STDIO
167# include <stdio.h>
168#endif
169#ifndef __cplusplus
170# error nudge.h is a c++ file and should be compiled as c++
171#elif __cplusplus < 201103L
172# define NUDGE_NO_CPP11_DETECTED
173# define NUDGE_CONSTEXPRFNC /*no-op*/ /*never used in nudge.h*/
174# define NUDGE_CONSTEXPR const /*never used in nudge.h*/
175# define NUDGE_STATIC_ASSERT_WITH_MESSAGE(X,MESSAGE) assert(X) /*this needs <assert.h>, but I don't want to include it here*/
176# undef NUDGE_USE_INT32_ENUMS
177# define NUDGE_USE_INT32_ENUMS // if set: bigger enums (more space for body flags and collision groups: the BodyFilter struct is 3x bigger), but worse cache performance (note that bit operations are generally not faster with smaller types)
178// Also note that there are still a few anonymous structs (that by standard require C++11 compilation),
179// but in my tests both g++ and clang++ work just fine with -std=c++98
180// In case of problems please define NUDGE_NO_ANONYMOUS_STRUCTS
181#else // c++11 detected
182# define NUDGE_CONSTEXPRFNC constexpr /*never used in nudge.h*/
183# define NUDGE_CONSTEXPR constexpr /*never used in nudge.h*/
184# define NUDGE_STATIC_ASSERT_WITH_MESSAGE(X,MESSAGE) static_assert((X), MESSAGE) /*no <assert.h> required; never used in nudge.h*/
185#endif // __cplusplus
186#define NUDGE_STATIC_ASSERT(X) NUDGE_STATIC_ASSERT_WITH_MESSAGE((X), "") /*never used in nudge.h*/
187
/*
 * NUDGE_POINTER_SIZE: width of a data pointer in bits (64, 32, or 0 when it
 * cannot be determined at preprocessing time).
 *
 * Detection order:
 *   1. __SIZEOF_POINTER__  (gcc/clang, value is in bytes)
 *   2. Windows / MSVC architecture macros. The 32-bit x86 macro is _M_IX86
 *      (there is no _M_X86).
 *   3. UINTPTR_MAX from <stdint.h>, for any other toolchain that provides it.
 */
#if defined(__SIZEOF_POINTER__)
#   define NUDGE_POINTER_SIZE ((__SIZEOF_POINTER__)*8)  /* __SIZEOF_POINTER__ (gcc/clang) is in bytes */
#elif defined(_WIN64) || defined(_M_X64)
#   define NUDGE_POINTER_SIZE (64)
#elif defined(_WIN32) || defined(_M_IX86)
#   define NUDGE_POINTER_SIZE (32)
#elif defined(UINTPTR_MAX) && (UINTPTR_MAX > 0xFFFFFFFFu)
#   define NUDGE_POINTER_SIZE (64)
#elif defined(UINTPTR_MAX) && (UINTPTR_MAX == 0xFFFFFFFFu)
#   define NUDGE_POINTER_SIZE (32)
#else
#   define NUDGE_POINTER_SIZE (0)  /* unknown: pointer-sized union members are compiled out */
#endif /* NUDGE_POINTER_SIZE detection */
197
198#ifdef NUDGE_USE_INT32_ENUMS
199# undef NUDGE_COLLISION_MASK_TYPE
200# define NUDGE_COLLISION_MASK_TYPE uint32_t
201# undef NUDGE_FLAG_MASK_TYPE
202# define NUDGE_FLAG_MASK_TYPE uint32_t
203#else //NUDGE_USE_INT32_ENUMS
204# ifndef NUDGE_COLLISION_MASK_TYPE
205# define NUDGE_COLLISION_MASK_TYPE uint8_t
206# endif
207# ifndef NUDGE_FLAG_MASK_TYPE
208# define NUDGE_FLAG_MASK_TYPE uint16_t
209# endif
210#endif // NUDGE_USE_INT32_ENUMS
211
212namespace nudge {
213
/** A raw memory region described by its start address and its extent. */
struct Arena {
    void*     data;  // first address of the region
    uintptr_t size;  // extent of the region (presumably in bytes -- TODO confirm against the allocation code)
};
222
227# if NUDGE_POINTER_SIZE==64
228 void* ptrs[1];
229# endif
230 int64_t i64;uint64_t u64;double f64;
231# if NUDGE_POINTER_SIZE==32
232 void* ptrs[2];
233# endif
234 int32_t i32[2];uint32_t u32[2];float f32[2];
235 int16_t i16[4];uint16_t u16[4];int8_t i8[8];uint8_t u8[8];
236 };
/** One 32-bit payload, viewable as a single word/float, two 16-bit values or four bytes. */
union UserData32Bit {
    int32_t  i32;
    uint32_t u32;
    float    f32;
    int16_t  i16[2];
    uint16_t u16[2];
    int8_t   i8[4];
    uint8_t  u8[4];
};
241
/**
 * Position + one 32-bit slot + rotation quaternion, 8 x 32 bits in total.
 *
 * Every group is a union, so the same storage can be reached under several
 * names. The unnamed-struct aliases (px.., rx.., qx..) are compiled out when
 * NUDGE_NO_ANONYMOUS_STRUCTS is defined.
 */
struct Transform {
    // --- translation: 3 floats ---------------------------------------------
    union {
        float position[3];
        float p[3];
        struct { float x, y, z; } vector;
#       ifndef NUDGE_NO_ANONYMOUS_STRUCTS
        struct { float px, py, pz; };
#       endif
    };

    // --- 32-bit slot that holds either a body index or a time value ---------
    union {
        uint32_t body;
        float    time;
    };

    // --- rotation: quaternion stored as x, y, z, w --------------------------
    union {
        float rotation[4];
        float r[4];
        float q[4];
        struct { float x, y, z, w; } quaternion;
#       ifndef NUDGE_NO_ANONYMOUS_STRUCTS
        struct { float rx, ry, rz, rw; };
        struct { float qx, qy, qz, qw; };
#       endif
    };
};
279 float gravity[3];
280 float friction;
281 };
286 float velocity[3];
287 float unused0;
289 float unused1;
290 };
291
296 float radius;
297 };
/** Box collision shape: three extents plus one spare float (4 floats in total). */
struct BoxCollider {
    float size[3];  // extents along x, y, z (presumably half-sizes, cf. the hsize* arguments of add_box -- TODO confirm)
    float unused;   // spare slot; not given a meaning by this struct
};
308 struct Contact {
309 float position[3];
311 float normal[3];
312 float friction;
313 };
/** Two 16-bit body indices packed together. */
struct BodyPair {
    uint16_t a;  // first body of the pair
    uint16_t b;  // second body of the pair
};
324 struct ContactData {
327 uint64_t* tags;
328 uint32_t capacity;
329 uint32_t count;
331 uint32_t* sleeping_pairs;
332 uint32_t sleeping_count;
333 };
339 struct {
340 uint16_t* tags;
343 uint32_t count;
346 struct {
347 uint16_t* tags;
349 Transform* transforms; // probably a simple position is enough here... possible optimization?
350 uint32_t count;
352 };
353
360
380
387
433
438 struct BodyLayout {
439 uint16_t num_boxes;
442 uint16_t num_spheres;
444 };
445
446
447
453 struct BodyInfo {
454# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
456# endif // NUDGE_BODYINFO_STRUCT_NO_USER_DATA
457# ifdef NUDGE_BODYINFO_STRUCT_EXTRA_FIELDS
458 NUDGE_BODYINFO_STRUCT_EXTRA_FIELDS
459# endif // NUDGE_BODYINFO_STRUCT_EXTRA_FIELDS
460 float aabb_center[3];
462 float com_offset[3];
464# ifndef NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
465# define NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES (2) // sizeof(BodyInfo): (2) => 48 bytes; (4) => 52 bytes; (6) => 56 bytes; (8) => 60 bytes; (10) => 64 bytes
466# endif // NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
467# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
468 union {
470 union {
471# if (NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES%4)==0
472# if NUDGE_POINTER_SIZE==64
474# endif
477# endif
478# if (NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES%2)==0
479# if NUDGE_POINTER_SIZE==32
481# endif
484# endif
490 };
491# endif // NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
492# ifdef NUDGE_BODYINFO_STRUCT_EXTRA_PADDING
493 NUDGE_BODYINFO_STRUCT_EXTRA_PADDING
494# endif // NUDGE_BODYINFO_STRUCT_EXTRA_PADDING
495 };
516 uint32_t count;
517 };
523 float impulse[3];
524 float unused;
525 };
530 uint64_t* tags;
532 uint32_t capacity;
533 uint32_t count;
534 };
540 uint16_t* indices;
541 uint32_t capacity;
542 uint32_t count;
543 };
544
545 struct ContactImpulseData;
546 struct ContactConstraintData;
547
552# define NUDGE_INVALID_BODY_ID (32767)
553
554 // TODO: currently int32_t or uint32_t is used as body index: but at most MAX_NUM_BODIES is 8192.
555 // So we can use int16_t and uint16_t (where padding/alignment is not required).
556 // [Well, there are some parts in nudge internal where the upper
557 // 16-bit of a body index is used, so I'm not sure if this can be done everywhere]
558
566 // key_frame_transforms and key_frame_modes are a single memory block used by all animations
604 };
#   ifdef NUDGE_GLOBALDATAMASK_ENUM_EXTRA_FIELDS
    // Paste the user-supplied enumerators here, the same way the *_STRUCT_EXTRA_FIELDS
    // hooks are expanded elsewhere in this header. (Re-#defining the macro to nothing
    // at this point dropped the user's entries and triggered a redefinition diagnostic.)
    NUDGE_GLOBALDATAMASK_ENUM_EXTRA_FIELDS
#   endif
639 };
657 struct context_t {
658 // original nudge fields
668 // extended stuff
673 const unsigned MAX_NUM_BOXES;
674 const unsigned MAX_NUM_SPHERES;
675 const unsigned MAX_NUM_BODIES;
677# ifdef NUDGE_CONTEXT_STRUCT_EXTRA_FIELDS
678 NUDGE_CONTEXT_STRUCT_EXTRA_FIELDS
679# endif // NUDGE_CONTEXT_STRUCT_EXTRA_FIELDS
680# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
682# endif // NUDGE_CONTEXT_STRUCT_NO_USER_DATA
683 };
684
689
690
691# ifdef NUDGE_USE_TIME_CONTEXT
/**
 * Frame clock with pause support.
 *
 * Feed it an external clock reading once per frame through update(); it then
 * exposes the time elapsed since the first reading (with and without the
 * paused intervals), the duration of the last frame and a frame counter.
 * All times are in seconds.
 */
struct time_context_t {
public:
    /**
     * Starts un-paused with every clock at zero.
     * The frame time is seeded with ~16 ms (0.0162 s) so that a caller reading it
     * before the first update() sees a plausible frame length. The previous seed
     * of 16.2 was a milliseconds value stored in a field documented in seconds.
     */
    time_context_t()
        : instantFrameTime(0.0162), totalTime(0), totalTimeWithoutPause(0), paused(false),
          beginTime(0), beginNetTime(0), beginPausedTime(0), timeNow(0), num_frames(0),
          wasPausedLastFrame(false) {}

    /**
     * Advances the clock. Call exactly once per frame, whether paused or not.
     * @param globalTimeInSeconds  current reading of the external clock. The first
     *        non-zero reading becomes the time origin (a stored origin of exactly 0
     *        means "not captured yet").
     */
    inline void update(double globalTimeInSeconds) {
        const double previousTotalTime = totalTime;

        // Pause bookkeeping: act only on the frame where the flag flipped.
        if (wasPausedLastFrame != paused) {
            wasPausedLastFrame = paused;
            if (paused) {
                beginPausedTime = globalTimeInSeconds;
            } else {
                // Shift the pause-free origin forward by the length of the pause.
                beginNetTime += globalTimeInSeconds - beginPausedTime;
                beginPausedTime = 0;
            }
        }

        // Capture the origins on first use.
        if (beginTime == 0)    beginTime = globalTimeInSeconds;
        if (beginNetTime == 0) beginNetTime = globalTimeInSeconds;

        // Total time; if the external clock went backwards, re-anchor the origin so
        // the elapsed time never becomes negative.
        if (globalTimeInSeconds < beginTime) beginTime = globalTimeInSeconds;
        totalTime = globalTimeInSeconds - beginTime;

        // Pause-free time only advances while running.
        if (!paused) {
            if (globalTimeInSeconds < beginNetTime) beginNetTime = globalTimeInSeconds;
            totalTimeWithoutPause = globalTimeInSeconds - beginNetTime;
        }

        // Frame duration, clamped to zero when the total time moved backwards.
        instantFrameTime = (totalTime < previousTotalTime) ? 0.0 : (totalTime - previousTotalTime);

        timeNow = globalTimeInSeconds;
        ++num_frames;
    }

    /** Seconds between the two most recent update() calls. */
    inline double getInstantFrameTime() const { return instantFrameTime; }
    /** Seconds since the first update(), paused intervals included. */
    inline double getTotalTime() const { return totalTime; }
    /** Seconds since the first update(), paused intervals excluded. */
    inline double getTotalTimeWithoutPause() const { return totalTimeWithoutPause; }
    /** External clock reading used as time origin. */
    inline double getBeginTime() const { return beginTime; }
    /** External clock reading passed to the latest update(). */
    inline double getTimeNow() const { return timeNow; }
    /** 1 / frame time, or 0 when the frame time is zero. */
    inline double getInstantFPS() const { return instantFrameTime != 0 ? 1.0 / instantFrameTime : 0; }
    /** Number of update() calls so far (or the restored count, see restoreFrom()). */
    inline unsigned long getNumFrames() const { return num_frames; }
    inline bool getPaused() const { return paused; }
    /** The new pause state takes effect at the next update(). */
    inline void setPaused(bool flag) { paused = flag; }
    inline void togglePaused() { paused = !paused; }

    /**
     * Rewinds the pause-free clock and the frame counter to the values stored in
     * another context; the wall-clock based total time is left untouched.
     * @param o  context to read from (not modified).
     */
    inline void restoreFrom(const time_context_t* o) {
        // Move the pause-free origin so that the next update() continues from o's value.
        beginNetTime += totalTimeWithoutPause - o->totalTimeWithoutPause;
        totalTimeWithoutPause = o->totalTimeWithoutPause;
        num_frames = o->num_frames;
    }

private:
    // NOTE: member order is kept unchanged on purpose (object layout stays the same).
    double instantFrameTime;       // get; seconds elapsed from last frame
    double totalTime;              // get; seconds elapsed from the start (including 'paused' time)
    double totalTimeWithoutPause;  // get; seconds elapsed from the start (excluding 'paused' time)
    bool paused;                   // get/set

    double beginTime, beginNetTime, beginPausedTime, timeNow;
    unsigned long num_frames;
    bool wasPausedLastFrame;
};
764# endif //NUDGE_USE_TIME_CONTEXT
765
766
775 void show_info();
783 void init_context_with(context_t *c, unsigned MAX_NUM_BOXES,unsigned MAX_NUM_SPHERES);
803
804# ifndef NUDGE_NO_STDIO
812 void save_context(FILE* f,const context_t* c);
821 void load_context(FILE* f,context_t* c);
822# endif //NUDGE_NO_STDIO
// end of context_group
824
836 unsigned pre_simulation_step(context_t* c,double elapsedSecondsFromLastCall);
837
844
853 float* calculate_graphic_transform_for_body(context_t* c,unsigned body,float* pModelMatrix16Out);
862 void calculate_graphic_transforms(context_t* c,float* pModelMatricesOut,unsigned modelMatrixStrideInFloatUnits,int loopActiveBodiesOnly=0);
// end of main_group
864
883 unsigned add_box(context_t* c,float mass, float hsizex, float hsizey, float hsizez, const Transform* T=NULL,const float comOffset[3]=NULL);
888 unsigned add_box(context_t* c,float mass, float hsizex, float hsizey, float hsizez, const float* mMatrix16WithoutScaling,const float comOffset[3]=NULL);
900 unsigned add_sphere(context_t* c,float mass, float radius, const Transform* T=NULL,const float comOffset[3]=NULL);
905 unsigned add_sphere(context_t* c,float mass, float radius, const float* mMatrix16WithoutScaling,const float comOffset[3]=NULL);
924 unsigned add_compound(context_t* c, float mass, float inertia[3], unsigned num_boxes, const float* hsizeTriplets, const Transform* boxOffsetTransforms, unsigned num_spheres, const float* radii, const Transform* sphereOffsetTransforms, const Transform* T=NULL, const float comOffset[3]=NULL, float *centerMeshAndRetrieveOldCenter3Out = NULL);
929 unsigned add_compound(context_t* c,float mass, float inertia[3],unsigned num_boxes,const float* hsizeTriplets,const float* boxOffsetMatrices16WithoutScaling,unsigned num_spheres,const float* radii,const float* sphereOffsetMatrices16WithoutScaling,const float* mMatrix16WithoutScaling=NULL, const float comOffset[3]=NULL, float *centerMeshAndRetrieveOldCenter3Out = NULL);
942 unsigned add_clone(context_t* c,unsigned body_to_clone,float mass,const Transform* T=NULL,float scale_factor=1.f,const float newComOffsetInPreScaledUnits[3]=NULL);
947 unsigned add_clone(context_t* c,unsigned body_to_clone,float mass,const float* mMatrix16WithoutScaling,float scale_factor=1.f,const float newComOffsetInPreScaledUnits[3]=NULL);
948
964 void remove_body(context_t* c,unsigned body);
986 inline int can_add_compound(context_t* c,unsigned num_boxes,unsigned num_spheres) {return (colliders_get_num_remaining_boxes(c)>=num_boxes && colliders_get_num_remaining_spheres(c)>=num_spheres);}
990 inline int can_add_clone(context_t* c,unsigned body_to_clone) {return (colliders_get_num_remaining_boxes(c)>=c->bodies.layouts[body_to_clone].num_boxes && colliders_get_num_remaining_spheres(c)>=c->bodies.layouts[body_to_clone].num_spheres);}
1000
1001
1010
1022 void body_change_motion_state(nudge::context_t* c,unsigned body,nudge::FlagMask new_motion_state,float mass_fallback=1.f);
1023
1034 void body_scale(nudge::context_t* c,unsigned body,float scale_factor,float mass_scale_factor=0.f);
1035
// end of add_group
1037
1038
1045 inline float* body_get_velocity(context_t* c,uint32_t body) {return c->bodies.momentum[body].velocity;}
1046
1053 inline float* body_get_angular_velocity(context_t* c,uint32_t body) {return c->bodies.momentum[body].angular_velocity;}
1054
1055
1062 inline float* body_get_position(context_t* c,uint32_t body) {return c->bodies.transforms[body].p;}
1069 inline float* body_get_orientation(context_t* c,uint32_t body) {return c->bodies.transforms[body].q;}
1070
1071
1072 namespace extra {
1094 unsigned add_compound_prism(context_t* c,float mass,float radius,float hheight, unsigned num_lateral_faces=0,const Transform* T=NULL,AxisEnum axis=AXIS_Y,const float comOffset[3]=NULL);
1099 unsigned add_compound_prism(context_t* c, float mass, float radius, float hheight, unsigned num_lateral_faces, const float* mMatrix16WithoutScaling, AxisEnum axis=AXIS_Y, const float comOffset[3]=NULL);
1116 unsigned add_compound_cylinder(context_t* c, float mass, float radius, float hheight, const Transform* T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=0, unsigned num_spheres=0, const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f);
1121 unsigned add_compound_cylinder(context_t* c,float mass,float radius,float hheight, const float* mMatrix16WithoutScaling,AxisEnum axis=AXIS_Y,unsigned num_boxes=0,unsigned num_spheres=0,const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f);
1138 unsigned add_compound_capsule(context_t* c,float mass,float radius,float hheight, const Transform* T=NULL,AxisEnum axis=AXIS_Y,unsigned num_boxes=1,unsigned num_spheres=3,const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f);
1142 unsigned add_compound_capsule(context_t* c,float mass,float radius,float hheight, const float* mMatrix16WithoutScaling,AxisEnum axis=AXIS_Y,unsigned num_boxes=1,unsigned num_spheres=3,const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f);
1158 unsigned add_compound_hollow_cylinder(context_t* c,float mass,float min_radius,float max_radius,float hheight, const Transform* T=NULL,AxisEnum axis=AXIS_Y,unsigned num_boxes=8,const float comOffset[3]=NULL);
1163 unsigned add_compound_hollow_cylinder(context_t* c,float mass,float min_radius,float max_radius,float hheight, const float* mMatrix16WithoutScaling, AxisEnum axis=AXIS_Y, unsigned num_boxes=8, const float comOffset[3]=NULL);
1178 unsigned add_compound_torus(context_t* c,float mass,float radius,float inner_radius, const Transform* T=NULL,AxisEnum axis=AXIS_Y,unsigned num_boxes=8,const float comOffset[3]=NULL);
1183 unsigned add_compound_torus(context_t* c,float mass,float radius,float inner_radius, const float* mMatrix16WithoutScaling,AxisEnum axis=AXIS_Y,unsigned num_boxes=8,const float comOffset[3]=NULL);
1199 unsigned add_compound_cone(context_t* c, float mass, float radius, float hheight, const Transform* T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=0, unsigned num_spheres=0, const float comOffset[3]=NULL);
1204 unsigned add_compound_cone(context_t* c, float mass, float radius, float hheight, const float* mMatrix16WithoutScaling, AxisEnum axis=AXIS_Y, unsigned num_boxes=0, unsigned num_spheres=0, const float comOffset[3]=NULL);
1205
1221 unsigned add_compound_staircase(context_t* c,float mass, float hdepth, float hheight, float hlength, unsigned num_steps=15, const Transform* T=NULL, int orientation_in_0_3=0, const float comOffset[3]=NULL);
1226 unsigned add_compound_staircase(context_t* c,float mass, float hdepth, float hheight, float hlength, unsigned num_steps, const float* mMatrix16WithoutScaling, int orientation_in_0_3=0, const float comOffset[3]=NULL);
1227
1228
1229
// end of extra_group
1231 } // namespace extra
1232
1242 inline void body_set_collision_group_and_mask(context_t* c,uint32_t body,CollisionMask single_collision_group_body_belongs_to,CollisionMask collision_group_mask_body_can_collide_with=COLLISION_GROUP_ALL) {
1243 nudge::BodyFilter* filter = &c->bodies.filters[body];
1244 filter->collision_group=single_collision_group_body_belongs_to;
1245 filter->collision_mask=collision_group_mask_body_can_collide_with;
1246 }
1258 inline CollisionMask* body_get_collision_mask(context_t* c,uint32_t body) {return &c->bodies.filters[body].collision_mask;}
1263 inline FlagMask* body_get_flags(context_t* c,uint32_t body) {return &c->bodies.filters[body].flags;}
// end of filter_group
1265
// end of kinematic_animation_group
1286
1297 int log(const char* format, ...);
1302 int flush(void);
// end of log_group
1304
1315 unsigned contact_data_index,
1316 int16_t* box_collider_index_for_body_a,
1317 int16_t* sphere_collider_index_for_body_a,
1318 int16_t* box_collider_index_for_body_b,
1319 int16_t* sphere_collider_index_for_body_b,
1320 int use_relative_values_for_output_indices=0
1321 );
// end of contact_group
1323
1329 static const Transform identity_transform = { {}, {0}, { {0.0f, 0.0f, 0.0f, 1.0f} } };
1336 Transform* Mat4WithoutScalingToTransform(Transform* Tout,const float* matrix16WithoutScaling);
1342 Transform Mat4WithoutScalingToTransform(const float* matrix16WithoutScaling);
1349 float *TransformToMat4(float* matrix16Out,const Transform* T);
1358 void TransformAssignToBody(context_t* c,unsigned body,Transform newT,float deltaTime,int16_t aux_body=-1);
1365 void TransformAdvanceBodyFromVelocities(context_t* c,unsigned body,float deltaTime);
1381
1390 void nm_QuatAdvance(float* __restrict qOut4,const float* __restrict q4,const float* __restrict angVel3,float halfTimeStep);
1398 float* nm_QuatFromMat4(float* __restrict result4,const float* __restrict m16);
1406 float* nm_Mat4SetRotationFromQuat(float* __restrict result16,const float* __restrict q4);
1414 float* nm_QuatFromMat3(float* __restrict result4,const float* __restrict m9);
1422 float* nm_Mat3FromQuat(float* __restrict result9,const float* __restrict q4);
1431 void nm_QuatGetAngularVelocity(float* __restrict angVel3,const float* newQuat4,const float* oldQuat4,float halfTimeStep);
1442 float* nm_QuatSlerp(float* __restrict result4,const float* __restrict a4,const float* __restrict b4,float slerpTime_In_0_1,int normalizeResult4AfterLerp/*=1*/);
1451 float* nm_QuatMul(float* /*__restrict*/ qOut4,const float* /*__restrict*/ a4,const float* /*__restrict*/ b4);
1457 void nm_QuatNormalize(float* __restrict q4);
1468 float* nm_QuatFromAngleAxis(float* __restrict qOut4,float rfAngle,float rkAxisX,float rkAxisY,float rkAxisZ);
1476 void nm_QuatToAngleAxis(const float* __restrict q4,float* __restrict rfAngleOut1,float* __restrict rkAxisOut3);
1483 float nm_Vec3Normalize(float* __restrict v3);
1491 float nm_Vec3Normalized(float* __restrict v3Out,const float* __restrict v3);
1498 float nm_Vec3Dot(const float* __restrict a3,const float* __restrict b3);
1507 float* nm_Vec3Cross(float* __restrict vOut3,const float* __restrict a3,const float* __restrict b3);
1508
1519 float* nm_QuatMulVec3(float* __restrict vOut3,const float* __restrict q4,const float* __restrict vIn3);
1520
1534 float* nm_QuatGetAxis(float* __restrict vOut3,const float* __restrict q4,float axisX,float axisY,float axisZ);
1535 inline float* nm_QuatGetAxisX(float* __restrict axisOut3,const float* __restrict q4) {return nm_QuatGetAxis(axisOut3,q4,1,0,0);}
1536 inline float* nm_QuatGetAxisY(float* __restrict axisOut3,const float* __restrict q4) {return nm_QuatGetAxis(axisOut3,q4,0,1,0);}
1537 inline float* nm_QuatGetAxisZ(float* __restrict axisOut3,const float* __restrict q4) {return nm_QuatGetAxis(axisOut3,q4,0,0,1);}
1538
1550 float* nm_QuatRotate(float* __restrict qInOut4,float angle,float axisX,float axisY,float axisZ);
1551
1561 float* nm_Mat4Mul(float* result16,const float* ml16,const float* mr16);
// end of math_group
1563
1569 void calculate_box_inertia(float result[3],float mass,float hsizex,float hsizey,float hsizez,const float comOffset[3]=NULL);
1570 void calculate_sphere_inertia(float result[3],float mass,float radius,const float comOffset[3]=NULL,bool hollow=false);
1571 void calculate_cylinder_inertia(float result[3],float mass,float radius,float halfHeight,AxisEnum upAxis=AXIS_Y,const float comOffset[3]=NULL);
1572 void calculate_capsule_inertia(float result[3],float mass,float radius,float halfCylinderHeight,AxisEnum upAxis=AXIS_Y,const float comOffset[3]=NULL);
1573 void calculate_torus_inertia(float result[3],float mass,float majorRadius,float minorRadius,AxisEnum upAxis=AXIS_Y,const float comOffset[3]=NULL);
1574 void calculate_hollow_cylinder_inertia(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL);
1575 void calculate_cone_inertia(float result[3],float mass,float radius,float halfHeight,AxisEnum upAxis=AXIS_Y,const float comOffset[3]=NULL);
1576
1577 void calculate_box_inertia_inverse(float result[3],float mass,float hsizex,float hsizey,float hsizez,const float comOffset[3]=NULL);
1578 void calculate_sphere_inertia_inverse(float result[3],float mass,float radius,const float comOffset[3]=NULL,bool hollow=false);
1579 void calculate_cylinder_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL);
1580 void calculate_capsule_inertia_inverse(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL);
1581 void calculate_torus_inertia_inverse(float result[3],float mass,float majorRadius,float minorRadius,AxisEnum upAxis=AXIS_Y,const float comOffset[3]=NULL);
1582 void calculate_hollow_cylinder_inertia_inverse(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL);
1583 void calculate_cone_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL);
// end of inertia
1585
1586
1587
1588# ifndef M_PIOVER180
1589# define M_PIOVER180 ((float)(3.14159265358979323846/180.0))
1590# endif
1591# ifndef M_180OVERPI
1592# define M_180OVERPI ((float)(180.0/3.14159265358979323846))
1593# endif
1594# ifndef M_DEG2RAD
1595# define M_DEG2RAD(X) ((X)*(float)M_PIOVER180)
1596# endif
1597# ifndef M_RAD2DEG
1598# define M_RAD2DEG(X) ((X)*(float)M_180OVERPI)
1599# endif
1600
1601# ifndef NUDGE_NO_STDIO
1602# ifdef NUDGE_USE_TIME_CONTEXT
1603 void save_time_context(FILE* f,const time_context_t* c);
1604 void load_time_context(FILE* f,time_context_t* c);
1605# endif //NUDGE_USE_TIME_CONTEXT
1606# endif //NUDGE_NO_STDIO
1607
1608#ifndef NUDGE_DEFAULT_SIMULATION_TIMESTEP
1609# define NUDGE_DEFAULT_SIMULATION_TIMESTEP (1.0/60.0)
1610#endif
1611#ifndef NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS
1612# define NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS (2) //2-10
1613#endif
1614#ifndef NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS
1615# define NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS (5) //5-350
1616#endif
1617#ifndef NUDGE_DEFAULT_DAMPING_LINEAR
1618# define NUDGE_DEFAULT_DAMPING_LINEAR (0.25f)
1619#endif
1620#ifndef NUDGE_DEFAULT_DAMPING_ANGULAR
1621# define NUDGE_DEFAULT_DAMPING_ANGULAR (0.25f)
1622#endif
1623#ifndef NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED
1624# define NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED (1e-2f)
1625#endif
1626#ifndef NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED
1627# define NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED (1e-1f)
1628#endif
1629#ifndef NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT
1630# define NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT (1e-3f)
1631#endif
1632#ifndef NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR
1633# define NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR (2.0f)
1634#endif
1635
1636 //--------------------------------------------------------------------------------------------------------------------------
1637
1638} // namespace nudge
1639
1640#endif // NUDGE_H
1641
1642
1643
1644
1645
1646
1647//--- Hack for better code completion on QtCreator (to remove? no)-------------
1648#if (!defined(HELLO_WORLD_CPP_) && !defined(EXAMPLE02_CPP_) && !defined(NUDGE_IMPLEMENTATION) && defined(NUDGE_DEVELOPMENT))
1649 #define NUDGE_IMPLEMENTATION
1650#endif
1651//-----------------------------------------------------------------------------
1652
1653#ifdef NUDGE_IMPLEMENTATION
1654#ifndef NUDGE_IMPLEMENTATION_GUARD
1655#define NUDGE_IMPLEMENTATION_GUARD
1656
1657#include <assert.h>
1658
1659#ifdef NUDGE_USE_SIMDE
1660# ifndef SIMDE_ENABLE_NATIVE_ALIASES
1661//# error Please define SIMDE_ENABLE_NATIVE_ALIASES globally and recompile
1662# define SIMDE_ENABLE_NATIVE_ALIASES
1663# endif
1664//# include "./simde/x86/avx2.h"
1665# include "./simde/x86/sse2.h"
1666#ifndef NUDGE_SIMDE_USE_CUSTOM_MM_MALLOC
1667# include <mm_malloc.h> // _mm_malloc and _mm_free
1668#endif
1669#else
1670# include <immintrin.h>
1671#endif
1672
1673#include <math.h>
1674#include <string.h>
1675
/*
 * Compiler-specific spelling of "align to n bytes" and "always inline".
 *
 * MSVC is identified by _MSC_VER (single leading underscore). The previous test
 * used __MSC_VER, which no compiler defines, so MSVC builds always received the
 * GCC-style __attribute__ spelling. MinGW does not define _MSC_VER and keeps
 * using the GCC branch.
 */
#if defined(_MSC_VER)
#include <intrin.h>
#define NUDGE_ALIGNED(n) __declspec(align(n))
#define NUDGE_FORCEINLINE __forceinline
#else
#define NUDGE_ALIGNED(n) __attribute__((aligned(n)))
#define NUDGE_FORCEINLINE inline __attribute__((always_inline))
#endif
1684
1685#ifdef __AVX2__
1686#define NUDGE_SIMDV_WIDTH 256
1687#else
1688#define NUDGE_SIMDV_WIDTH 128
1689#endif
1690
1691#define NUDGE_ARENA_SCOPE(A) Arena& scope_arena_##A = A; Arena A = scope_arena_##A
1692
1693
1694//---- LOGGING IMPLEMENTATION -----------------------------------
namespace nudge {
    // Do-nothing sinks that the NUDGE_VLOG_FUNC / NUDGE_LOG_FLUSH macros fall back to
    // when no real logging back-end is configured. Both ignore their input and report 0.

    /** Discards the format string and the argument list; always returns 0. */
    int dummy_vprintf(const char* /*format*/, va_list /*vlist*/ ) {
        return 0;
    }

    /** Has nothing to flush; always returns 0. */
    int dummy_flush(void) {
        return 0;
    }
}
1699#ifdef NUDGE_NO_STDIO
1700# ifndef NUDGE_VLOG_FUNC
1701//# error Please define the two macros NUDGE_VLOG_FUNC(FORMAT,VLIST) and NUDGE_LOG_FLUSH() for custom logging without stdio.h
1702# define NUDGE_VLOG_FUNC(A,B) nudge::dummy_vprintf((A),(B))
1703# undef NUDGE_LOG_FLUSH
1704# endif //NUDGE_VLOG_FUNC
1705#endif // NUDGE_NO_STDIO
1706#ifndef NUDGE_VLOG_FUNC
1707# include <stdio.h>
1708# ifndef NUDGE_LOG_FILE_PTR
1713# define NUDGE_LOG_FILE_PTR (stdout)
1714# endif // NUDGE_LOG_FILE_PTR
1715 // int vprintf(const char* format, va_list vlist);
1720# define NUDGE_VLOG_FUNC(CONST_CHAR_PTR_ARG_PTR,VA_LIST_ARG) vfprintf(NUDGE_LOG_FILE_PTR,CONST_CHAR_PTR_ARG_PTR,VA_LIST_ARG)
1725# define NUDGE_LOG_FLUSH() fflush(NUDGE_LOG_FILE_PTR) // int fflush(FILE*)
1726#endif // NUDGE_VLOG_FUNC
1727#ifndef NUDGE_LOG_FLUSH
1728# define NUDGE_LOG_FLUSH() nudge::dummy_flush()
1729#endif //NUDGE_LOG_FLUSH
1730//--------------------------------------------------------------
1731
1732
1733namespace nudge {
1734
1735int log(const char* format, ...) {va_list ap;va_start(ap, format);int rv=NUDGE_VLOG_FUNC(format, ap);va_end(ap);return rv;}
1736int flush(void) {return NUDGE_LOG_FLUSH();}
1737
1738
1739#if NUDGE_SIMDV_WIDTH == 128
1740#define NUDGE_SIMDV_ALIGNED NUDGE_ALIGNED(16)
1741static const unsigned simdv_width32 = 4;
1742static const unsigned simdv_width32_log2 = 2;
1743#elif NUDGE_SIMDV_WIDTH == 256
1744#define NUDGE_SIMDV_ALIGNED NUDGE_ALIGNED(32)
1745static const unsigned simdv_width32 = 8;
1746static const unsigned simdv_width32_log2 = 3;
1747#endif
1748
/*
 * Arithmetic operators for the SSE/AVX vector types.
 *
 * GCC and Clang provide +, -, *, / on __m128/__m256 as a language extension;
 * MSVC does not, so they are supplied here. MSVC is detected through _MSC_VER
 * (the previous test used the non-existent __MSC_VER, which left MSVC without
 * these operators). clang-cl also defines _MSC_VER but already has the built-in
 * operators, hence the extra __clang__ exclusion.
 */
#if defined(_MSC_VER) && !defined(__clang__)
// ---- 128-bit -------------------------------------------------------------
// Unary minus: flip the sign bit of every lane.
NUDGE_FORCEINLINE __m128 operator - (__m128 a) {
    return _mm_xor_ps(a, _mm_set1_ps(-0.0f));
}

NUDGE_FORCEINLINE __m128 operator + (__m128 a, __m128 b) {
    return _mm_add_ps(a, b);
}

NUDGE_FORCEINLINE __m128 operator - (__m128 a, __m128 b) {
    return _mm_sub_ps(a, b);
}

NUDGE_FORCEINLINE __m128 operator * (__m128 a, __m128 b) {
    return _mm_mul_ps(a, b);
}

NUDGE_FORCEINLINE __m128 operator / (__m128 a, __m128 b) {
    return _mm_div_ps(a, b);
}

NUDGE_FORCEINLINE __m128& operator += (__m128& a, __m128 b) {
    return a = _mm_add_ps(a, b);
}

NUDGE_FORCEINLINE __m128& operator -= (__m128& a, __m128 b) {
    return a = _mm_sub_ps(a, b);
}

NUDGE_FORCEINLINE __m128& operator *= (__m128& a, __m128 b) {
    return a = _mm_mul_ps(a, b);
}

NUDGE_FORCEINLINE __m128& operator /= (__m128& a, __m128 b) {
    return a = _mm_div_ps(a, b);
}
#ifdef __AVX2__
// ---- 256-bit (only when AVX2 code generation is enabled) -----------------
// Unary minus: flip the sign bit of every lane.
NUDGE_FORCEINLINE __m256 operator - (__m256 a) {
    return _mm256_xor_ps(a, _mm256_set1_ps(-0.0f));
}

NUDGE_FORCEINLINE __m256 operator + (__m256 a, __m256 b) {
    return _mm256_add_ps(a, b);
}

NUDGE_FORCEINLINE __m256 operator - (__m256 a, __m256 b) {
    return _mm256_sub_ps(a, b);
}

NUDGE_FORCEINLINE __m256 operator * (__m256 a, __m256 b) {
    return _mm256_mul_ps(a, b);
}

NUDGE_FORCEINLINE __m256 operator / (__m256 a, __m256 b) {
    return _mm256_div_ps(a, b);
}

NUDGE_FORCEINLINE __m256& operator += (__m256& a, __m256 b) {
    return a = _mm256_add_ps(a, b);
}

NUDGE_FORCEINLINE __m256& operator -= (__m256& a, __m256 b) {
    return a = _mm256_sub_ps(a, b);
}

NUDGE_FORCEINLINE __m256& operator *= (__m256& a, __m256 b) {
    return a = _mm256_mul_ps(a, b);
}

NUDGE_FORCEINLINE __m256& operator /= (__m256& a, __m256 b) {
    return a = _mm256_div_ps(a, b);
}
#endif // __AVX2__
#endif // _MSC_VER && !__clang__
1823
1824typedef __m128 simd4_float;
1825typedef __m128i simd4_int32;
1826
1827namespace simd128 {
1828 NUDGE_FORCEINLINE __m128 unpacklo32(__m128 x, __m128 y) {
1829 return _mm_unpacklo_ps(x, y);
1830 }
1831
1832 NUDGE_FORCEINLINE __m128 unpackhi32(__m128 x, __m128 y) {
1833 return _mm_unpackhi_ps(x, y);
1834 }
1835
1836 NUDGE_FORCEINLINE __m128i unpacklo32(__m128i x, __m128i y) {
1837 return _mm_unpacklo_epi32(x, y);
1838 }
1839
1840 NUDGE_FORCEINLINE __m128i unpackhi32(__m128i x, __m128i y) {
1841 return _mm_unpackhi_epi32(x, y);
1842 }
1843
1844 template<unsigned x0, unsigned x1, unsigned y0, unsigned y1>
1845 NUDGE_FORCEINLINE __m128 concat2x32(__m128 x, __m128 y) {
1846 return _mm_shuffle_ps(x, y, _MM_SHUFFLE(y1, y0, x1, x0));
1847 }
1848
1849 template<unsigned i0, unsigned i1, unsigned i2, unsigned i3>
1850 NUDGE_FORCEINLINE __m128 shuffle32(__m128 x) {
1851 return _mm_shuffle_ps(x, x, _MM_SHUFFLE(i3, i2, i1, i0));
1852 }
1853
1854 template<unsigned i0, unsigned i1, unsigned i2, unsigned i3>
1855 NUDGE_FORCEINLINE __m128i shuffle32(__m128i x) {
1856 return _mm_shuffle_epi32(x, _MM_SHUFFLE(i3, i2, i1, i0));
1857 }
1858
1859 NUDGE_FORCEINLINE void transpose32(simd4_float& x, simd4_float& y, simd4_float& z, simd4_float& w) {
1860 _MM_TRANSPOSE4_PS(x, y, z, w);
1861 }
1862}
1863
1864namespace simd {
1865 NUDGE_FORCEINLINE unsigned signmask32(__m128 x) {
1866 return _mm_movemask_ps(x);
1867 }
1868
1869 NUDGE_FORCEINLINE unsigned signmask32(__m128i x) {
1870 return _mm_movemask_ps(_mm_castsi128_ps(x));
1871 }
1872
1873 NUDGE_FORCEINLINE __m128 bitwise_xor(__m128 x, __m128 y) {
1874 return _mm_xor_ps(x, y);
1875 }
1876
1877 NUDGE_FORCEINLINE __m128 bitwise_or(__m128 x, __m128 y) {
1878 return _mm_or_ps(x, y);
1879 }
1880
1881 NUDGE_FORCEINLINE __m128 bitwise_and(__m128 x, __m128 y) {
1882 return _mm_and_ps(x, y);
1883 }
1884
1885 NUDGE_FORCEINLINE __m128 bitwise_notand(__m128 x, __m128 y) {
1886 return _mm_andnot_ps(x, y);
1887 }
1888
1889 NUDGE_FORCEINLINE __m128i bitwise_xor(__m128i x, __m128i y) {
1890 return _mm_xor_si128(x, y);
1891 }
1892
1893 NUDGE_FORCEINLINE __m128i bitwise_or(__m128i x, __m128i y) {
1894 return _mm_or_si128(x, y);
1895 }
1896
1897 NUDGE_FORCEINLINE __m128i bitwise_and(__m128i x, __m128i y) {
1898 return _mm_and_si128(x, y);
1899 }
1900
1901 NUDGE_FORCEINLINE __m128i bitwise_notand(__m128i x, __m128i y) {
1902 return _mm_andnot_si128(x, y);
1903 }
1904
1905 NUDGE_FORCEINLINE __m128 blendv32(__m128 x, __m128 y, __m128 s) {
1906#if defined(__SSE4_1__) || defined(__AVX__)
1907#define NUDGE_NATIVE_BLENDV32
1908 return _mm_blendv_ps(x, y, s);
1909#else
1910 s = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(s), 31));
1911 return _mm_or_ps(_mm_andnot_ps(s, x), _mm_and_ps(s, y));
1912#endif
1913 }
1914
1915 NUDGE_FORCEINLINE __m128i blendv32(__m128i x, __m128i y, __m128i s) {
1916 return _mm_castps_si128(blendv32(_mm_castsi128_ps(x), _mm_castsi128_ps(y), _mm_castsi128_ps(s)));
1917 }
1918}
1919
1920namespace simd_float {
1921 NUDGE_FORCEINLINE float extract_first_float(simd4_float x) {
1922 return _mm_cvtss_f32(x);
1923 }
1924
1925 NUDGE_FORCEINLINE simd4_float zero4() {
1926 return _mm_setzero_ps();
1927 }
1928
1929 NUDGE_FORCEINLINE simd4_float make4(float x) {
1930 return _mm_set1_ps(x);
1931 }
1932
1933 NUDGE_FORCEINLINE simd4_float make4(float x, float y, float z, float w) {
1934 return _mm_setr_ps(x, y, z, w);
1935 }
1936
1937 NUDGE_FORCEINLINE simd4_float broadcast_load4(const float* p) {
1938 return _mm_set1_ps(*p);
1939 }
1940
1941 NUDGE_FORCEINLINE simd4_float load4(const float* p) {
1942 return _mm_load_ps(p);
1943 }
1944
1945 NUDGE_FORCEINLINE simd4_float loadu4(const float* p) {
1946 return _mm_loadu_ps(p);
1947 }
1948
1949 NUDGE_FORCEINLINE void store4(float* p, simd4_float x) {
1950 _mm_store_ps(p, x);
1951 }
1952
1953 NUDGE_FORCEINLINE void storeu4(float* p, simd4_float x) {
1954 _mm_storeu_ps(p, x);
1955 }
1956
1957 NUDGE_FORCEINLINE simd4_float madd(simd4_float x, simd4_float y, simd4_float z) {
1958#ifdef __FMA__
1959 return _mm_fmadd_ps(x, y, z);
1960#else
1961 return _mm_add_ps(_mm_mul_ps(x, y), z);
1962#endif
1963 }
1964
1965 NUDGE_FORCEINLINE simd4_float msub(simd4_float x, simd4_float y, simd4_float z) {
1966#ifdef __FMA__
1967 return _mm_fmsub_ps(x, y, z);
1968#else
1969 return _mm_sub_ps(_mm_mul_ps(x, y), z);
1970#endif
1971 }
1972
1973 // Note: First operand is returned on NaN.
1974 NUDGE_FORCEINLINE simd4_float min(simd4_float x, simd4_float y) {
1975 return _mm_min_ps(y, x); // Note: For SSE, second operand is returned on NaN.
1976 }
1977
1978 // Note: First operand is returned on NaN.
1979 NUDGE_FORCEINLINE simd4_float max(simd4_float x, simd4_float y) {
1980 return _mm_max_ps(y, x); // Note: For SSE, second operand is returned on NaN.
1981 }
1982
1983 NUDGE_FORCEINLINE simd4_float rsqrt(simd4_float x) {
1984 return _mm_rsqrt_ps(x);
1985 }
1986
1987 NUDGE_FORCEINLINE simd4_float recip(simd4_float x) {
1988 return _mm_rcp_ps(x);
1989 }
1990
1991 NUDGE_FORCEINLINE simd4_float sqrt(simd4_float x) {
1992 return _mm_sqrt_ps(x);
1993 }
1994
1995 NUDGE_FORCEINLINE simd4_float abs(simd4_float x) {
1996 return _mm_andnot_ps(_mm_set1_ps(-0.0f), x);
1997 }
1998
1999 NUDGE_FORCEINLINE simd4_float cmp_gt(simd4_float x, simd4_float y) {
2000 return _mm_cmpgt_ps(x, y);
2001 }
2002
2003 NUDGE_FORCEINLINE simd4_float cmp_ge(simd4_float x, simd4_float y) {
2004 return _mm_cmpge_ps(x, y);
2005 }
2006
2007 NUDGE_FORCEINLINE simd4_float cmp_le(simd4_float x, simd4_float y) {
2008 return _mm_cmple_ps(x, y);
2009 }
2010
2011 NUDGE_FORCEINLINE simd4_float cmp_eq(simd4_float x, simd4_float y) {
2012 return _mm_cmpeq_ps(x, y);
2013 }
2014
2015 NUDGE_FORCEINLINE simd4_float cmp_neq(simd4_float x, simd4_float y) {
2016 return _mm_cmpneq_ps(x, y);
2017 }
2018
2019 NUDGE_FORCEINLINE simd4_int32 asint(simd4_float x) {
2020 return _mm_castps_si128(x);
2021 }
2022
2023 NUDGE_FORCEINLINE simd4_int32 toint(simd4_float x) {
2024 return _mm_cvttps_epi32(x);
2025 }
2026}
2027
2028namespace simd_int32 {
2029 NUDGE_FORCEINLINE simd4_int32 zero4() {
2030 return _mm_setzero_si128();
2031 }
2032
2033 NUDGE_FORCEINLINE simd4_int32 make4(int32_t x) {
2034 return _mm_set1_epi32(x);
2035 }
2036
2037 NUDGE_FORCEINLINE simd4_int32 make4(int32_t x, int32_t y, int32_t z, int32_t w) {
2038 return _mm_setr_epi32(x, y, z, w);
2039 }
2040
2041 NUDGE_FORCEINLINE simd4_int32 load4(const int32_t* p) {
2042 return _mm_load_si128((const __m128i*)p);
2043 }
2044
2045 NUDGE_FORCEINLINE simd4_int32 loadu4(const int32_t* p) {
2046 return _mm_loadu_si128((const __m128i*)p);
2047 }
2048
2049 NUDGE_FORCEINLINE void store4(int32_t* p, simd4_int32 x) {
2050 _mm_store_si128((__m128i*)p, x);
2051 }
2052
2053 NUDGE_FORCEINLINE void storeu4(int32_t* p, simd4_int32 x) {
2054 _mm_storeu_si128((__m128i*)p, x);
2055 }
2056
2057 template<unsigned bits>
2058 NUDGE_FORCEINLINE simd4_int32 shift_left(simd4_int32 x) {
2059 return _mm_slli_epi32(x, bits);
2060 }
2061
2062 template<unsigned bits>
2063 NUDGE_FORCEINLINE simd4_int32 shift_right(simd4_int32 x) {
2064 return _mm_srli_epi32(x, bits);
2065 }
2066
2067 NUDGE_FORCEINLINE simd4_int32 add(simd4_int32 x, simd4_int32 y) {
2068 return _mm_add_epi32(x, y);
2069 }
2070
2071 NUDGE_FORCEINLINE simd4_int32 cmp_eq(simd4_int32 x, simd4_int32 y) {
2072 return _mm_cmpeq_epi32(x, y);
2073 }
2074
2075 NUDGE_FORCEINLINE simd4_float asfloat(simd4_int32 x) {
2076 return _mm_castsi128_ps(x);
2077 }
2078}
2079
2080#ifdef __AVX2__
2081typedef __m256 simd8_float;
2082typedef __m256i simd8_int32;
2083
2084namespace simd128 {
2085 NUDGE_FORCEINLINE __m256 unpacklo32(__m256 x, __m256 y) {
2086 return _mm256_unpacklo_ps(x, y);
2087 }
2088
2089 NUDGE_FORCEINLINE __m256 unpackhi32(__m256 x, __m256 y) {
2090 return _mm256_unpackhi_ps(x, y);
2091 }
2092
2093 NUDGE_FORCEINLINE __m256i unpacklo32(__m256i x, __m256i y) {
2094 return _mm256_unpacklo_epi32(x, y);
2095 }
2096
2097 NUDGE_FORCEINLINE __m256i unpackhi32(__m256i x, __m256i y) {
2098 return _mm256_unpackhi_epi32(x, y);
2099 }
2100
2101 template<unsigned x0, unsigned x1, unsigned y0, unsigned y1>
2102 NUDGE_FORCEINLINE __m256 concat2x32(__m256 x, __m256 y) {
2103 return _mm256_shuffle_ps(x, y, _MM_SHUFFLE(y1, y0, x1, x0));
2104 }
2105
2106 template<unsigned i0, unsigned i1, unsigned i2, unsigned i3>
2107 NUDGE_FORCEINLINE __m256 shuffle32(__m256 x) {
2108 return _mm256_shuffle_ps(x, x, _MM_SHUFFLE(i3, i2, i1, i0));
2109 }
2110
2111 template<unsigned i0, unsigned i1, unsigned i2, unsigned i3>
2112 NUDGE_FORCEINLINE __m256i shuffle32(__m256i x) {
2113 return _mm256_shuffle_epi32(x, _MM_SHUFFLE(i3, i2, i1, i0));
2114 }
2115
2116 NUDGE_FORCEINLINE void transpose32(simd8_float& x, simd8_float& y, simd8_float& z, simd8_float& w) {
2117 __m256 t0 = _mm256_unpacklo_ps(x, y);
2118 __m256 t1 = _mm256_unpacklo_ps(z, w);
2119 __m256 t2 = _mm256_unpackhi_ps(x, y);
2120 __m256 t3 = _mm256_unpackhi_ps(z, w);
2121 x = _mm256_shuffle_ps(t0, t1, _MM_SHUFFLE(1,0,1,0));
2122 y = _mm256_shuffle_ps(t0, t1, _MM_SHUFFLE(3,2,3,2));
2123 z = _mm256_shuffle_ps(t2, t3, _MM_SHUFFLE(1,0,1,0));
2124 w = _mm256_shuffle_ps(t2, t3, _MM_SHUFFLE(3,2,3,2));
2125 }
2126}
2127
2128namespace simd256 {
2129 template<unsigned i0, unsigned i1>
2130 NUDGE_FORCEINLINE simd8_float permute128(simd8_float x, simd8_float y) {
2131 return _mm256_castsi256_ps(_mm256_permute2x128_si256(_mm256_castps_si256(x), _mm256_castps_si256(y), i0 | (i1 << 4)));
2132 }
2133
2134 template<unsigned i0, unsigned i1>
2135 NUDGE_FORCEINLINE simd8_int32 permute128(simd8_int32 x, simd8_int32 y) {
2136 return _mm256_permute2x128_si256(x, y, i0 | (i1 << 4));
2137 }
2138
2139 template<unsigned i0, unsigned i1>
2140 NUDGE_FORCEINLINE simd8_float shuffle128(simd8_float x) {
2141 return _mm256_castsi256_ps(_mm256_permute2x128_si256(_mm256_castps_si256(x), _mm256_castps_si256(x), i0 | (i1 << 4)));
2142 }
2143
2144 template<unsigned i0, unsigned i1>
2145 NUDGE_FORCEINLINE simd8_int32 shuffle128(simd8_int32 x) {
2146 return _mm256_permute2x128_si256(x, x, i0 | (i1 << 4));
2147 }
2148
2149 NUDGE_FORCEINLINE simd8_float broadcast(simd4_float x) {
2150 return _mm256_insertf128_ps(_mm256_castps128_ps256(x), x, 1);
2151 }
2152
2153 NUDGE_FORCEINLINE simd8_int32 broadcast(simd4_int32 x) {
2154 return _mm256_insertf128_si256(_mm256_castsi128_si256(x), x, 1);
2155 }
2156}
2157
2158namespace simd {
2159 NUDGE_FORCEINLINE simd8_float concat(simd4_float x, simd4_float y) {
2160 return _mm256_insertf128_ps(_mm256_castps128_ps256(x), y, 1);
2161 }
2162
2163 NUDGE_FORCEINLINE simd4_float extract_low(simd8_float x) {
2164 return _mm256_castps256_ps128(x);
2165 }
2166
2167 NUDGE_FORCEINLINE simd4_float extract_high(simd8_float x) {
2168 return _mm256_extractf128_ps(x, 1);
2169 }
2170
2171 NUDGE_FORCEINLINE simd4_int32 extract_low(simd8_int32 x) {
2172 return _mm256_castsi256_si128(x);
2173 }
2174
2175 NUDGE_FORCEINLINE simd4_int32 extract_high(simd8_int32 x) {
2176 return _mm256_extractf128_si256(x, 1);
2177 }
2178
2179 NUDGE_FORCEINLINE unsigned signmask32(__m256 x) {
2180 return _mm256_movemask_ps(x);
2181 }
2182
2183 NUDGE_FORCEINLINE unsigned signmask32(__m256i x) {
2184 return _mm256_movemask_ps(_mm256_castsi256_ps(x));
2185 }
2186
2187 NUDGE_FORCEINLINE __m256 bitwise_xor(__m256 x, __m256 y) {
2188 return _mm256_xor_ps(x, y);
2189 }
2190
2191 NUDGE_FORCEINLINE __m256 bitwise_or(__m256 x, __m256 y) {
2192 return _mm256_or_ps(x, y);
2193 }
2194
2195 NUDGE_FORCEINLINE __m256 bitwise_and(__m256 x, __m256 y) {
2196 return _mm256_and_ps(x, y);
2197 }
2198
2199 NUDGE_FORCEINLINE __m256 bitwise_notand(__m256 x, __m256 y) {
2200 return _mm256_andnot_ps(x, y);
2201 }
2202
2203 NUDGE_FORCEINLINE __m256i bitwise_xor(__m256i x, __m256i y) {
2204 return _mm256_xor_si256(x, y);
2205 }
2206
2207 NUDGE_FORCEINLINE __m256i bitwise_or(__m256i x, __m256i y) {
2208 return _mm256_or_si256(x, y);
2209 }
2210
2211 NUDGE_FORCEINLINE __m256i bitwise_and(__m256i x, __m256i y) {
2212 return _mm256_and_si256(x, y);
2213 }
2214
2215 NUDGE_FORCEINLINE __m256i bitwise_notand(__m256i x, __m256i y) {
2216 return _mm256_andnot_si256(x, y);
2217 }
2218
2219 NUDGE_FORCEINLINE __m256 blendv32(__m256 x, __m256 y, __m256 s) {
2220 return _mm256_blendv_ps(x, y, s);
2221 }
2222
2223 NUDGE_FORCEINLINE __m256i blendv32(__m256i x, __m256i y, __m256i s) {
2224 return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y), _mm256_castsi256_ps(s)));
2225 }
2226}
2227
2228namespace simd_float {
2229 NUDGE_FORCEINLINE float extract_first_float(simd8_float x) {
2230 return _mm_cvtss_f32(_mm256_castps256_ps128(x));
2231 }
2232
2233 NUDGE_FORCEINLINE simd8_float zero8() {
2234 return _mm256_setzero_ps();
2235 }
2236
2237 NUDGE_FORCEINLINE simd8_float make8(float x) {
2238 return _mm256_set1_ps(x);
2239 }
2240
2241 NUDGE_FORCEINLINE simd8_float make8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) {
2242 return _mm256_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1);
2243 }
2244
2245 NUDGE_FORCEINLINE simd8_float broadcast_load8(const float* p) {
2246 return _mm256_broadcast_ss(p);
2247 }
2248
2249 NUDGE_FORCEINLINE simd8_float load8(const float* p) {
2250 return _mm256_load_ps(p);
2251 }
2252
2253 NUDGE_FORCEINLINE simd8_float loadu8(const float* p) {
2254 return _mm256_loadu_ps(p);
2255 }
2256
2257 NUDGE_FORCEINLINE void store8(float* p, simd8_float x) {
2258 _mm256_store_ps(p, x);
2259 }
2260
2261 NUDGE_FORCEINLINE void storeu8(float* p, simd8_float x) {
2262 _mm256_storeu_ps(p, x);
2263 }
2264
2265 NUDGE_FORCEINLINE simd8_float madd(simd8_float x, simd8_float y, simd8_float z) {
2266#ifdef __FMA__
2267 return _mm256_fmadd_ps(x, y, z);
2268#else
2269 return _mm256_add_ps(_mm256_mul_ps(x, y), z);
2270#endif
2271 }
2272
2273 NUDGE_FORCEINLINE simd8_float msub(simd8_float x, simd8_float y, simd8_float z) {
2274#ifdef __FMA__
2275 return _mm256_fmsub_ps(x, y, z);
2276#else
2277 return _mm256_sub_ps(_mm256_mul_ps(x, y), z);
2278#endif
2279 }
2280
2281 // Note: First operand is returned on NaN.
2282 NUDGE_FORCEINLINE simd8_float min(simd8_float x, simd8_float y) {
2283 return _mm256_min_ps(y, x); // Note: For SSE, second operand is returned on NaN.
2284 }
2285
2286 // Note: First operand is returned on NaN.
2287 NUDGE_FORCEINLINE simd8_float max(simd8_float x, simd8_float y) {
2288 return _mm256_max_ps(y, x); // Note: For SSE, second operand is returned on NaN.
2289 }
2290
2291 NUDGE_FORCEINLINE simd8_float rsqrt(simd8_float x) {
2292 return _mm256_rsqrt_ps(x);
2293 }
2294
2295 NUDGE_FORCEINLINE simd8_float recip(simd8_float x) {
2296 return _mm256_rcp_ps(x);
2297 }
2298
2299 NUDGE_FORCEINLINE simd8_float sqrt(simd8_float x) {
2300 return _mm256_sqrt_ps(x);
2301 }
2302
2303 NUDGE_FORCEINLINE simd8_float abs(simd8_float x) {
2304 return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), x);
2305 }
2306
2307 NUDGE_FORCEINLINE simd8_float cmp_gt(simd8_float x, simd8_float y) {
2308 return _mm256_cmp_ps(x, y, _CMP_GT_OQ);
2309 }
2310
2311 NUDGE_FORCEINLINE simd8_float cmp_ge(simd8_float x, simd8_float y) {
2312 return _mm256_cmp_ps(x, y, _CMP_GE_OQ);
2313 }
2314
2315 NUDGE_FORCEINLINE simd8_float cmp_le(simd8_float x, simd8_float y) {
2316 return _mm256_cmp_ps(x, y, _CMP_LE_OQ);
2317 }
2318
2319 NUDGE_FORCEINLINE simd8_float cmp_eq(simd8_float x, simd8_float y) {
2320 return _mm256_cmp_ps(x, y, _CMP_EQ_OQ);
2321 }
2322
2323 NUDGE_FORCEINLINE simd8_float cmp_neq(simd8_float x, simd8_float y) {
2324 return _mm256_cmp_ps(x, y, _CMP_NEQ_OQ);
2325 }
2326
2327 NUDGE_FORCEINLINE simd8_int32 asint(simd8_float x) {
2328 return _mm256_castps_si256(x);
2329 }
2330
2331 NUDGE_FORCEINLINE simd8_int32 toint(simd8_float x) {
2332 return _mm256_cvttps_epi32(x);
2333 }
2334}
2335
2336namespace simd_int32 {
2337 NUDGE_FORCEINLINE simd8_int32 zero8() {
2338 return _mm256_setzero_si256();
2339 }
2340
2341 NUDGE_FORCEINLINE simd8_int32 make8(int32_t x) {
2342 return _mm256_set1_epi32(x);
2343 }
2344
2345 NUDGE_FORCEINLINE simd8_int32 make8(int32_t x0, int32_t y0, int32_t z0, int32_t w0, int32_t x1, int32_t y1, int32_t z1, int32_t w1) {
2346 return _mm256_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1);
2347 }
2348
2349 NUDGE_FORCEINLINE simd8_int32 load8(const int32_t* p) {
2350 return _mm256_load_si256((const __m256i*)p);
2351 }
2352
2353 NUDGE_FORCEINLINE simd8_int32 loadu8(const int32_t* p) {
2354 return _mm256_loadu_si256((const __m256i*)p);
2355 }
2356
2357 NUDGE_FORCEINLINE void store8(int32_t* p, simd8_int32 x) {
2358 _mm256_store_si256((__m256i*)p, x);
2359 }
2360
2361 NUDGE_FORCEINLINE void storeu8(int32_t* p, simd8_int32 x) {
2362 _mm256_storeu_si256((__m256i*)p, x);
2363 }
2364
2365 template<unsigned bits>
2366 NUDGE_FORCEINLINE simd8_int32 shift_left(simd8_int32 x) {
2367 return _mm256_slli_epi32(x, bits);
2368 }
2369
2370 template<unsigned bits>
2371 NUDGE_FORCEINLINE simd8_int32 shift_right(simd8_int32 x) {
2372 return _mm256_srli_epi32(x, bits);
2373 }
2374
2375 NUDGE_FORCEINLINE simd8_int32 add(simd8_int32 x, simd8_int32 y) {
2376 return _mm256_add_epi32(x, y);
2377 }
2378
2379 NUDGE_FORCEINLINE simd8_int32 cmp_eq(simd8_int32 x, simd8_int32 y) {
2380 return _mm256_cmpeq_epi32(x, y);
2381 }
2382
2383 NUDGE_FORCEINLINE simd8_float asfloat(simd8_int32 x) {
2384 return _mm256_castsi256_ps(x);
2385 }
2386}
2387#endif
2388
// Width-agnostic aliases: simdv_float / simdv_int32 and the *v functions map
// onto the 4-lane or the 8-lane implementation depending on NUDGE_SIMDV_WIDTH.
#if NUDGE_SIMDV_WIDTH == 128
typedef simd4_float simdv_float;
typedef simd4_int32 simdv_int32;

namespace simd_float {
    // All lanes zero.
    NUDGE_FORCEINLINE simdv_float zerov() {
        return simd_float::zero4();
    }

    // All lanes set to x.
    NUDGE_FORCEINLINE simdv_float makev(float x) {
        return simd_float::make4(x);
    }

    // Reads one float from p and replicates it to all lanes.
    NUDGE_FORCEINLINE simdv_float broadcast_loadv(const float* p) {
        return simd_float::broadcast_load4(p);
    }

    // Aligned load of a full vector.
    NUDGE_FORCEINLINE simdv_float loadv(const float* p) {
        return simd_float::load4(p);
    }

    // Unaligned load of a full vector.
    NUDGE_FORCEINLINE simdv_float loaduv(const float* p) {
        return simd_float::loadu4(p);
    }

    // Aligned store of a full vector.
    NUDGE_FORCEINLINE void storev(float* p, simdv_float x) {
        simd_float::store4(p, x);
    }

    // Unaligned store of a full vector.
    NUDGE_FORCEINLINE void storeuv(float* p, simdv_float x) {
        simd_float::storeu4(p, x);
    }
}

namespace simd_int32 {
    // All lanes zero.
    NUDGE_FORCEINLINE simdv_int32 zerov() {
        return simd_int32::zero4();
    }

    // All lanes set to x.
    NUDGE_FORCEINLINE simdv_int32 makev(int32_t x) {
        return simd_int32::make4(x);
    }

    // Aligned load of a full vector.
    NUDGE_FORCEINLINE simdv_int32 loadv(const int32_t* p) {
        return simd_int32::load4(p);
    }

    // Unaligned load of a full vector.
    NUDGE_FORCEINLINE simdv_int32 loaduv(const int32_t* p) {
        return simd_int32::loadu4(p);
    }

    // Aligned store of a full vector.
    NUDGE_FORCEINLINE void storev(int32_t* p, simdv_int32 x) {
        simd_int32::store4(p, x);
    }

    // Unaligned store of a full vector.
    NUDGE_FORCEINLINE void storeuv(int32_t* p, simdv_int32 x) {
        simd_int32::storeu4(p, x);
    }
}
#elif NUDGE_SIMDV_WIDTH == 256
typedef simd8_float simdv_float;
typedef simd8_int32 simdv_int32;

namespace simd_float {
    // All lanes zero.
    NUDGE_FORCEINLINE simdv_float zerov() {
        return simd_float::zero8();
    }

    // All lanes set to x.
    NUDGE_FORCEINLINE simdv_float makev(float x) {
        return simd_float::make8(x);
    }

    // Reads one float from p and replicates it to all lanes.
    NUDGE_FORCEINLINE simdv_float broadcast_loadv(const float* p) {
        return simd_float::broadcast_load8(p);
    }

    // Aligned load of a full vector.
    NUDGE_FORCEINLINE simdv_float loadv(const float* p) {
        return simd_float::load8(p);
    }

    // Unaligned load of a full vector.
    NUDGE_FORCEINLINE simdv_float loaduv(const float* p) {
        return simd_float::loadu8(p);
    }

    // Aligned store of a full vector.
    NUDGE_FORCEINLINE void storev(float* p, simdv_float x) {
        simd_float::store8(p, x);
    }

    // Unaligned store of a full vector.
    NUDGE_FORCEINLINE void storeuv(float* p, simdv_float x) {
        simd_float::storeu8(p, x);
    }
}

namespace simd_int32 {
    // All lanes zero.
    NUDGE_FORCEINLINE simdv_int32 zerov() {
        return simd_int32::zero8();
    }

    // All lanes set to x.
    NUDGE_FORCEINLINE simdv_int32 makev(int32_t x) {
        return simd_int32::make8(x);
    }

    // Aligned load of a full vector.
    NUDGE_FORCEINLINE simdv_int32 loadv(const int32_t* p) {
        return simd_int32::load8(p);
    }

    // Unaligned load of a full vector.
    NUDGE_FORCEINLINE simdv_int32 loaduv(const int32_t* p) {
        return simd_int32::loadu8(p);
    }

    // Aligned store of a full vector.
    NUDGE_FORCEINLINE void storev(int32_t* p, simdv_int32 x) {
        simd_int32::store8(p, x);
    }

    // Unaligned store of a full vector.
    NUDGE_FORCEINLINE void storeuv(int32_t* p, simdv_int32 x) {
        simd_int32::storeu8(p, x);
    }
}
#endif
2508
2509namespace simd_aos {
2510 NUDGE_FORCEINLINE simd4_float dot(simd4_float a, simd4_float b) {
2511 simd4_float c = a*b;
2512 return simd128::shuffle32<0,0,0,0>(c) + simd128::shuffle32<1,1,1,1>(c) + simd128::shuffle32<2,2,2,2>(c);
2513 }
2514
2515 NUDGE_FORCEINLINE simd4_float cross(simd4_float a, simd4_float b) {
2516 simd4_float c = simd128::shuffle32<1,2,0,0>(a) * simd128::shuffle32<2,0,1,0>(b);
2517 simd4_float d = simd128::shuffle32<2,0,1,0>(a) * simd128::shuffle32<1,2,0,0>(b);
2518 return c - d;
2519 }
2520}
2521
2522namespace simd_soa {
2523 NUDGE_FORCEINLINE void cross(simd4_float ax, simd4_float ay, simd4_float az, simd4_float bx, simd4_float by, simd4_float bz, simd4_float& rx, simd4_float& ry, simd4_float& rz) {
2524 rx = ay*bz - az*by;
2525 ry = az*bx - ax*bz;
2526 rz = ax*by - ay*bx;
2527 }
2528
2529 NUDGE_FORCEINLINE void normalize(simd4_float& x, simd4_float& y, simd4_float& z) {
2530 simd4_float f = simd_float::rsqrt(x*x + y*y + z*z);
2531 x *= f;
2532 y *= f;
2533 z *= f;
2534 }
2535
2536#if NUDGE_SIMDV_WIDTH >= 256
2537 NUDGE_FORCEINLINE void cross(simd8_float ax, simd8_float ay, simd8_float az, simd8_float bx, simd8_float by, simd8_float bz, simd8_float& rx, simd8_float& ry, simd8_float& rz) {
2538 rx = ay*bz - az*by;
2539 ry = az*bx - ax*bz;
2540 rz = ax*by - ay*bx;
2541 }
2542
2543 NUDGE_FORCEINLINE void normalize(simd8_float& x, simd8_float& y, simd8_float& z) {
2544 simd8_float f = simd_float::rsqrt(x*x + y*y + z*z);
2545 x *= f;
2546 y *= f;
2547 z *= f;
2548 }
2549#endif
2550}
2551
2552#ifdef NUDGE_USE_ANONYMOUS_NAMESPACE
2553namespace {
2554#endif
2555 struct float3 {
2556 float x, y, z;
2557 };
2558
2559 struct float3x3 {
2560 float3 c0, c1, c2;
2561 };
2562
2563 struct Rotation {
2564 float3 v;
2565 float s;
2566 };
2567
2568 struct AABB {
2569 float3 min;
2570 float unused0;
2571 float3 max;
2572 float unused1;
2573 };
2574
2575 struct AABBV {
2576 float min_x[simdv_width32];
2577 float max_x[simdv_width32];
2578 float min_y[simdv_width32];
2579 float max_y[simdv_width32];
2580 float min_z[simdv_width32];
2581 float max_z[simdv_width32];
2582 };
2583
2584 struct ContactSlotV {
2585 uint32_t indices[simdv_width32];
2586 };
2587
2588 struct ContactPairV {
2589 uint32_t ab[simdv_width32];
2590 };
2591
2592 struct ContactConstraintV {
2593 uint16_t a[simdv_width32];
2594 uint16_t b[simdv_width32];
2595
2596 float pa_z[simdv_width32];
2597 float pa_x[simdv_width32];
2598 float pa_y[simdv_width32];
2599
2600 float pb_z[simdv_width32];
2601 float pb_x[simdv_width32];
2602 float pb_y[simdv_width32];
2603
2604 float n_x[simdv_width32];
2605 float u_x[simdv_width32];
2606 float v_x[simdv_width32];
2607
2608 float n_y[simdv_width32];
2609 float u_y[simdv_width32];
2610 float v_y[simdv_width32];
2611
2612 float n_z[simdv_width32];
2613 float u_z[simdv_width32];
2614 float v_z[simdv_width32];
2615
2616 float bias[simdv_width32];
2617 float friction[simdv_width32];
2618 float normal_velocity_to_normal_impulse[simdv_width32];
2619
2620 float friction_coefficient_x[simdv_width32];
2621 float friction_coefficient_y[simdv_width32];
2622 float friction_coefficient_z[simdv_width32];
2623
2624 float na_x[simdv_width32];
2625 float na_y[simdv_width32];
2626 float na_z[simdv_width32];
2627
2628 float nb_x[simdv_width32];
2629 float nb_y[simdv_width32];
2630 float nb_z[simdv_width32];
2631
2632 float ua_x[simdv_width32];
2633 float ua_y[simdv_width32];
2634 float ua_z[simdv_width32];
2635
2636 float va_x[simdv_width32];
2637 float va_y[simdv_width32];
2638 float va_z[simdv_width32];
2639
2640 float ub_x[simdv_width32];
2641 float ub_y[simdv_width32];
2642 float ub_z[simdv_width32];
2643
2644 float vb_x[simdv_width32];
2645 float vb_y[simdv_width32];
2646 float vb_z[simdv_width32];
2647 };
2648
2649 struct ContactConstraintStateV {
2650 float applied_normal_impulse[simdv_width32];
2651 float applied_friction_impulse_x[simdv_width32];
2652 float applied_friction_impulse_y[simdv_width32];
2653 };
2654
2655 struct InertiaTransform {
2656 float xx;
2657 float yy;
2658 float zz;
2659 float unused0;
2660 float xy;
2661 float xz;
2662 float yz;
2663 float unused1;
2664 };
2665#ifdef NUDGE_USE_ANONYMOUS_NAMESPACE
2666}
2667#endif
2668
// Returns the zero-based index of the least significant set bit of x.
// x must be non-zero; the result for x == 0 is not meaningful.
//
// The MSVC branch is selected with _MSC_VER, the macro the Microsoft compiler
// actually predefines; every other compiler uses the GCC/Clang builtin.
#ifdef _MSC_VER
static inline unsigned first_set_bit(unsigned x) {
    unsigned long index = 0;
    _BitScanForward(&index, x);
    return (unsigned)index;
}
#else
static inline unsigned first_set_bit(unsigned x) {
    return (unsigned)__builtin_ctz(x);
}
#endif
2680
2681
2682static inline void* align(Arena* arena, uintptr_t alignment) {
2683 uintptr_t data = (uintptr_t)arena->data;
2684 uintptr_t end = data + arena->size;
2685 uintptr_t mask = alignment-1;
2686
2687 data = (data + mask) & ~mask;
2688
2689 arena->data = (void*)data;
2690 arena->size = end - data;
2691
2692 assert((intptr_t)arena->size >= 0); // Out of memory.
2693
2694 return arena->data;
2695}
2696
2697static inline void* allocate(Arena* arena, uintptr_t size) {
2698 void* data = arena->data;
2699 arena->data = (void*)((uintptr_t)data + size);
2700 arena->size -= size;
2701
2702 assert((intptr_t)arena->size >= 0); // Out of memory.
2703
2704 return data;
2705}
2706
2707static inline void* allocate(Arena* arena, uintptr_t size, uintptr_t alignment) {
2708 align(arena, alignment);
2709
2710 void* data = arena->data;
2711 arena->data = (void*)((uintptr_t)data + size);
2712 arena->size -= size;
2713
2714 assert((intptr_t)arena->size >= 0); // Out of memory. [this probably happens when initial arena size is too small (to many live contacts per frame). aligned_realloc does not exist on most systems, and probably would not work]
2715
2716 return data;
2717}
2718
2719template<class T>
2720static inline T* allocate_struct(Arena* arena, uintptr_t alignment) {
2721 return static_cast<T*>(allocate(arena, sizeof(T), alignment));
2722}
2723
2724template<class T>
2725static inline T* allocate_array(Arena* arena, uintptr_t count, uintptr_t alignment) {
2726 return static_cast<T*>(allocate(arena, sizeof(T)*count, alignment));
2727}
2728
2729static inline void* reserve(Arena* arena, uintptr_t size, uintptr_t alignment) {
2730 align(arena, alignment);
2731 assert(size <= arena->size); // Cannot reserve this amount.
2732 return arena->data;
2733}
2734
2735static inline void commit(Arena* arena, uintptr_t size) {
2736 allocate(arena, size);
2737}
2738
2739template<class T>
2740static inline T* reserve_array(Arena* arena, uintptr_t count, uintptr_t alignment) {
2741 return static_cast<T*>(reserve(arena, sizeof(T)*count, alignment));
2742}
2743
2744template<class T>
2745static inline void commit_array(Arena* arena, uintptr_t count) {
2746 commit(arena, sizeof(T)*count);
2747}
2748
2749static inline Rotation make_rotation(const float q[4]) {
2750 Rotation r = { { q[0], q[1], q[2] }, q[3] };
2751 return r;
2752}
2753
2754static inline float3 make_float3(const float x[3]) {
2755 float3 r = { x[0], x[1], x[2] };
2756 return r;
2757}
2758
2759static inline float3 make_float3(float x, float y, float z) {
2760 float3 r = { x, y, z };
2761 return r;
2762}
2763
2764static inline float3 make_float3(float x) {
2765 float3 r = { x, x, x };
2766 return r;
2767}
2768
2769static inline float3 operator + (float3 a, float3 b) {
2770 float3 r = { a.x + b.x, a.y + b.y, a.z + b.z };
2771 return r;
2772}
2773
2774static inline float3 operator - (float3 a, float3 b) {
2775 float3 r = { a.x - b.x, a.y - b.y, a.z - b.z };
2776 return r;
2777}
2778
2779static inline float3 operator * (float a, float3 b) {
2780 float3 r = { a * b.x, a * b.y, a * b.z };
2781 return r;
2782}
2783
2784static inline float3 operator * (float3 a, float b) {
2785 float3 r = { a.x * b, a.y * b, a.z * b };
2786 return r;
2787}
2788
2789static inline float3& operator *= (float3& a, float b) {
2790 a.x *= b;
2791 a.y *= b;
2792 a.z *= b;
2793 return a;
2794}
2795
2796static inline float dot(float3 a, float3 b) {
2797 return a.x*b.x + a.y*b.y + a.z*b.z;
2798}
2799
2800static inline float length2(float3 a) {
2801 return dot(a, a);
2802}
2803
2804static inline float3 cross(float3 a, float3 b) {
2805 float3 v = { a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
2806 return v;
2807}
2808
2809static inline float3 operator * (Rotation lhs, float3 rhs) {
2810 float3 t = 2.0f * cross(lhs.v, rhs);
2811 return rhs + lhs.s * t + cross(lhs.v, t);
2812}
2813
2814static inline Rotation operator * (Rotation lhs, Rotation rhs) {
2815 float3 v = rhs.v*lhs.s + lhs.v*rhs.s + cross(lhs.v, rhs.v);
2816 Rotation r = { v, lhs.s*rhs.s - dot(lhs.v, rhs.v) };
2817 return r;
2818}
2819
2820static inline Rotation normalize(Rotation r) {
2821 float f = 1.0f / sqrtf(r.s*r.s + r.v.x*r.v.x + r.v.y*r.v.y + r.v.z*r.v.z);
2822 r.v *= f;
2823 r.s *= f;
2824 return r;
2825}
2826
2827static inline Rotation inverse(Rotation r) {
2828 r.v.x = -r.v.x;
2829 r.v.y = -r.v.y;
2830 r.v.z = -r.v.z;
2831 return r;
2832}
2833
2834static inline float3x3 matrix(Rotation q) {
2835 float kx = q.v.x + q.v.x;
2836 float ky = q.v.y + q.v.y;
2837 float kz = q.v.z + q.v.z;
2838
2839 float xx = kx*q.v.x;
2840 float yy = ky*q.v.y;
2841 float zz = kz*q.v.z;
2842 float xy = kx*q.v.y;
2843 float xz = kx*q.v.z;
2844 float yz = ky*q.v.z;
2845 float sx = kx*q.s;
2846 float sy = ky*q.s;
2847 float sz = kz*q.s;
2848
2849 float3x3 m = {
2850 { 1.0f - yy - zz, xy + sz, xz - sy },
2851 { xy - sz, 1.0f - xx - zz, yz + sx },
2852 { xz + sy, yz - sx, 1.0f - xx - yy },
2853 };
2854 return m;
2855}
2856
2857static inline Transform operator * (Transform lhs, Transform rhs) {
2858 float3 p = make_rotation(lhs.rotation) * make_float3(rhs.position) + make_float3(lhs.position);
2859 Rotation q = make_rotation(lhs.rotation) * make_rotation(rhs.rotation);
2860
2861 Transform r = {
2862 {{ p.x, p.y, p.z }},
2863 {rhs.body},
2864 {{ q.v.x, q.v.y, q.v.z, q.s }},
2865 };
2866 return r;
2867}
2868
2869// old function declarations now hidden in the implementation
2870void simulate(context_t* c,float timeStep, unsigned numSubSteps, unsigned numIterations);
2871void collide(context_t* c, BodyConnections body_connections);
2872ContactImpulseData* read_cached_impulses(ContactCache contact_cache, ContactData contacts, Arena* memory);
2873void write_cached_impulses(ContactCache* contact_cache, ContactData contacts, ContactImpulseData* contact_impulses);
2874ContactConstraintData* setup_contact_constraints(context_t* c,/*ActiveBodies active_bodies, ContactData contacts, BodyData bodies,*/ ContactImpulseData* contact_impulses, Arena* memory);
2875void apply_impulses(ContactConstraintData* data, BodyData bodies);
2876void update_cached_impulses(ContactConstraintData* data, ContactImpulseData* contact_impulses);
2877void advance(context_t* c, float time_step);
2878
2879// new stuff -------------------------------------------------------------------------
2880Transform TransformMul(Transform T0,Transform T1) {return T0*T1;}
2881
2882#ifdef NUDGE_SIMDE_USE_CUSTOM_MM_MALLOC
2883#if (defined(__EMSCRIPTEN__) || (defined(NUDGE_USE_SIMDE) && defined(SIMDE_NO_NATIVE)))
// Fallback aligned allocator for builds where the intrinsics headers do not
// supply _mm_malloc. Returns NULL when posix_memalign fails.
static inline void* _mm_malloc (size_t size, size_t alignment) {
#   ifdef _WIN32
    return ::_aligned_malloc(size, alignment);
#   else
    if (alignment == 1) return ::malloc (size);

    // Small alignments (2, or 4 when pointers are 8 bytes wide) are raised to
    // the pointer size before calling posix_memalign.
    if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4)) alignment = sizeof (void *);

    void *ptr;
    if (::posix_memalign (&ptr, alignment, size) != 0) return NULL;
    return ptr;
#   endif
}
// Releases memory obtained from the fallback _mm_malloc.
// The platform test has to be the same one _mm_malloc uses (_WIN32, which the
// compiler predefines): memory from _aligned_malloc must be released with
// _aligned_free, so testing a different macro (WIN32 is not always defined)
// could pair _aligned_malloc with plain free.
static inline void _mm_free (void * ptr) {
#   ifdef _WIN32
    ::_aligned_free(ptr);
#   else
    ::free (ptr);
#   endif
}
2902#endif
2903#endif // NUDGE_SIMDE_USE_CUSTOM_MM_MALLOC
2904
2905void* malloc(size_t size) {return _mm_malloc(size,64);}
2906void free(void* ptr) {_mm_free(ptr);}
2907
// Converts the rotation stored in a matrix to a quaternion (x, y, z, w) written
// to result4[0..3]; returns result4.
// Elements are read from m16 at offsets 0..2, num_m16_cols..num_m16_cols+2 and
// 2*num_m16_cols..2*num_m16_cols+2, so pass 4 for a 4x4 matrix and 3 for a 3x3.
inline float* nm_QuatFromMat3Or4(float* __restrict result4,const float* __restrict m16,int num_m16_cols=4) {
    // this code is glm based
    const int stride1 = num_m16_cols;
    const int stride2 = num_m16_cols*2;

    const float c00 = m16[0],       c01 = m16[1],         c02 = m16[2];
    const float c10 = m16[stride1], c11 = m16[stride1+1], c12 = m16[stride1+2];
    const float c20 = m16[stride2], c21 = m16[stride2+1], c22 = m16[stride2+2];

    // 4*c*c - 1 for each quaternion component c, taken from the diagonal.
    const float fourWSquaredMinus1 = c00 + c11 + c22;
    const float fourXSquaredMinus1 = c00 - c11 - c22;
    const float fourYSquaredMinus1 = c11 - c00 - c22;
    const float fourZSquaredMinus1 = c22 - c00 - c11;

    // Pick the largest component (0 = w, 1 = x, 2 = y, 3 = z); it is
    // computed from the square root and the others are derived from it.
    float fourBiggestSquaredMinus1 = fourWSquaredMinus1;
    int biggestIndex = 0;
    if (fourXSquaredMinus1 > fourBiggestSquaredMinus1) {
        fourBiggestSquaredMinus1 = fourXSquaredMinus1;
        biggestIndex = 1;
    }
    if (fourYSquaredMinus1 > fourBiggestSquaredMinus1) {
        fourBiggestSquaredMinus1 = fourYSquaredMinus1;
        biggestIndex = 2;
    }
    if (fourZSquaredMinus1 > fourBiggestSquaredMinus1) {
        fourBiggestSquaredMinus1 = fourZSquaredMinus1;
        biggestIndex = 3;
    }

    const float biggestVal = sqrtf(fourBiggestSquaredMinus1 + 1.0f) * 0.5f;
    const float mult = 0.25f / biggestVal;

    float x, y, z, w;
    switch (biggestIndex) {
    case 0:
        w = biggestVal;
        x = (c12 - c21) * mult;
        y = (c20 - c02) * mult;
        z = (c01 - c10) * mult;
        break;
    case 1:
        w = (c12 - c21) * mult;
        x = biggestVal;
        y = (c01 + c10) * mult;
        z = (c20 + c02) * mult;
        break;
    case 2:
        w = (c20 - c02) * mult;
        x = (c01 + c10) * mult;
        y = biggestVal;
        z = (c12 + c21) * mult;
        break;
    case 3:
        w = (c01 - c10) * mult;
        x = (c20 + c02) * mult;
        y = (c12 + c21) * mult;
        z = biggestVal;
        break;
    default: // Unreachable; present only to silence -Wswitch-default in GCC.
        x = y = z = 0.0f;
        w = 1.0f;
        break;
    }

    result4[3] = w;
    result4[0] = x;
    result4[1] = y;
    result4[2] = z;
    return result4;
}
2947float* nm_QuatFromMat4(float* __restrict result4,const float* __restrict m16) {return nm_QuatFromMat3Or4(result4,m16,4);}
2948float* nm_QuatFromMat3(float* __restrict result4,const float* __restrict m9) {return nm_QuatFromMat3Or4(result4,m9,3);}
2949
// Writes the 3x3 rotation matrix of quaternion q4 (x,y,z,w) into result16.
// Element cIJ is stored at result16[I*num_res_cols + J]; only those nine floats are written,
// every other element of the destination is left untouched.
// Returns result16.
inline float* nm_Mat3Or4SetRotationFromQuat(float* __restrict result16,const float* __restrict q4,int num_res_cols=4) {
    // this code is glm based
    const float x = q4[0], y = q4[1], z = q4[2], w = q4[3];
    const float xx = x * x, yy = y * y, zz = z * z;
    const float xz = x * z, xy = x * y, yz = y * z;
    const float wx = w * x, wy = w * y, wz = w * z;

    float* g0 = result16;
    float* g1 = result16 + num_res_cols;
    float* g2 = result16 + num_res_cols*2;

    g0[0] = 1.f - 2.f * (yy + zz);
    g0[1] = 2.f * (xy + wz);
    g0[2] = 2.f * (xz - wy);

    g1[0] = 2.f * (xy - wz);
    g1[1] = 1.f - 2.f * (xx + zz);
    g1[2] = 2.f * (yz + wx);

    g2[0] = 2.f * (xz + wy);
    g2[1] = 2.f * (yz - wx);
    g2[2] = 1.f - 2.f * (xx + yy);

    return result16;
}
// Sets the rotation part of a 16-float matrix from q4.
float* nm_Mat4SetRotationFromQuat(float* __restrict result16,const float* __restrict q4) {
    return nm_Mat3Or4SetRotationFromQuat(result16, q4, 4);
}
// Fills a 9-float matrix from q4.
float* nm_Mat3FromQuat(float* __restrict result9,const float* __restrict q4) {
    return nm_Mat3Or4SetRotationFromQuat(result9, q4, 3);
}
2969
// Estimates the angular velocity that takes oldQuat4 to newQuat4 (both x,y,z,w).
// Computes the vector part of (newQuat4-oldQuat4)*conjugate(oldQuat4) divided by halfTimeStep;
// when halfTimeStep is 0 the output is the zero vector.
// Preconditions (not checked): unit-length quaternions that are 'close' to each other
// (i.e. a small halfTimeStep).
void nm_QuatGetAngularVelocity(float* __restrict angVel3,const float* newQuat4,const float* oldQuat4,float halfTimeStep) {
    // delta quaternion
    const float dx = newQuat4[0]-oldQuat4[0];
    const float dy = newQuat4[1]-oldQuat4[1];
    const float dz = newQuat4[2]-oldQuat4[2];
    const float dw = newQuat4[3]-oldQuat4[3];
    // conjugate (= inverse for unit length) of the old quaternion
    const float ix = -oldQuat4[0];
    const float iy = -oldQuat4[1];
    const float iz = -oldQuat4[2];
    const float iw =  oldQuat4[3];
    // a zero step yields a zero velocity instead of a division by zero
    const float invHalfTimeStep = (halfTimeStep != 0.f) ? 1.f/halfTimeStep : 0.f;

    angVel3[0] = (dw * ix + dx * iw + dy * iz - dz * iy) * invHalfTimeStep; // x
    angVel3[1] = (dw * iy + dy * iw + dz * ix - dx * iz) * invHalfTimeStep; // y
    angVel3[2] = (dw * iz + dz * iw + dx * iy - dy * ix) * invHalfTimeStep; // z
}
// Quaternion (Hamilton) product qOut4 = a4 * b4, all quaternions stored as (x,y,z,w).
// qOut4 may alias a4 and/or b4: both operands are fully read before anything is written.
// Returns qOut4.
float* nm_QuatMul(float* /*__restrict*/ qOut4,const float* /*__restrict*/ a4,const float* /*__restrict*/ b4) {
// The SIMD path needs SSE3 (_mm_hadd_ps/_mm_hsub_ps/_mm_addsub_ps), so it is guarded by __SSE3__.
// Unaligned load/store are used because callers pass plain float[4] arrays (e.g. stack
// temporaries) whose 16-byte alignment is not guaranteed.
#   if (defined(NM_USE_SIMD) && defined(__SSE3__))
#   define NM_MM_LOAD_PS(X)         _mm_loadu_ps(X)
#   define NM_MM256_LOAD_PD(X)      _mm256_loadu_pd(X)
#   define NM_MM_STORE_PS(X,Y)      _mm_storeu_ps(X,Y)
#   define NM_MM256_STORE_PD(X,Y)   _mm256_storeu_pd(X,Y)
    __m128 xyzw = NM_MM_LOAD_PS(a4);
    __m128 abcd = NM_MM_LOAD_PS(b4);

    __m128 wzyx = _mm_shuffle_ps(xyzw, xyzw, _MM_SHUFFLE(0,1,2,3));
    __m128 baba = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(0,1,0,1));
    __m128 dcdc = _mm_shuffle_ps(abcd, abcd, _MM_SHUFFLE(2,3,2,3));

    /* variable names below are for parts of components of result (X,Y,Z,W) */
    /* nX stands for -X and similarly for the other components             */

    /* znxwy  = (xb - ya, zb - wa, wd - zc, yd - xc) */
    __m128 ZnXWY = _mm_hsub_ps(_mm_mul_ps(xyzw, baba), _mm_mul_ps(wzyx, dcdc));

    /* xzynw  = (xd + yc, zd + wc, wb + za, yb + xa) */
    __m128 XZYnW = _mm_hadd_ps(_mm_mul_ps(xyzw, dcdc), _mm_mul_ps(wzyx, baba));

    /* _mm_shuffle_ps(XZYnW, ZnXWY, _MM_SHUFFLE(3,2,1,0)) */
    /*      = (xd + yc, zd + wc, wd - zc, yd - xc)        */
    /* _mm_shuffle_ps(ZnXWY, XZYnW, _MM_SHUFFLE(2,3,0,1)) */
    /*      = (zb - wa, xb - ya, yb + xa, wb + za)        */

    /* _mm_addsub_ps adds elements 1 and 3 and subtracts elements 0 and 2, so we get: */
    /* _mm_addsub_ps(*, *) = (xd+yc-zb+wa, xb-ya+zd+wc, wd-zc+yb+xa, yd-xc+wb+za)     */

    __m128 XZWY = _mm_addsub_ps(_mm_shuffle_ps(XZYnW, ZnXWY, _MM_SHUFFLE(3,2,1,0)),
                                _mm_shuffle_ps(ZnXWY, XZYnW, _MM_SHUFFLE(2,3,0,1)));

    /* now we only need to shuffle the components in place and return the result */
    NM_MM_STORE_PS(qOut4,_mm_shuffle_ps(XZWY, XZWY, _MM_SHUFFLE(2,1,3,0)));
#   else //NM_USE_SIMD
    /* reference implementation: operands are copied to locals first, so aliasing is safe */
    const float x = a4[0], y = a4[1], z = a4[2], w = a4[3];
    const float a = b4[0], b = b4[1], c = b4[2], d = b4[3];
    qOut4[0] =  x*d + y*c - z*b + w*a;
    qOut4[1] = -x*c + y*d + z*a + w*b;
    qOut4[2] =  x*b - y*a + z*d + w*c;
    qOut4[3] = -x*a - y*b - z*c + w*d;
#   endif //NM_USE_SIMD
    return qOut4;
}
3043void nm_QuatAdvance(float* __restrict qOut4,const float* __restrict q4,const float* __restrict angVel3,float halfTimeStep) {
3044 // assert: this works for unit length quaternions only
3045 // advancement must be small for this to work (small halfTimeStep)
3046 float deltaQ[4] = {angVel3[0],angVel3[1],angVel3[2],(float)0};int i;
3047 nm_QuatMul(deltaQ,deltaQ,q4);
3048 for (i=0;i<4;i++) qOut4[i] = q4[i]+deltaQ[i]*halfTimeStep;
3049 nm_QuatNormalize(qOut4);
3050
3051 //nm_QuatIntegrateAngularVelocityApprox(qOut4,q4,angVel3,halfTimeStep*2.0); // is this better?
3052}
// Dot product of two 3-component vectors.
float nm_Vec3Dot(const float* __restrict a3,const float* __restrict b3) {
    float dot = a3[0]*b3[0];
    dot += a3[1]*b3[1];
    dot += a3[2]*b3[2];
    return dot;
}
// Cross product vOut3 = a3 x b3. vOut3 must not alias the inputs (__restrict).
// Returns vOut3.
float* nm_Vec3Cross(float* __restrict vOut3,const float* __restrict a3,const float* __restrict b3) {
    const float ax = a3[0], ay = a3[1], az = a3[2];
    const float bx = b3[0], by = b3[1], bz = b3[2];
    vOut3[0] = ay * bz - az * by;
    vOut3[1] = az * bx - ax * bz;
    vOut3[2] = ax * by - ay * bx;
    return vOut3;
}
// Threshold compared against the SQUARED length in the vector normalization functions
// (can be overridden by defining NM_EPSILON beforehand).
#if !defined(NM_EPSILON)
#   define NM_EPSILON (0.00000000001f)
#endif
// Normalizes v3 in place and returns its original length.
// When the squared length is not greater than NM_EPSILON, v3 is set to (0,1,0) and 0 is returned.
float nm_Vec3Normalize(float* __restrict v3) {
    const float sqrLen = v3[0]*v3[0]+v3[1]*v3[1]+v3[2]*v3[2];
    if (!(sqrLen>NM_EPSILON)) {
        // degenerate input: fall back to the Y axis
        v3[0] = 0.f;
        v3[1] = 1.f;
        v3[2] = 0.f;
        return 0.f;
    }
    const float len = sqrtf(sqrLen);
    v3[0] /= len;
    v3[1] /= len;
    v3[2] /= len;
    return len;
}
3069float nm_Vec3Normalized(float* __restrict v3Out,const float* __restrict v3) {
3070 float len = v3[0]*v3[0]+v3[1]*v3[1]+v3[2]*v3[2];int i;
3071 if (len>NM_EPSILON) {len = sqrtf(len);for (i=0;i<3;i++) v3Out[i]=v3[i]/len;}
3072 else {len=v3Out[0]=v3Out[2]=(float)0;v3Out[1]=(float)1;}
3073 return len;
3074}
// Normalizes the quaternion q4 in place.
// A zero-length quaternion is left as (0,0,0,0).
void nm_QuatNormalize(float* __restrict q4) {
    const float len = sqrtf(q4[0]*q4[0]+q4[1]*q4[1]+q4[2]*q4[2]+q4[3]*q4[3]);
    if (len>0) {
        for (int i=0;i<4;i++) q4[i] /= len;
    }
    else {
        for (int i=0;i<4;i++) q4[i] = 0.f;
    }
}
3076float* nm_QuatSlerpEps(float* __restrict result4,const float* __restrict a4,const float* __restrict b4,float slerpTime_In_0_1,int normalizeResult4AfterLerp/*=1*/,float eps/*= NM_SLERP_EPSILON*/) {
3077 // Adapted from OgraMath (www.ogre3d.org AFAIR)
3078
3079 //const int normalizeQOutAfterLerp = 1; // When using Lerp instead of Slerp qOut should be normalized. However some users prefer setting eps small enough so that they can leave the Lerp as it is.
3080 //const float eps = NM_SLERP_EPSILON; // In [0 = 100% Slerp,1 = 100% Lerp] Faster but less precise with bigger epsilon (Lerp is used instead of Slerp more often). Users should tune it to achieve a performance boost.
3081 const float one = (float)1;
3082 const float *qStart=a4;
3083 float qEnd[4]={b4[0],b4[1],b4[2],b4[3]};
3084 float* qOut=result4;
3085
3086 float fCos = qStart[0] * qEnd[0] + qStart[1] * qEnd[1] + qStart[2] * qEnd[2] + qStart[3] * qEnd[3];
3087
3088 // Do we need to invert rotation?
3089 if(fCos < 0) //Originally it was if(fCos < (float)0 && shortestPath)
3090 {fCos = -fCos;qEnd[0] = -qEnd[0];qEnd[1] = -qEnd[1];qEnd[2] = -qEnd[2];qEnd[3] = -qEnd[3];}
3091
3092 if( fCos < one - eps) // Originally if was "Ogre::Math::Abs(fCos)" instead of "fCos", but we know fCos>0, because we have hard coded shortestPath=true
3093 {
3094 // Standard case (slerp)
3095# ifndef NM_QUAT_SLERP_USE_ACOS_AND_SIN_INSTEAD_OF_ATAN2_AND_SQRT
3096 // Ogre::Quaternion uses this branch by default
3097 float fSin = sqrtf(one - fCos*fCos);
3098 float fAngle = atan2f(fSin, fCos);
3099# else //NM_QUAT_SLERP_USE_ACOS_AND_SIN_INSTEAD_OF_ATAN2_AND_SQRT
3100 // Possible replacement of the two lines above
3101 // (it's hard to tell if they're faster, but my instinct tells me I should trust atan2 better than acos (geometry geeks needed here...)):
3102 // But probably sin(...) is faster than (sqrt + 1 subtraction and mult)
3103 float fAngle = acosf(fCos);
3104 float fSin = sinf(fAngle);
3105# endif //NM_QUAT_SLERP_USE_ACOS_AND_SIN_INSTEAD_OF_ATAN2_AND_SQRT
3106
3107 const float fInvSin = one / fSin;
3108 const float fCoeff0 = sinf((one - slerpTime_In_0_1) * fAngle) * fInvSin;
3109 const float fCoeff1 = sinf(slerpTime_In_0_1 * fAngle) * fInvSin;
3110
3111 //qOut = fCoeff0 * qStart + fCoeff1 * qEnd; //Avoided for maximum portability and conversion of the code
3112 qOut[0] = (fCoeff0 * qStart[0] + fCoeff1 * qEnd[0]);
3113 qOut[1] = (fCoeff0 * qStart[1] + fCoeff1 * qEnd[1]);
3114 qOut[2] = (fCoeff0 * qStart[2] + fCoeff1 * qEnd[2]);
3115 qOut[3] = (fCoeff0 * qStart[3] + fCoeff1 * qEnd[3]);
3116 } else
3117 {
3118 // There are two situations:
3119 // 1. "qStart" and "qEnd" are very close (fCos ~= +1), so we can do a linear
3120 // interpolation safely.
3121 // 2. "qStart" and "qEnd" are almost inverse of each other (fCos ~= -1), there
3122 // are an infinite number of possibilities interpolation. but we haven't
3123 // have method to fix this case, so just use linear interpolation here.
3124 // IMPORTANT: CASE 2 can't happen anymore because we have hardcoded "shortestPath = true" and now fCos > 0
3125
3126 const float fCoeff0 = one - slerpTime_In_0_1;
3127 const float fCoeff1 = slerpTime_In_0_1;
3128
3129 //qOut = fCoeff0 * qStart + fCoeff1 * qEnd; //Avoided for maximum portability and conversion of the code
3130 qOut[0] = (fCoeff0 * qStart[0] + fCoeff1 * qEnd[0]);
3131 qOut[1] = (fCoeff0 * qStart[1] + fCoeff1 * qEnd[1]);
3132 qOut[2] = (fCoeff0 * qStart[2] + fCoeff1 * qEnd[2]);
3133 qOut[3] = (fCoeff0 * qStart[3] + fCoeff1 * qEnd[3]);
3134 if (normalizeResult4AfterLerp) nm_QuatNormalize(qOut);
3135 }
3136
3137 return qOut;
3138}
3139#ifndef NM_SLERP_EPSILON
3140# define NM_SLERP_EPSILON (0.0001f)
3141#endif //NM_SLERP_EPSILON
3142float* nm_QuatSlerp(float* __restrict result4,const float* __restrict a4,const float* __restrict b4,float slerpTime_In_0_1,int normalizeResult4AfterLerp/*=1*/) {return nm_QuatSlerpEps(result4,a4,b4,slerpTime_In_0_1,normalizeResult4AfterLerp,NM_SLERP_EPSILON);}
// Builds the quaternion (x,y,z,w) of a rotation of rfAngle radians around the given axis:
//   q = cos(A/2) + sin(A/2)*(x*i + y*j + z*k)
// The axis is expected to have unit length (not checked). Returns qOut4.
float* nm_QuatFromAngleAxis(float* __restrict qOut4,float rfAngle,float rkAxisX,float rkAxisY,float rkAxisZ) {
    const float halfAngle = (float)(0.5)*rfAngle;
    const float s = sinf(halfAngle);
    const float c = cosf(halfAngle);
    qOut4[0] = s*rkAxisX;
    qOut4[1] = s*rkAxisY;
    qOut4[2] = s*rkAxisZ;
    qOut4[3] = c;
    return qOut4;
}
// Decomposes the quaternion q4 (x,y,z,w) into a rotation angle (radians) and a unit axis,
// using q = cos(A/2) + sin(A/2)*(x*i + y*j + z*k).
//   rfAngleOut1  receives the angle 2*acos(w)
//   rkAxisOut3   receives the normalized vector part of q4
// When the vector part is exactly zero the angle is 0 (mod 2*pi) and any axis will do:
// angle 0 and axis (0,1,0) are returned.
void nm_QuatToAngleAxis(const float* __restrict q4,float* __restrict rfAngleOut1,float* __restrict rkAxisOut3) {
    const float fSqrLength = q4[0]*q4[0]+q4[1]*q4[1]+q4[2]*q4[2];
    if (fSqrLength > (float)0) {
        const float fInvLength = (float)1/sqrtf(fSqrLength);
        // A nominally unit quaternion can carry |w| marginally above 1 because of rounding:
        // clamp it, otherwise acosf() returns NaN.
        float w = q4[3];
        if (w > (float)1) w = (float)1;
        else if (w < (float)-1) w = (float)-1;
        *rfAngleOut1 = (float)2*acosf(w);
        rkAxisOut3[0] = q4[0]*fInvLength;
        rkAxisOut3[1] = q4[1]*fInvLength;
        rkAxisOut3[2] = q4[2]*fInvLength;
    }
    else {
        // angle is 0 (mod 2*pi), so any axis will do
        *rfAngleOut1 = (float)0;
        rkAxisOut3[0] = (float)0;
        rkAxisOut3[1] = (float)1;
        rkAxisOut3[2] = (float)0;
    }
}
3180Transform* Mat4WithoutScalingToTransform(Transform* Tout, const float* matrix16WithoutScaling) {
3181 if (matrix16WithoutScaling) {
3182 nm_QuatFromMat4(Tout->rotation,matrix16WithoutScaling);
3183 memcpy(Tout->position,&matrix16WithoutScaling[12],3*sizeof(float));
3184 }
3185 else *Tout = identity_transform;
3186 return Tout;
3187}
3188Transform Mat4WithoutScalingToTransform(const float* matrix16WithoutScaling) {
3189 Transform Tout;
3190 assert(matrix16WithoutScaling);
3191 nm_QuatFromMat4(Tout.rotation,matrix16WithoutScaling);
3192 memcpy(Tout.position,&matrix16WithoutScaling[12],3*sizeof(float));
3193 return Tout;
3194}
3195float* TransformToMat4(float* matrix16Out,const Transform* T) {
3196 int i;
3197 nm_Mat4SetRotationFromQuat(matrix16Out,T->rotation);
3198 for (i=0;i<3;i++) matrix16Out[12+i] = T->position[i];
3199 matrix16Out[3]=matrix16Out[7]=matrix16Out[11]=0.f;matrix16Out[15]=1.f;
3200 return matrix16Out;
3201}
3202float* nm_QuatMulVec3(float* __restrict vOut3,const float* __restrict q4,const float* __restrict vIn3) {
3203 float uv[3],uuv[3];int i;
3204 nudge::nm_Vec3Cross(uuv,q4,nm_Vec3Cross(uv,q4,vIn3));
3205 for (i=0;i<3;i++) vOut3[i] = vIn3[i] + ((uv[i] * q4[3]) + uuv[i]) * (float)2;
3206 return vOut3;
3207}
3208float* nm_QuatGetAxis(float* __restrict vOut3,const float* __restrict q4,float axisX,float axisY,float axisZ) {
3209 const float vIn[3]={axisX,axisY,axisZ};
3210 return nm_QuatMulVec3(vOut3,q4,vIn);
3211 /* Other stuff that we can do if input axis is {0,1,0}:
3212 // direct calculation
3213 //vOut3[0] = 2.f*(q4[0]*q4[2]+q4[3]*q4[1]);
3214 //vOut3[1] = 2.f*(q4[1]*q4[2]-q4[3]*q4[0]);
3215 //vOut3[2] = 1.f-2.f*(q4[0]*q4[0]+q4[1]*q4[1]);
3216
3217 // or this
3218 float angle,axis[3];nm_QuatToAngleAxis(q4,&angle,axis); // if we know that 'axis' is (0,1,0) in advance, we can ignore it
3219 //angle*=axis[1]; // axis[1] can be 1 or -1 AFAICS [useless if quat axis y is always axis[1]==1]
3220 vOut3[0]=sinf(angle),vOut3[1]=0.f,vOut3[2]=cosf(angle); // however I'm not too sure this is faster than the other methods...
3221 */
3222}
3223float* nm_QuatRotate(float* __restrict qInOut4,float angle,float axisX,float axisY,float axisZ) {
3224 float qa[4];nm_QuatFromAngleAxis(qa,angle,axisX,axisY,axisZ);
3225 return nm_QuatMul(qInOut4,qInOut4,qa);
3226}
// 4x4 matrix product result16 = ml16 * mr16, where element (row r, column c) is stored at [c*4+r].
// No aliasing is allowed here (all pointers are __restrict): use nm_Mat4Mul when the
// destination can be one of the operands. Returns result16.
inline float* nm_Mat4Mul_NoCheck(float* __restrict result16,const float* __restrict ml16,const float* __restrict mr16) {
    int i,i4;float mri4plus0,mri4plus1,mri4plus2,mri4plus3;
    for(i = 0; i < 4; i++) {
        // result column i = ml16 * (column i of mr16)
        i4=4*i;mri4plus0=mr16[i4];mri4plus1=mr16[i4+1];mri4plus2=mr16[i4+2];mri4plus3=mr16[i4+3];
        result16[  i4] = ml16[0]*mri4plus0 + ml16[4]*mri4plus1 + ml16[ 8]*mri4plus2 + ml16[12]*mri4plus3;
        result16[1+i4] = ml16[1]*mri4plus0 + ml16[5]*mri4plus1 + ml16[ 9]*mri4plus2 + ml16[13]*mri4plus3;
        result16[2+i4] = ml16[2]*mri4plus0 + ml16[6]*mri4plus1 + ml16[10]*mri4plus2 + ml16[14]*mri4plus3;
        result16[3+i4] = ml16[3]*mri4plus0 + ml16[7]*mri4plus1 + ml16[11]*mri4plus2 + ml16[15]*mri4plus3;
    }
    return result16;
}
// 4x4 matrix product result16 = ml16 * mr16 that tolerates result16 being the same pointer as
// ml16, mr16 or both (e.g. squaring a matrix in place): every aliased operand is copied to a
// temporary first, so nm_Mat4Mul_NoCheck never sees its output aliased. Returns result16.
float* nm_Mat4Mul(float* result16,const float* ml16,const float* mr16) {
    float ML16[16],MR16[16];
    // the two tests are independent: result16 can alias both operands at the same time
    if (result16==ml16) {memcpy(ML16,ml16,16*sizeof(float));ml16=ML16;}
    if (result16==mr16) {memcpy(MR16,mr16,16*sizeof(float));mr16=MR16;}
    return nm_Mat4Mul_NoCheck(result16,ml16,mr16);
}
3243
3244void TransformAssignToBody(context_t* c,unsigned body,Transform newT,float deltaTime,int16_t aux_body) {
3245 assert(c && body<c->bodies.count);
3246 BodyFilter* filter = &c->bodies.filters[body];
3247 const FlagMask flags = filter->flags;
3248 Transform* T = &c->bodies.transforms[body];
3249 float* P = newT.position;float* Q = newT.rotation;
3250 // calculate velocities
3251 float* linvel = c->bodies.momentum[body].velocity;
3252 float* angvel = c->bodies.momentum[body].angular_velocity;
3253 if (deltaTime!=0.f) {
3254 nm_QuatGetAngularVelocity(angvel,Q,T->rotation,deltaTime*0.5f);
3255 if (aux_body>=0) {
3256 assert((unsigned)aux_body!=T->body);
3257 assert((unsigned)aux_body<c->bodies.count);
3258 const float* auxLinVel = c->bodies.momentum[aux_body].velocity;
3259 const float* auxAngVel = c->bodies.momentum[aux_body].angular_velocity;
3260 const Transform* auxT = &c->bodies.transforms[aux_body];
3261 // add auxLinVel and auxAngVel to linvel and angvel
3262 const float delta_position[3] = {T->position[0] - auxT->position[0],T->position[1] - auxT->position[1],T->position[2] - auxT->position[2]};
3263 float deltaLinVel[3];nm_Vec3Cross(deltaLinVel,auxAngVel,delta_position);
3264 for (int l=0;l<3;l++) {
3265 linvel[l]=(P[l]-T->position[l])/deltaTime + auxLinVel[l] + deltaLinVel[l];
3266 angvel[l]+=auxAngVel[l];
3267 }
3268 if (flags&BF_IS_KINEMATIC) {
3269 // recalculate P and Q based on T, linvel and angvel
3270 for (int l=0;l<3;l++) {P[l]=T->position[l]+linvel[l]*deltaTime;}
3271 nm_QuatAdvance(Q,T->rotation,angvel,deltaTime*0.5f);
3272 //nm_QuatNormalize(T->rotation);
3273 }
3274 }
3275 else {for (int l=0;l<3;l++) linvel[l]=(P[l]-T->position[l])/deltaTime;}
3276 if (flags&BF_IS_KINEMATIC) {
3277 // assign the new position and orientation
3278 memcpy(T->position,P,3*sizeof(float));
3279 memcpy(T->rotation,Q,4*sizeof(float));
3280 }
3281 }
3282 else {
3283 // if aux_body is present this is not correct (but we'd need delta_aux_T to calculate it)
3284 memset(linvel,0,3*sizeof(float));
3285 memset(angvel,0,3*sizeof(float));
3286 // assign the new position and orientation
3287 memcpy(T->position,P,3*sizeof(float));
3288 memcpy(T->rotation,Q,4*sizeof(float));
3289 }
3290 if (flags&BF_IS_DYNAMIC)
3291 c->bodies.idle_counters[body]=0; // this prevents sleeping (but does not improve things if body has a negative mass
3292}
3293void TransformAdvanceBodyFromVelocities(context_t* c,unsigned body,float deltaTime) {
3294 // advance Transform based on lin and ang velocities
3295 assert(c && body<c->bodies.count);
3296 Transform* newT = &c->bodies.transforms[body];
3297 Transform oldT = *newT;
3298 float* linvel = c->bodies.momentum[body].velocity;
3299 float* angvel = c->bodies.momentum[body].angular_velocity;
3300 // advance newT based on oldT, linvel and angvel directly only in kinematic objects
3301 //const float mass_inverse = c->bodies.properties[body].mass_inverse;
3302 const uint32_t flags = c->bodies.filters[body].flags;
3303 if (flags&BF_IS_KINEMATIC) {
3304 // advance newT based on oldT, linvel and angvel
3305 for (int l=0;l<3;l++) {newT->position[l]=oldT.position[l]+linvel[l]*deltaTime;}
3306 nm_QuatAdvance(newT->rotation,oldT.rotation,angvel,deltaTime*0.5f);
3307 }
3308 else if (flags&BF_IS_DYNAMIC) c->bodies.idle_counters[body]=0; // wake up dynamic object
3309}
3310
3311Transform TransformSlerp(Transform T0,Transform T1,float time) {
3312 Transform R;const float c0 = 1.f - time, c1 = time;R.body=T0.body;
3313 for (int l=0;l<3;l++) R.position[l]=c0*T0.position[l]+c1*T1.position[l];
3314 nm_QuatSlerp(R.rotation,T0.rotation,T1.rotation,time,1);
3315 return R;
3316}
3317
// Diagonal inertia of a solid box given its mass and HALF extents:
//   Ix = m/3*(hy^2+hz^2), Iy = m/3*(hx^2+hz^2), Iz = m/3*(hx^2+hy^2)
// An optional comOffset (can be NULL) adds the parallel-axis term m*d^2 to each axis.
// A zero mass yields (0,0,0).
void calculate_box_inertia(float result[3],float mass,float hsizex,float hsizey,float hsizez,const float comOffset[3]) {
    if (mass==0.f) {
        memset(result, 0, 3*sizeof(float));
        return;
    }
    const float third = mass/3.f;
    const float ix2 = third*hsizex*hsizex;
    const float iy2 = third*hsizey*hsizey;
    const float iz2 = third*hsizez*hsizez;
    result[0] = (iy2+iz2);
    result[1] = (ix2+iz2);
    result[2] = (ix2+iy2);
    if (comOffset) {
        const float ox2 = comOffset[0] * comOffset[0];
        const float oy2 = comOffset[1] * comOffset[1];
        const float oz2 = comOffset[2] * comOffset[2];
        result[0] += mass * (oy2 + oz2);
        result[1] += mass * (ox2 + oz2);
        result[2] += mass * (ox2 + oy2);
    }
}
3331void calculate_box_inertia_inverse(float result[3],float mass,float hsizex,float hsizey,float hsizez,const float comOffset[3]) {
3332 if (mass!=0.f) {
3333 calculate_box_inertia(result,mass,hsizex,hsizey,hsizez,comOffset);
3334 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3335 }
3336 else memset(result, 0, 3*sizeof(float));
3337}
// Diagonal inertia of a sphere: m*r^2/2.5 when solid, m*r^2/1.5 when hollow.
// An optional comOffset (can be NULL) adds the parallel-axis term m*d^2 to each axis.
// A zero mass yields (0,0,0).
void calculate_sphere_inertia(float result[3], float mass, float radius, const float comOffset[3], bool hollow) {
    if (mass==0.f) {
        memset(result, 0, 3*sizeof(float));
        return;
    }
    const float divisor = hollow ? 1.5f : 2.5f;
    const float I = (mass*radius*radius)/divisor;
    result[0] = I;
    result[1] = I;
    result[2] = I;
    if (comOffset) {
        const float ox2 = comOffset[0] * comOffset[0];
        const float oy2 = comOffset[1] * comOffset[1];
        const float oz2 = comOffset[2] * comOffset[2];
        result[0] += mass * (oy2 + oz2);
        result[1] += mass * (ox2 + oz2);
        result[2] += mass * (ox2 + oy2);
    }
}
3349void calculate_sphere_inertia_inverse(float result[3], float mass, float radius, const float comOffset[3], bool hollow) {
3350 if (mass!=0.f) {
3351 calculate_sphere_inertia(result,mass,radius,comOffset,hollow);
3352 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3353 }
3354 else memset(result, 0, 3*sizeof(float));
3355}
3356void calculate_cylinder_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis, const float comOffset[3]) {
3357 if (mass!=0.f) {
3358 float radius2 = radius*radius, h2 = halfHeight*halfHeight*4.f;
3359 result[0] = result[1] = result[2] = mass*(3.f*radius2+h2)/12.f;
3360 result[upAxis] = mass*radius2/2.f;
3361 if (comOffset) {
3362 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3363 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3364 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3365 }
3366 }
3367 else memset(result, 0, 3*sizeof(float));
3368}
3369void calculate_cylinder_inertia_inverse(float result[3],float mass,float radius,float halfHeight,AxisEnum upAxis,const float comOffset[3]) {
3370 if (mass!=0.f) {
3371 calculate_cylinder_inertia(result,mass,radius,halfHeight,upAxis,comOffset);
3372 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3373 }
3374 else memset(result, 0, 3*sizeof(float));
3375}
3376void calculate_capsule_inertia(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis, const float comOffset[3]) {
3377 // based on https://xissburg.github.io/2022-10-01-calculating-moment-of-inertia-capsule/
3378 if (mass!=0.f) {
3379 const float L = 2.f*halfCylinderHeight;
3380 float radius2 = radius*radius;
3381 const float Vcyl = M_PI*radius2*L, Vhem = 2.0f*M_PI*radius2*radius;const float Vtot = Vcyl + 2.f*Vhem;
3382 const float Mcyl = mass*Vcyl/Vtot, Mhem = mass*Vhem/Vtot;
3383 const float Icyl = Mcyl*(L*L+3.f*radius2)/12.f;
3384 const float Ihem = Mhem*radius2/2.5f;
3385 result[0] = result[1] = result[2] = Icyl + 2.f*Ihem + Mhem*(4.f*L+3*radius)*(4.f*L+3*radius)/32.f;
3386 result[upAxis] = (5.f*Mcyl+8.f*Mhem)*radius2*0.1f;
3387 if (comOffset) {
3388 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3389 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3390 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3391 }
3392 }
3393 else memset(result, 0, 3*sizeof(float));
3394}
3395void calculate_capsule_inertia_inverse(float result[3],float mass,float radius,float halfCylinderHeight,AxisEnum upAxis,const float comOffset[3]) {
3396 if (mass!=0.f) {
3397 calculate_capsule_inertia(result,mass,radius,halfCylinderHeight,upAxis,comOffset);
3398 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3399 }
3400 else memset(result, 0, 3*sizeof(float));
3401}
3402void calculate_hollow_cylinder_inertia(float result[3],float mass,float R,float r,float halfHeight,AxisEnum upAxis,const float comOffset[3]) {
3403 // R the total radius of the cylinder (including the border depth)
3404 // r must be: R-border_depth
3405 // => same as calculate_cylinder_inertia, but with: radius = sqrtf(R*R+r*r)
3406 if (mass!=0.f) {
3407 float radius2 = R*R+r*r, h2 = halfHeight*halfHeight*4.f;
3408 result[0] = result[1] = result[2] = mass*(3.f*radius2+h2)/12.f;
3409 result[upAxis] = mass*radius2/2.f;
3410 if (comOffset) {
3411 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3412 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3413 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3414 }
3415 }
3416 else memset(result, 0, 3*sizeof(float));
3417}
3418void calculate_hollow_cylinder_inertia_inverse(float result[3],float mass,float R,float r,float halfHeight,AxisEnum upAxis,const float comOffset[3]) {
3419 if (mass!=0.f) {
3420 calculate_hollow_cylinder_inertia(result,mass,R,r,halfHeight,upAxis,comOffset);
3421 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3422 }
3423 else memset(result, 0, 3*sizeof(float));
3424}
3425void calculate_torus_inertia(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis, const float comOffset[3]) {
3426 // majorRadius radius of the torus ( 0|-----------|--->R | )
3427 // minorRadius radius of the circle ( 0| |<---R | )
3428 if (mass!=0.f) {
3429 float a2 = minorRadius*minorRadius, b2 = majorRadius*majorRadius;
3430 result[0] = result[1] = result[2] = 0.25f*mass*(4.f*b2+3*a2);
3431 result[upAxis] = 0.125f*mass*(5.f*a2+4.f*b2);
3432 if (comOffset) {
3433 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3434 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3435 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3436 }
3437 }
3438 else memset(result, 0, 3*sizeof(float));
3439}
3440void calculate_torus_inertia_inverse(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis, const float comOffset[3]) {
3441 if (mass!=0.f) {
3442 calculate_torus_inertia(result,mass,majorRadius,minorRadius,upAxis,comOffset);
3443 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3444 }
3445 else memset(result, 0, 3*sizeof(float));
3446}
3447void calculate_cone_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis, const float comOffset[3]) {
3448 if (mass!=0.f) {
3449 float radius2 = radius*radius, h2 = halfHeight*halfHeight;
3450 result[0] = result[1] = result[2] = mass*(3.f*radius2+2.f*h2)/20.f;
3451 result[upAxis] = mass*3.f*radius2/10.f;
3452 if (comOffset) {
3453 result[0] += mass * (comOffset[1] * comOffset[1] + comOffset[2] * comOffset[2]);
3454 result[1] += mass * (comOffset[0] * comOffset[0] + comOffset[2] * comOffset[2]);
3455 result[2] += mass * (comOffset[0] * comOffset[0] + comOffset[1] * comOffset[1]);
3456 }
3457 }
3458 else memset(result, 0, 3*sizeof(float));
3459}
3460void calculate_cone_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis, const float comOffset[3]) {
3461 if (mass!=0.f) {
3462 calculate_cone_inertia(result,mass,radius,halfHeight,upAxis,comOffset);
3463 for (int i=0;i<3;i++) result[i] = 1.f/result[i];
3464 }
3465 else memset(result, 0, 3*sizeof(float));
3466}
3467
3468
3469// main functions
3470void show_info() {
3471 // Print information about instruction set.
3472#ifdef __AVX2__
3473 log("Using 8-wide AVX\n");
3474#else
3475 log("Using 4-wide SSE\n");
3476#if defined(__SSE4_1__) || defined(__AVX__)
3477 log("BLENDVPS: Enabled\n");
3478#else
3479 log("BLENDVPS: Disabled\n");
3480#endif
3481#endif
3482
3483#ifdef __FMA__
3484 log("FMA: Enabled\n");
3485#else
3486 log("FMA: Disabled\n");
3487#endif
3488
3489# ifdef NUDGE_USE_SIMDE
3490 log("\nUSING SIMDE (simd everywhere).\n");
3491# ifdef SIMDE_AVX2_NATIVE
3492 log("SIMDE: SIMDE_AVX2_NATIVE is defined.\n");
3493//# error SIMDE_AVX2_NATIVE is defined.
3494# endif
3495# ifdef SIMDE_AVX_NATIVE
3496 log("SIMDE: SIMDE_AVX_NATIVE is defined.\n");
3497//# error SIMDE_AVX_NATIVE is defined.
3498# endif
3499# ifdef SIMDE_SSE2_NATIVE
3500 log("SIMDE: SIMDE_SSE2_NATIVE is defined.\n");
3501//# error SIMDE_SSE2_NATIVE is defined.
3502# endif
3503# ifdef SIMDE_SSE_NATIVE
3504 log("SIMDE: SIMDE_SSE_NATIVE is defined.\n");
3505//# error SIMDE_SSE_NATIVE is defined.
3506# endif
3507# ifdef SIMDE_MMX_NATIVE
3508 log("SIMDE: SIMDE_MMX_NATIVE is defined.\n");
3509//# error SIMDE_MMX_NATIVE is defined.
3510# endif
3511# ifdef SIMDE_NO_NATIVE
3512 log("SIMDE: SIMDE_NO_NATIVE is defined.\n");
3513# endif
3514# endif
3515
3516 flush();
3517}
3518
// ---- Compile-time defaults: each one can be overridden by defining it beforehand ----

// Default maximum number of boxes.
#if !defined(NUDGE_DEFAULT_MAX_NUM_BOXES)
#   define NUDGE_DEFAULT_MAX_NUM_BOXES 256 /*1024*4*/
#endif
// Default maximum number of spheres.
#if !defined(NUDGE_DEFAULT_MAX_NUM_SPHERES)
#   define NUDGE_DEFAULT_MAX_NUM_SPHERES 256 /*1024*4 //512*6*/
#endif
// NOTE(review): presumably the first tag value reserved for spheres - confirm against its users.
#define NUDGE_START_SPHERE_TAG (16384)

// The two default maximums together cannot exceed 8192.
#if ((NUDGE_DEFAULT_MAX_NUM_BOXES+NUDGE_DEFAULT_MAX_NUM_SPHERES)>8192)
#   error. It must be (NUDGE_DEFAULT_MAX_NUM_BOXES+NUDGE_DEFAULT_MAX_NUM_SPHERES)<=8192
#endif

// Combines the friction values F1 and F2 of two bodies into a single coefficient.
#if !defined(NUDGE_FRICTION_MODEL)
#   define NUDGE_FRICTION_MODEL(F1,F2) ((F1)*(F2)*0.5f)
#endif

// Kinematic animation defaults.
#if !defined(NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES)
#   define NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES 40
#endif
#if !defined(NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS)
#   define NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS 10
#endif
3541
3542void* _my_mm_realloc(void** pp,size_t new_capacity,size_t capacity,size_t item_size,size_t alignment) {
3543 assert(pp);
3544 unsigned char* p_old = (unsigned char*) *pp;assert(p_old);
3545 unsigned char* p = (unsigned char*) _mm_malloc(new_capacity*item_size, alignment);assert(p);
3546 memcpy(p,p_old,capacity*item_size);
3547 _mm_free(p_old);
3548 *pp = p;
3549 return p;
3550}
3551size_t _my_mm_realloc_grow(void** pp,size_t new_size,size_t capacity,size_t item_size,size_t alignment) {
3552 //returns the new_capacity
3553 if (new_size>=capacity) return capacity;
3554 const size_t new_capacity = capacity==0 ? new_size : (new_size + capacity/2);
3555 void* p = _my_mm_realloc(pp,new_capacity,capacity,item_size,alignment);
3556 assert(*pp=p);
3557 return new_capacity;
3558}
3559void kinematic_data_reserve_key_frames(KinematicData* kd, size_t new_size) {
3560 const uint32_t capacity = kd->key_frame_capacity;
3561 if (capacity>=new_size) return;
3562 const size_t new_capacity = _my_mm_realloc_grow((void**) &kd->key_frame_transforms,new_size,capacity,sizeof(kd->key_frame_transforms[0]),64);
3563 const size_t tmp = _my_mm_realloc_grow((void**) &kd->key_frame_modes,new_size,capacity,sizeof(kd->key_frame_modes[0]),64);assert(tmp==new_capacity);
3564 kd->key_frame_capacity = new_capacity;
3565 for (uint32_t i=capacity;i<new_capacity;i++) {
3566 kd->key_frame_transforms[i] = identity_transform;
3567 kd->key_frame_modes[i]=KinematicData::TM_NORMAL;
3568 }
3569 assert(new_capacity>new_size);
3570}
3571void kinematic_data_reserve_animations(KinematicData* kd, size_t new_size) {
3572 const uint32_t capacity = kd->animations_capacity;
3573 if (capacity>=new_size) return;
3574 const size_t new_capacity = _my_mm_realloc_grow((void**) &kd->animations,new_size,capacity,sizeof(kd->animations[0]),64);
3575 kd->animations_capacity = new_capacity;
3576 memset(&kd->animations[capacity],0,sizeof(KinematicData::Animation)*(new_capacity-capacity));
3577 for (uint32_t i=capacity;i<new_capacity;i++) {
3578 KinematicData::Animation* m = &kd->animations[i];
3579 m->baseT = identity_transform;
3581 m->total_time = m->play_time = -1.f;
3582 m->speed = 1.f;
3583 }
3584 assert(new_capacity>new_size);
3585}
3586
3587
3588
3589void restart_context(context_t* c) {
3590# ifndef NUDGE_DEFAULT_GRAVITY
3591# define NUDGE_DEFAULT_GRAVITY (-9.82f)
3592# endif
3593# ifndef NUDGE_DEFAULT_FRICTION
3594# define NUDGE_DEFAULT_FRICTION (1.f)
3595# endif
3596 assert(c && c->MAX_NUM_BODIES==c->MAX_NUM_SPHERES+c->MAX_NUM_BOXES && c->MAX_NUM_SPHERES>0 && c->MAX_NUM_BOXES>0);
3597 memset(c->bodies.idle_counters,0,sizeof(uint8_t)*c->MAX_NUM_BODIES);
3598 memset(c->colliders.boxes.data,0,sizeof(SphereCollider)*c->MAX_NUM_BOXES);
3599 memset(c->colliders.spheres.data,0,sizeof(SphereCollider)*c->MAX_NUM_SPHERES);
3600 memset(c->kinematic_data.animations,0,sizeof(KinematicData::Animation)*c->kinematic_data.animations_capacity);
3601 memset(c->kinematic_data.key_frame_transforms,0,sizeof(Transform)*c->kinematic_data.key_frame_capacity);
3602 memset(c->kinematic_data.key_frame_modes,0,sizeof(KinematicData::TimeMode)*c->kinematic_data.key_frame_capacity);
3603 c->active_bodies.count=0;
3604 c->bodies.count=0;
3605 c->colliders.boxes.count = 0;
3606 for (uint16_t i=0;i<c->MAX_NUM_BOXES;i++) {c->colliders.boxes.tags[i] = i;}
3607 c->colliders.spheres.count = 0;
3608 for (uint16_t i=0;i<c->MAX_NUM_SPHERES;i++) {c->colliders.spheres.tags[i] = NUDGE_START_SPHERE_TAG+i;}
3609 c->contact_cache.count=0;assert(c->contact_cache.capacity>0);
3610 c->contact_data.count=0;assert(c->contact_data.capacity>0);
3611 c->global_data.removed_bodies_count = c->global_data.finalized_removed_bodies_count = 0;assert(c->global_data.removed_bodies_capacity>0);
3612 c->kinematic_data.key_frame_count=0;
3613 c->kinematic_data.animations_count=0;
3614 for (uint32_t i=0;i<c->kinematic_data.key_frame_capacity;i++) {
3615 c->kinematic_data.key_frame_transforms[i] = identity_transform;
3616 c->kinematic_data.key_frame_modes[i]=KinematicData::TM_NORMAL;
3617 }
3618 for (uint32_t i=0;i<c->kinematic_data.animations_capacity;i++) {
3619 KinematicData::Animation* m = &c->kinematic_data.animations[i];
3620 m->baseT = identity_transform;
3622 m->total_time = m->play_time = -1.f;
3623 m->speed = 1.f;
3624 }
3625 const int must_reset_aux_bodies = (c->global_data.flags&GF_DONT_RESET_AUX_BODIES) ? 0 : 1;
3626 for (unsigned i=0;i<c->MAX_NUM_BODIES;i++) {
3627 BodyInfo* info = &c->bodies.infos[i];
3628 BodyProperties* property = &c->bodies.properties[i];
3629 BodyFilter* filter = &c->bodies.filters[i];
3630 BodyLayout* layout = &c->bodies.layouts[i];
3631 memset(info,0,sizeof(BodyInfo));
3632 memset(property,0,sizeof(BodyProperties));
3633 memset(filter,0,sizeof(BodyFilter));
3634 memset(layout,0,sizeof(BodyLayout));
3635 layout->first_box_index=layout->first_sphere_index=-1;
3636 property->gravity[1] = NUDGE_DEFAULT_GRAVITY;
3637 property->friction = NUDGE_DEFAULT_FRICTION;
3638 filter->flags=0;
3639 filter->collision_group=COLLISION_GROUP_DEFAULT;
3640 filter->collision_mask=COLLISION_GROUP_ALL;
3641# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
3642 if (must_reset_aux_bodies) memset(&info->aux_bodies,0xFF,NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES*sizeof(int16_t)); // sets all components to -1
3643# endif // NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
3644 }
3645
3646 SimulationParams* sp = &c->simulation_params;
3647 sp->numsubsteps_overflow_in_last_frame=0;
3648 sp->num_substeps_in_last_frame=0;
3649 sp->remaining_time_in_seconds=0;
3650 sp->time_step_minus_remaining_time=0;
3651
3652 //sp->num_total_substeps=sp->num_frames = 0;
3653}
3654void init_context_with(context_t* c,unsigned MAX_NUM_BOXES,unsigned MAX_NUM_SPHERES) {
3655#if (!defined(__EMSCRIPTEN__) && !defined(NUDGE_USE_SIMDE)) // TODO: #if (defined(__EMSCRIPTEN__) || (defined(NUDGE_USE_SIMDE) && defined(SIMDE_SSE_NO_NATIVE)))
3656 // Disable denormals for performance.
3657#ifndef NUDGE_USE_SIMDE
3658 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
3659 _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
3660#else //NUDGE_USE_SIMDE
3661 SIMDE_MM_SET_FLUSH_ZERO_MODE(SIMDE_MM_FLUSH_ZERO_ON);
3662 SIMDE_MM_SET_DENORMALS_ZERO_MODE(SIMDE_MM_DENORMALS_ZERO_ON);
3663#endif //NUDGE_USE_SIMDE
3664#endif //__EMSCRIPTEN__
3665
3666#ifndef NUDGE_ARENA_SIZE_MACRO
3667# define NUDGE_ARENA_SIZE_MACRO(MAX_NUM_BODIES) (512000+50*(MAX_NUM_BODIES)*(MAX_NUM_BODIES))//(64*1024*1024) //(48*NUDGE_MAX_NUM_BODIES*NUDGE_MAX_NUM_BODIES/4)//(64*1024*1024)
3668#endif
3669
3670 assert(c);
3671 assert(c->MAX_NUM_BODIES==0);
3672 assert((MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192) && "nudge has a upper limit on the number of colliders: (MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192)."); // nudge has a upper limit on the number of colliders: (MAX_NUM_BOXES+MAX_NUM_SPHERES<=8192)
3673 //memset(&c,0,sizeof(context_t));
3674
3675 *((unsigned*)&c->MAX_NUM_BOXES) = MAX_NUM_BOXES;
3676 *((unsigned*)&c->MAX_NUM_SPHERES) = MAX_NUM_SPHERES;
3677 *((unsigned*)&c->MAX_NUM_BODIES) = c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;
3678
3679 const unsigned NUDGE_MAX_NUM_BODIES = c->MAX_NUM_BODIES;
3680 const unsigned NUDGE_MAX_NUM_BOXES = c->MAX_NUM_BOXES;
3681 const unsigned NUDGE_MAX_NUM_SPHERES = c->MAX_NUM_SPHERES;
3682
3683 // Set valid simulation data
3684 struct SimulationParams* sp = &c->simulation_params;
3685 if (sp->time_step<=0) sp->time_step = NUDGE_DEFAULT_SIMULATION_TIMESTEP;
3686 assert(sp->time_step>0);
3687 if (sp->max_num_substeps==0) sp->max_num_substeps = NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS;
3688 assert(sp->max_num_substeps>0);
3689 if (sp->num_iterations_per_substep==0) sp->num_iterations_per_substep = NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS;
3690 assert(sp->num_iterations_per_substep>0);
3691 if (sp->sleeping_threshold_linear_velocity_squared<=0.f) sp->sleeping_threshold_linear_velocity_squared = NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED;
3692 if (sp->sleeping_threshold_angular_velocity_squared<=0.f) sp->sleeping_threshold_angular_velocity_squared = NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED;
3693 if (sp->linear_damping<=0.f) sp->linear_damping = NUDGE_DEFAULT_DAMPING_LINEAR;
3694 if (sp->angular_damping<=0.f) sp->angular_damping = NUDGE_DEFAULT_DAMPING_ANGULAR;
3695 if (sp->penetration_allowed_amount<=0) sp->penetration_allowed_amount = NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT;
3696 if (sp->penetration_bias_factor<=0) sp->penetration_bias_factor = NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR;
3697 if (sp->numsubsteps_overflow_warning_mode>2) sp->numsubsteps_overflow_warning_mode=0;
3698 sp->num_total_substeps=sp->num_frames=0;
3699
3700 // Allocate memory for simulation arena.
3701# ifndef NUDGE_ARENA_SIZE_ALIGNMENT
3702# define NUDGE_ARENA_SIZE_ALIGNMENT (4096) // is this correct? Isn't 4096 too much?
3703# endif
3704 assert(c->arena.size==0);assert(c->arena.data==NULL); // sharing it could be useful, we should allow it and add an 'owned' flag
3705 c->arena.size = NUDGE_ARENA_SIZE_MACRO(NUDGE_MAX_NUM_BODIES);
3706 c->arena.data = _mm_malloc(c->arena.size, NUDGE_ARENA_SIZE_ALIGNMENT);//memset(c->arena.data,0,c->arena.size);
3707
3708 // Allocate memory for bodies, colliders, and contacts.
3709 c->active_bodies.capacity = NUDGE_MAX_NUM_BODIES;
3710 c->active_bodies.indices = static_cast<uint16_t*>(_mm_malloc(sizeof(uint16_t)*NUDGE_MAX_NUM_BODIES, 64));
3711
3712 c->bodies.transforms = static_cast<Transform*>(_mm_malloc(sizeof(Transform)*NUDGE_MAX_NUM_BODIES, 64));
3713 c->bodies.momentum = static_cast<BodyMomentum*>(_mm_malloc(sizeof(BodyMomentum)*NUDGE_MAX_NUM_BODIES, 64));
3714 c->bodies.properties = static_cast<BodyProperties*>(_mm_malloc(sizeof(BodyProperties)*NUDGE_MAX_NUM_BODIES, 64));
3715 c->bodies.filters = static_cast<BodyFilter*>(_mm_malloc(sizeof(BodyFilter)*NUDGE_MAX_NUM_BODIES, 64));
3716 c->bodies.layouts = static_cast<BodyLayout*>(_mm_malloc(sizeof(BodyLayout)*NUDGE_MAX_NUM_BODIES, 64));
3717 c->bodies.idle_counters = static_cast<uint8_t*>(_mm_malloc(sizeof(uint8_t)*NUDGE_MAX_NUM_BODIES, 64));
3718 c->bodies.infos = static_cast<BodyInfo*>(_mm_malloc(sizeof(BodyInfo)*NUDGE_MAX_NUM_BODIES,64));
3719
3720 c->colliders.boxes.data = static_cast<BoxCollider*>(_mm_malloc(sizeof(BoxCollider)*NUDGE_MAX_NUM_BOXES, 64));
3721 c->colliders.boxes.tags = static_cast<uint16_t*>(_mm_malloc(sizeof(uint16_t)*NUDGE_MAX_NUM_BOXES, 64));
3722 c->colliders.boxes.transforms = static_cast<Transform*>(_mm_malloc(sizeof(Transform)*NUDGE_MAX_NUM_BOXES, 64));
3723
3724 c->colliders.spheres.data = static_cast<SphereCollider*>(_mm_malloc(sizeof(SphereCollider)*NUDGE_MAX_NUM_SPHERES, 64));
3725 c->colliders.spheres.tags = static_cast<uint16_t*>(_mm_malloc(sizeof(uint16_t)*NUDGE_MAX_NUM_SPHERES, 64));
3726 c->colliders.spheres.transforms = static_cast<Transform*>(_mm_malloc(sizeof(Transform)*NUDGE_MAX_NUM_SPHERES, 64));
3727
3728 c->contact_data.capacity = NUDGE_MAX_NUM_BODIES*64;
3729 c->contact_data.bodies = static_cast<BodyPair*>(_mm_malloc(sizeof(BodyPair)*c->contact_data.capacity, 64));
3730 c->contact_data.data = static_cast<Contact*>(_mm_malloc(sizeof(Contact)*c->contact_data.capacity, 64));
3731 c->contact_data.tags = static_cast<uint64_t*>(_mm_malloc(sizeof(uint64_t)*c->contact_data.capacity, 64));
3732 c->contact_data.sleeping_pairs = static_cast<uint32_t*>(_mm_malloc(sizeof(uint32_t)*c->contact_data.capacity, 64));
3733
3734 c->contact_cache.capacity = NUDGE_MAX_NUM_BODIES*64;
3735 c->contact_cache.data = static_cast<CachedContactImpulse*>(_mm_malloc(sizeof(CachedContactImpulse)*c->contact_cache.capacity, 64));
3736 c->contact_cache.tags = static_cast<uint64_t*>(_mm_malloc(sizeof(uint64_t)*c->contact_cache.capacity, 64));
3737
3738 c->kinematic_data.key_frame_capacity = NUDGE_TOTAL_NUM_KINEMATIC_ANIMATION_KEY_FRAMES;
3739 c->kinematic_data.key_frame_transforms = static_cast<Transform*>(_mm_malloc(sizeof(Transform)*c->kinematic_data.key_frame_capacity,64));
3740 c->kinematic_data.key_frame_modes = static_cast<KinematicData::TimeMode*>(_mm_malloc(sizeof(KinematicData::TimeMode)*c->kinematic_data.key_frame_capacity,64));
3741 c->kinematic_data.animations_capacity = NUDGE_MAX_NUM_KINEMATIC_ANIMATIONS;
3742 c->kinematic_data.animations = static_cast<KinematicData::Animation*>(_mm_malloc(sizeof(KinematicData::Animation)*c->kinematic_data.animations_capacity,64));
3743
3744 *((uint32_t*)&c->global_data.removed_bodies_capacity) = NUDGE_MAX_NUM_BODIES;assert(c->global_data.removed_bodies_capacity==NUDGE_MAX_NUM_BODIES);
3745 c->global_data.removed_bodies = static_cast<uint32_t*>(_mm_malloc(sizeof(uint32_t)*c->global_data.removed_bodies_capacity, 64));
3746 c->global_data.flags = 0;
3747 c->global_data.exclude_smoothing_graphic_transform_flags=0;
3748 c->global_data.gravity[0]=c->global_data.gravity[2]=0.f;
3749 c->global_data.gravity[1]=NUDGE_DEFAULT_GRAVITY;
3750
3751 restart_context(c);
3752}
3753void init_context(context_t* c) {init_context_with(c,NUDGE_DEFAULT_MAX_NUM_BOXES,NUDGE_DEFAULT_MAX_NUM_SPHERES);}
3754void destroy_context(context_t* c) {
3755 assert(c->MAX_NUM_BODIES>0);
3756 assert(c->global_data.removed_bodies_capacity>0 && c->global_data.removed_bodies);
3757 _mm_free(c->global_data.removed_bodies);c->global_data.removed_bodies_count = c->global_data.finalized_removed_bodies_count = 0;
3758 *((uint32_t*)&c->global_data.removed_bodies_capacity) = 0;
3759
3760 _mm_free(c->kinematic_data.animations);c->kinematic_data.animations_capacity = c->kinematic_data.animations_count = 0;
3761 _mm_free(c->kinematic_data.key_frame_modes);
3762 _mm_free(c->kinematic_data.key_frame_transforms);
3763 c->kinematic_data.key_frame_capacity = c->kinematic_data.key_frame_count = 0;
3764
3765 _mm_free(c->contact_cache.data);c->contact_cache.data = 0;
3766 _mm_free(c->contact_cache.tags);c->contact_cache.tags = 0;
3767 c->contact_cache.capacity = c->contact_cache.count = 0;
3768
3769 _mm_free(c->contact_data.bodies);c->contact_data.bodies = 0;
3770 _mm_free(c->contact_data.data);c->contact_data.data = 0;
3771 _mm_free(c->contact_data.tags);c->contact_data.tags = 0;
3772 _mm_free(c->contact_data.sleeping_pairs);c->contact_data.sleeping_pairs = 0;
3773 c->contact_data.count = c->contact_data.capacity = 0;
3774
3775 _mm_free(c->colliders.spheres.data);c->colliders.spheres.data = 0;
3776 _mm_free(c->colliders.spheres.tags);c->colliders.spheres.tags = 0;
3777 _mm_free(c->colliders.spheres.transforms);c->colliders.spheres.transforms = 0;
3778 c->colliders.spheres.count = 0;
3779
3780 _mm_free(c->colliders.boxes.data);c->colliders.boxes.data = 0;
3781 _mm_free(c->colliders.boxes.tags);c->colliders.boxes.tags = 0;
3782 _mm_free(c->colliders.boxes.transforms);c->colliders.boxes.transforms = 0;
3783 c->colliders.boxes.count = 0;
3784
3785 _mm_free(c->bodies.infos);c->bodies.infos = 0;
3786 _mm_free(c->bodies.idle_counters);c->bodies.idle_counters = 0;
3787 _mm_free(c->bodies.filters);c->bodies.filters = 0;
3788 _mm_free(c->bodies.layouts);c->bodies.layouts = 0;
3789 _mm_free(c->bodies.transforms);c->bodies.transforms = 0;
3790 _mm_free(c->bodies.momentum);c->bodies.momentum = 0;
3791 _mm_free(c->bodies.properties);c->bodies.properties = 0;
3792 c->bodies.count = 0;
3793
3794 _mm_free(c->active_bodies.indices);c->active_bodies.indices = 0;
3795 c->active_bodies.capacity = c->active_bodies.count = 0;
3796
3797 _mm_free(c->arena.data);c->arena.data = 0;
3798 c->arena.size = 0;
3799
3800 //memset(c,0,sizeof(*c));
3801 *((unsigned*)&c->MAX_NUM_BODIES) = *((unsigned*)&c->MAX_NUM_BOXES) = *((unsigned*)&c->MAX_NUM_SPHERES) =0;
3802}
3803
3804
// Element type of the array sorted with qsort() in finalize_removed_bodies():
// it must have the same size as the elements of c->global_data.removed_bodies
// (finalize_removed_bodies() asserts it).
3805typedef unsigned body_type;
// Context read by the two _compare_bodies_by_XXX_collider() callbacks below (qsort()
// can't forward user data to its comparator): finalize_removed_bodies() sets it right
// before calling qsort() and resets it to NULL right after.
// NOTE(review): being a single file-scope pointer, concurrent calls on different
// contexts would presumably interfere with each other - confirm.
3806static context_t* _tmpc = NULL;
3807static inline int _compare_bodies_by_box_collider(const void* av,const void*bv) {
3808 const body_type a = *((body_type*)av), b = *((body_type*)bv);
3809 assert(_tmpc);
3810 assert(a<_tmpc->bodies.count);
3811 assert(b<_tmpc->bodies.count);
3812 const int aa=_tmpc->bodies.layouts[a].first_box_index,bb=_tmpc->bodies.layouts[b].first_box_index;
3813 return (aa<bb)?-1:(aa>bb)?1:0;
3814}
3815static inline int _compare_bodies_by_sphere_collider(const void* av,const void*bv) {
3816 const body_type a = *((body_type*)av), b = *((body_type*)bv);
3817 assert(_tmpc);
3818 assert(a<_tmpc->bodies.count);
3819 assert(b<_tmpc->bodies.count);
3820 const int aa=_tmpc->bodies.layouts[a].first_sphere_index,bb=_tmpc->bodies.layouts[b].first_sphere_index;
3821 return (aa<bb)?-1:(aa>bb)?1:0;
3822}
3823void finalize_removed_bodies(context_t* c) {
3824 // I honestly don't know what alse we should do other then removing the collision shapes here
3825 // Here are some optional attempts to perform other tasks:
3826 const int clean_active_bodies = 0; // removes the removed bodies from the c->active_body list
3827 const int clean_contact_data = 0; // slow... removes the c->contact_data that refer to at least a removed body
3828 const int clean_cached_impulses = 0; // slowest... (same for c->contact_cache) + implementation is probably wrong.
3829 // End optional attempts
3830
3831 const int32_t removed_bodies_count = (int32_t) c->global_data.removed_bodies_count;
3832 const int32_t finalized_removed_bodies_count = (int32_t) c->global_data.finalized_removed_bodies_count;
3833 assert(finalized_removed_bodies_count<=removed_bodies_count);
3834 if (finalized_removed_bodies_count==removed_bodies_count) return;
3835
3836 //log("[nudge_frame:%llu] finalize_removed_bodies(...): finalized_removed_bodies_count=%d removed_bodies_count=%d\n",c->simulation_params.num_frames,finalized_removed_bodies_count,removed_bodies_count);
3837 assert(sizeof(body_type)==sizeof(c->global_data.removed_bodies[0]));
3838 body_type* removed_bodies = &c->global_data.removed_bodies[0];
3839 int16_t start;uint16_t count;
3840 BodyLayout* layouts = c->bodies.layouts;
3841 uint32_t num_boxes_to_remove=0,num_spheres_to_remove=0;int i;
3842 uint32_t max_num_allocated_tags = 0;
3843 Arena arena = c->arena;
3844 uint16_t* tags = NULL;
3845
3846 // [pre-processing for arena allocation]
3847 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
3848 const body_type body = removed_bodies[i];assert(body<c->bodies.count);
3849 const uint16_t num_boxes=layouts[body].num_boxes;
3850 const uint16_t num_spheres=layouts[body].num_spheres;
3851 num_boxes_to_remove+=num_boxes;
3852 num_spheres_to_remove+=num_spheres;
3853 // dbg:
3854 if (num_boxes>0) {assert(layouts[body].first_box_index>=0);}
3855 if (num_spheres>0) {assert(layouts[body].first_sphere_index>=0);}
3856
3857 if (clean_active_bodies && c->active_bodies.count) {
3858 for (int j=(int)(c->active_bodies.count-1);j>=0;--j) {
3859 assert(j<(int)c->active_bodies.count);
3860 if (body==c->active_bodies.indices[j]) {
3861 // |-------------|--|-----------|
3862 // 0 j j+1 count
3863 memmove(&c->active_bodies.indices[j],&c->active_bodies.indices[j+1],sizeof(c->active_bodies.indices[0])*(c->active_bodies.count-(j+1)));
3864 --c->active_bodies.count;
3865 }
3866 }
3867 }
3868 if (clean_contact_data && c->contact_data.count) {
3869 for (int j=(int)(c->contact_data.count-1);j>=0;--j) {
3870 // struct ContactData {Contact* data;BodyPair* bodies;uint64_t* tags;uint32_t capacity;uint32_t count; uint32_t* sleeping_pairs;uint32_t sleeping_count;};
3871 const BodyPair* bp = &c->contact_data.bodies[j];
3872 if (body==bp->a || body==bp->b) {
3873 memmove(&c->contact_data.data[j],&c->contact_data.data[j+1],sizeof(c->contact_data.data[0])*(c->contact_data.count-(j+1)));
3874 memmove(&c->contact_data.bodies[j],&c->contact_data.bodies[j+1],sizeof(c->contact_data.bodies[0])*(c->contact_data.count-(j+1)));
3875 memmove(&c->contact_data.tags[j],&c->contact_data.tags[j+1],sizeof(c->contact_data.tags[0])*(c->contact_data.count-(j+1)));
3876 --c->active_bodies.count;
3877 }
3878 // What are 'sleeping_pairs' and 'sleeping_pairs_count' inside 'ContactData'?
3879 }
3880 }
3881 }
3882 max_num_allocated_tags = num_boxes_to_remove>num_spheres_to_remove?num_boxes_to_remove:num_spheres_to_remove;
3883 tags = allocate_array<uint16_t>(&arena, max_num_allocated_tags, 32);
3884 assert(sizeof(tags[0])==sizeof(c->colliders.boxes.tags[0]));
3885 assert(sizeof(tags[0])==sizeof(c->colliders.spheres.tags[0]));
3886
3887 // [boxes]
3888 if (num_boxes_to_remove>0)
3889 {
3890 _tmpc=c;qsort(&removed_bodies[finalized_removed_bodies_count],removed_bodies_count-finalized_removed_bodies_count,sizeof(body_type),&_compare_bodies_by_box_collider);_tmpc=NULL;
3891 // process
3892 uint32_t num_finalized_boxes=0;unsigned moveGap,amount,lastBodyId;
3893 const body_type last_body = removed_bodies[removed_bodies_count-1];
3894 start=layouts[last_body].first_box_index;count=layouts[last_body].num_boxes;
3895 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
3896 if (i>finalized_removed_bodies_count) {
3897 const body_type body = removed_bodies[i-1]; // prev_body actually (next in the for loop)
3898 const int16_t body_start = layouts[body].first_box_index;
3899 const uint16_t body_count = layouts[body].num_boxes;
3900 if (body_start+body_count==start) {start=body_start;count+=body_count;continue;}
3901 }
3902 if (count>0) {
3903 assert(start>=0);
3904 //log("[nudge_frame:%llu] [Finalize %u/%u Boxes in [%d,%u): c->colliders.boxes.count=%u]\n",c->simulation_params.num_frames,count,num_boxes_to_remove,start,start+count,c->colliders.boxes.count);
3905 num_finalized_boxes+=count;
3906 // process interval: move [start,start+count) after c->colliders.boxes.count
3907 // remove [start,start+count)
3908 /*
3909 |-----------|----------------------|------------------------|
3910 start start+count c->colliders.boxes.count
3911 | | |
3912 |-------moveGap--------|---------amount---------|
3913
3914 After the move we want:
3915
3916 |-----------|----------------------|------------------------|
3917 start c->colliders.boxes.count |
3918 | | |
3919 |--------amount--------|---------moveGap--------|
3920
3921 We also need to keep the original tags that were present in [start,start+count)
3922 and place them in the unused space past (the new) c->colliders.boxes.count
3923 */
3924 assert(start+count<=(int)c->colliders.boxes.count);
3925 moveGap = count;amount = c->colliders.boxes.count-(start+count);
3926 memmove(&c->colliders.boxes.data[start],&c->colliders.boxes.data[start+count],amount*sizeof(c->colliders.boxes.data[0]));
3927 memmove(&c->colliders.boxes.transforms[start],&c->colliders.boxes.transforms[start+count],amount*sizeof(c->colliders.boxes.transforms[0]));
3928 // handle tags the correct way! -------------
3929 assert(count<=max_num_allocated_tags);
3930 memcpy(tags,&c->colliders.boxes.tags[start],moveGap*sizeof(c->colliders.boxes.tags[0])); // pre-move
3931 memmove(&c->colliders.boxes.tags[start],&c->colliders.boxes.tags[start+count],amount*sizeof(c->colliders.boxes.tags[0]));
3932 //memcpy(&c->colliders.boxes.tags[count-moveGap],tags,moveGap*sizeof(c->colliders.boxes.tags[0])); // post-move
3933 memcpy(&c->colliders.boxes.tags[start+amount],tags,moveGap*sizeof(c->colliders.boxes.tags[0])); // post-move
3934 // -------------------------------------------
3935 c->colliders.boxes.count-=count;
3936
3937 // we must re-assign c->bodies.infos[bodyId].first_box_index for
3938 // all the moved box-colliders in [start,c->colliders.boxes.count)
3939 lastBodyId = c->MAX_NUM_BODIES;
3940 for (unsigned i=start,isz=c->colliders.boxes.count;i<isz;i++) {
3941 const unsigned bodyId = c->colliders.boxes.transforms[i].body;
3942 assert(bodyId<c->bodies.count);
3943 assert(!(c->bodies.filters[bodyId].flags&BF_IS_REMOVED)); // not sure about this
3944 if (lastBodyId!=bodyId) {
3945 lastBodyId=bodyId;
3946 BodyLayout* bl = &c->bodies.layouts[bodyId];
3947 assert(bl->first_box_index>=0 && bl->num_boxes>0);
3948 assert(bl->first_box_index>=start+count);
3949 assert((int16_t)i==bl->first_box_index-(int16_t)count);
3950 bl->first_box_index = (int16_t) i;
3951 assert((uint16_t)bl->first_box_index+bl->num_boxes<=c->colliders.boxes.count);
3952 }
3953 }
3954 }
3955 //---------------------------------------------
3956 if (i>finalized_removed_bodies_count) {
3957 const body_type body = removed_bodies[i-1]; // prev_body actually (next in the for loop)
3958 start = layouts[body].first_box_index;
3959 count = layouts[body].num_boxes;
3960 }
3961 }
3962 if (num_finalized_boxes!=num_boxes_to_remove) {
3963 log("[nudge_frame:%llu] finalize_removed_bodies(...) has NOT handled %u box colliders and %u sphere colliders\n",c->simulation_params.num_frames,num_boxes_to_remove,num_spheres_to_remove);
3964 flush();
3965 assert(num_finalized_boxes==num_boxes_to_remove);
3966 }
3967 }
3968
3969 // [spheres]
3970 if (num_spheres_to_remove>0)
3971 {
3972 _tmpc=c;qsort(&removed_bodies[finalized_removed_bodies_count],removed_bodies_count-finalized_removed_bodies_count,sizeof(body_type),&_compare_bodies_by_sphere_collider);_tmpc=NULL;
3973 // process
3974 uint32_t num_finalized_spheres=0;unsigned moveGap,amount,lastBodyId;
3975 const body_type last_body = removed_bodies[removed_bodies_count-1];
3976 start=layouts[last_body].first_sphere_index;count=layouts[last_body].num_spheres;
3977 for (i=removed_bodies_count-1;i>=finalized_removed_bodies_count;--i) {
3978 if (i>finalized_removed_bodies_count) {
3979 const body_type body = removed_bodies[i-1]; // prev_body actually (next in the for loop)
3980 const int16_t body_start = layouts[body].first_sphere_index;
3981 const uint16_t body_count = layouts[body].num_spheres;
3982 if (body_start+body_count==start) {start=body_start;count+=body_count;continue;}
3983 }
3984 if (count>0) {
3985 assert(start>=0);
3986 //log("[nudge_frame:%llu] [Finalize %u/%u Spheres in [%d,%u): c->colliders.spheres.count=%u]\n",c->simulation_params.num_frames,count,num_spheres_to_remove,start,start+count,c->colliders.spheres.count);
3987 num_finalized_spheres+=count;
3988 // process interval: move [start,start+count) after c->colliders.spheres.count
3989 // remove [start,start+count)
3990 /*
3991 |-----------|----------------------|------------------------|
3992 start start+count c->colliders.spheres.count
3993 | | |
3994 |-------moveGap--------|---------amount---------|
3995
3996 After the move we want:
3997
3998 |-----------|----------------------|------------------------|
3999 start c->colliders.spheres.count |
4000 | | |
4001 |--------amount--------|---------moveGap--------|
4002
4003 We also need to keep the original tags that were present in [start,start+count)
4004 and place them in the unused space past (the new) c->colliders.spheres.count
4005 */
4006 assert(start+count<=(int)c->colliders.spheres.count);
4007 moveGap = count;amount = c->colliders.spheres.count-(start+count); // 46-(46+1) = -1 [TODO: ERROR!]
4008 memmove(&c->colliders.spheres.data[start],&c->colliders.spheres.data[start+count],amount*sizeof(c->colliders.spheres.data[0]));
4009 memmove(&c->colliders.spheres.transforms[start],&c->colliders.spheres.transforms[start+count],amount*sizeof(c->colliders.spheres.transforms[0]));
4010 // handle tags the correct way! -------------
4011 assert(count<=max_num_allocated_tags);
4012 memcpy(tags,&c->colliders.spheres.tags[start],moveGap*sizeof(c->colliders.spheres.tags[0])); // pre-move
4013 memmove(&c->colliders.spheres.tags[start],&c->colliders.spheres.tags[start+count],amount*sizeof(c->colliders.spheres.tags[0]));
4014 //memcpy(&c->colliders.spheres.tags[count-moveGap],tags,moveGap*sizeof(c->colliders.spheres.tags[0])); // post-move
4015 memcpy(&c->colliders.spheres.tags[start+amount],tags,moveGap*sizeof(c->colliders.spheres.tags[0])); // post-move
4016 // -------------------------------------------
4017 c->colliders.spheres.count-=count;
4018
4019 // we must re-assign c->bodies.infos[bodyId].first_sphere_index for
4020 // all the moved sphere-colliders in [start,c->colliders.spheres.count)
4021 lastBodyId = c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;assert(c->MAX_NUM_BODIES==c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES);
4022 for (unsigned i=start,isz=c->colliders.spheres.count;i<isz;i++) {
4023 const unsigned bodyId = c->colliders.spheres.transforms[i].body;
4024 assert(bodyId<c->bodies.count);
4025 assert(!(c->bodies.filters[bodyId].flags&BF_IS_REMOVED));
4026 if (lastBodyId!=bodyId) {
4027 lastBodyId=bodyId;
4028 BodyLayout* bl = &c->bodies.layouts[bodyId];
4029 assert(bl->first_sphere_index>=0 && bl->num_spheres>0);
4030 assert(bl->first_sphere_index>=start+count);
4031 assert((int16_t)i==bl->first_sphere_index-(int16_t)count);
4032 bl->first_sphere_index = (int16_t) i;
4033 assert((uint16_t)bl->first_sphere_index+bl->num_spheres<=c->colliders.spheres.count);
4034 }
4035 }
4036 }
4037 //---------------------------------------------
4038 if (i>finalized_removed_bodies_count) {
4039 const body_type body = removed_bodies[i-1]; // prev_body actually (next in the for loop)
4040 start = layouts[body].first_sphere_index;
4041 count = layouts[body].num_spheres;
4042 }
4043 }
4044 if (num_finalized_spheres!=num_spheres_to_remove) {
4045 log("[nudge_frame:%llu] finalize_removed_bodies(...) has NOT handled %u box colliders and %u sphere colliders\n",c->simulation_params.num_frames,num_boxes_to_remove,num_spheres_to_remove);
4046 flush();
4047 assert(num_finalized_spheres==num_spheres_to_remove);
4048 }
4049 }
4050
4051 // remove colliders from removed objects
4052 for (int i=finalized_removed_bodies_count;i<removed_bodies_count;i++) {
4053 const body_type body = removed_bodies[i];
4054 BodyLayout* bl = &c->bodies.layouts[body];
4055 bl->first_box_index=-1;bl->first_sphere_index=-1;
4056 bl->num_boxes=0;bl->num_spheres=0;
4057 BodyInfo* info = &c->bodies.infos[body]; // reset some info data too
4058 memset(&info->aabb_center[0],0,3*sizeof(float));
4059 memset(&info->aabb_half_extents[0],0,3*sizeof(float));
4060 memset(&info->com_offset[0],0,3*sizeof(float));
4061 info->aabb_enlarged_radius=0.f;
4062 }
4063 c->global_data.finalized_removed_bodies_count=c->global_data.removed_bodies_count;
4064
4065 //log("[nudge_frame:%llu] finalize_removed_bodies(...) has handled %u box colliders and %u sphere colliders\n",c->simulation_params.num_frames,num_boxes_to_remove,num_spheres_to_remove);
4066 //flush();
4067 assert(num_boxes_to_remove || num_spheres_to_remove);
4068
4069
4070 if (clean_cached_impulses && c->contact_cache.count) {
4071 // not robust enough...
4072 for (int i=(int)c->contact_cache.count-1;i>=0;--i) {
4073 //struct ContactCache {uint64_t* tags;CachedContactImpulse* data;uint32_t capacity;uint32_t count;};
4074 const uint64_t tag = c->contact_cache.tags[i];
4075 const uint16_t a_tag = (uint16_t) ((tag&0x0000FFFF00000000ULL)>>(2ULL*16ULL));
4076 const uint16_t b_tag = (uint16_t) ((tag&0xFFFF000000000000ULL)>>(3ULL*16ULL));
4077 //const uint16_t a_tag = (uint16_t) ((tag&0x000000000000FFFFULL)>>(0ULL*16ULL));
4078 //const uint16_t b_tag = (uint16_t) ((tag&0x00000000FFFF0000ULL)>>(1ULL*16ULL));
4079 int found=0;
4080 // here I'm assuming that the tags of the colliders we've just removed
4081 // are in the range: [j,jsz]. Hope it's correct...
4082 for (unsigned j=c->colliders.boxes.count,jsz=c->colliders.boxes.count+num_boxes_to_remove;j<jsz;j++) {
4083 const uint16_t tg = c->colliders.boxes.tags[j];
4084 if (tg==a_tag || tg==b_tag) {found = 1;break;}
4085 }
4086 if (!found) {
4087 for (unsigned j=c->colliders.spheres.count,jsz=c->colliders.spheres.count+num_spheres_to_remove;j<jsz;j++) {
4088 const uint16_t tg = c->colliders.spheres.tags[j];
4089 if (tg==a_tag || tg==b_tag) {found = 1;break;}
4090 }
4091 }
4092 if (found) {
4093 // remove c->contact_cache[i]
4094 // |---------------|--|----------------|
4095 // 0 i i+1 count
4096 memmove(&c->contact_cache.tags[i],&c->contact_cache.tags[i+1],sizeof(c->contact_cache.tags[0])*(c->contact_cache.count-(i+1)));
4097 memmove(&c->contact_cache.data[i],&c->contact_cache.data[i+1],sizeof(CachedContactImpulse)*(c->contact_cache.count-(i+1)));
4098 --c->contact_cache.count;
4099 }
4100 }
4101 }
4102
4103# define TEST_NUDGE_COLLIDER_TAGS_INTEGRITY // TO REMOVE
4104# ifdef TEST_NUDGE_COLLIDER_TAGS_INTEGRITY
4105 {
4106 Arena arena = c->arena;
4107 if (arena.size>=sizeof(uint8_t)*(c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)) {
4108 uint8_t* tagsMap = allocate_array<uint8_t>(&arena, c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES, 32);assert(tagsMap);
4109 memset(tagsMap,0,(c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)*sizeof(uint8_t));
4110 for (unsigned i=0;i<c->MAX_NUM_BOXES;i++) {
4111 const uint16_t tag = c->colliders.boxes.tags[i];
4112 assert(tag<c->MAX_NUM_BOXES);
4113 assert(tagsMap[tag]==0);
4114 tagsMap[tag]=1;
4115 }
4116 for (unsigned i=0;i<c->MAX_NUM_SPHERES;i++) {
4117 uint16_t tag = c->colliders.spheres.tags[i];
4118 assert(tag>=NUDGE_START_SPHERE_TAG && tag<NUDGE_START_SPHERE_TAG+c->MAX_NUM_SPHERES);
4119 tag=tag-NUDGE_START_SPHERE_TAG+c->MAX_NUM_BOXES;
4120 assert(tagsMap[tag]==0);
4121 tagsMap[tag]=1;
4122 }
4123 unsigned unset_tags=0;
4124 for (unsigned i=0;i<c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES;i++) {
4125 if (!tagsMap[i]) ++unset_tags;
4126 }
4127 assert(unset_tags==0);
4128 }
4129 }
4130# endif //TEST_NUDGE_COLLIDER_TAGS_INTEGRITY
4131
4132# define TEST_COLLIDER_COHERENCY // TO REMOVE
4133# ifdef TEST_COLLIDER_COHERENCY
4134 {
4135 unsigned body_last = NUDGE_INVALID_BODY_ID,delta_shape_count=0;
4136 for (unsigned i=0;i<c->colliders.boxes.count;i++) {
4137 //const uint16_t tag = &c->colliders.boxes.tags[i];
4138 const unsigned body = c->colliders.boxes.transforms[i].body;
4139 assert(body<c->bodies.count);
4140 BodyLayout* bl = &c->bodies.layouts[body];
4141 if (body_last!=body) {
4142 body_last=body;
4143 assert(bl->first_box_index==(int)i);
4144 delta_shape_count=0;
4145 }
4146 else {
4147 ++delta_shape_count;
4148 assert(i>=(uint16_t)bl->first_box_index);
4149 assert(i<(uint16_t)bl->first_box_index+bl->num_boxes);
4150 assert(i==(uint16_t)bl->first_box_index+delta_shape_count);
4151 }
4152 }
4153 body_last = NUDGE_INVALID_BODY_ID;delta_shape_count=0;
4154 for (unsigned i=0;i<c->colliders.spheres.count;i++) {
4155 //const uint16_t tag = &c->colliders.spheres.tags[i];
4156 const unsigned body = c->colliders.spheres.transforms[i].body;
4157 assert(body<c->bodies.count);
4158 assert(!(c->bodies.filters[body].flags&BF_IS_REMOVED)); // not sure about this
4159 BodyLayout* bl = &c->bodies.layouts[body];assert(bl->first_sphere_index>=0);
4160 if (body_last!=body) {
4161 body_last=body;
4162 assert(bl->first_sphere_index==(int)i);
4163 delta_shape_count=0;
4164 }
4165 else {
4166 ++delta_shape_count;
4167 assert(bl->first_sphere_index>=0);
4168 assert(i>=(uint16_t)bl->first_sphere_index);
4169 assert(i<(uint16_t)bl->first_sphere_index+bl->num_spheres);
4170 assert(i==(uint16_t)bl->first_sphere_index+delta_shape_count);
4171 }
4172 }
4173 }
4174# endif
4175
4176}
4177
4178void remove_body(context_t* c,unsigned body) {
4179 assert(body<c->bodies.count);
4180 for (unsigned i=0;i<c->global_data.removed_bodies_count;i++) {if (c->global_data.removed_bodies[i]==body) return;}
4181 assert(c->global_data.removed_bodies_count<c->MAX_NUM_BODIES);
4182 BodyFilter* f = &c->bodies.filters[body];
4183 f->flags|=BF_IS_DISABLED_OR_REMOVED;
4184 f->collision_group=f->collision_mask=0;
4185 c->bodies.idle_counters[body]=0xff; // set it to sleep
4186 c->bodies.properties[body].mass_inverse = 0.f; // turn it to static so it stops falling
4187 f->flags&=~(BF_IS_DYNAMIC|BF_IS_KINEMATIC);f->flags|=BF_IS_STATIC; // not sure about this
4188 float* lvel = &c->bodies.momentum[body].velocity[0];
4189 float* avel = &c->bodies.momentum[body].angular_velocity[0];
4190 lvel[0]=lvel[1]=lvel[2]=avel[0]=avel[1]=avel[2]=0.f;
4191 float* pos = c->bodies.transforms[body].position;pos[1]-=100000.f; // probably useless
4192 c->global_data.removed_bodies[c->global_data.removed_bodies_count++] = body;
4193}
4194
4195uint32_t colliders_get_num_remaining_boxes(context_t* c) {assert(c->colliders.boxes.count<=c->MAX_NUM_BOXES);return c->MAX_NUM_BOXES-c->colliders.boxes.count;}
4196uint32_t colliders_get_num_remaining_spheres(context_t* c) {assert(c->colliders.spheres.count<=c->MAX_NUM_SPHERES);return c->MAX_NUM_SPHERES-c->colliders.spheres.count;}
4197
4198
4199unsigned add_box(context_t* c,float mass, float hsizex, float hsizey, float hsizez, const Transform* T, const float comOffset[3]) {
4200 unsigned body,collider;
4201 if (c->global_data.finalized_removed_bodies_count>0) {
4202 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4203 assert(sizeof(c->global_data.removed_bodies[0])==sizeof(body_type));
4204 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4205 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*sizeof(body_type));
4206 assert(body<c->bodies.count);
4207 const BodyLayout* bl = &c->bodies.layouts[body];
4208 assert(bl->first_box_index==-1);
4209 assert(bl->num_boxes==0);
4210 assert(bl->first_sphere_index==-1);
4211 assert(bl->num_spheres==0);
4212 }
4213 else {
4214 assert(c->bodies.count<c->MAX_NUM_BODIES && c->colliders.boxes.count<c->MAX_NUM_BOXES); // Further boxes can't be added
4215 if (c->bodies.count == c->MAX_NUM_BODIES || c->colliders.boxes.count == c->MAX_NUM_BOXES) return NUDGE_INVALID_BODY_ID;
4216 body = c->bodies.count++;
4217 }
4218
4219 BodyProperties* prop = &c->bodies.properties[body];
4220 Transform* xform = &c->bodies.transforms[body], *xform_collider = NULL;
4221 assert(xform);
4222 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
4223 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4224 collider = c->colliders.boxes.count++;
4225 BoxCollider* boxCollider = &c->colliders.boxes.data[collider];
4226 filter->flags = mass>0?BF_IS_DYNAMIC:(mass<0?BF_IS_KINEMATIC:BF_IS_STATIC);
4227
4228 *xform = T ? (*T) : identity_transform; // transform
4229 xform->body = body; // body id
4230 memset(&c->bodies.momentum[body], 0, sizeof(c->bodies.momentum[body])); // no velocity/angular velocity
4231 memset(prop,0,sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY; // reset mass/inertia/friction
4232 if (mass<0) mass=-mass;
4233 if (mass>0) {prop->mass_inverse = 1.0f/mass;calculate_box_inertia_inverse(prop->inertia_inverse,mass,hsizex,hsizey,hsizez,comOffset);}
4234 c->bodies.idle_counters[body] = (filter->flags&BF_IS_DYNAMIC)?0:0xff;
4235
4236 //memset(info,0,sizeof(BodyInfo));
4237 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
4238 layout->num_boxes = 1;layout->num_spheres = 0;
4239 layout->first_box_index = (int16_t) collider;
4240 layout->first_sphere_index = -1;
4241 boxCollider->size[0] = hsizex;
4242 boxCollider->size[1] = hsizey;
4243 boxCollider->size[2] = hsizez;
4244 xform_collider = &c->colliders.boxes.transforms[collider];
4245 *xform_collider = identity_transform;
4246 xform_collider->body = body;
4247 if (comOffset) {filter->flags|=BF_HAS_COM_OFFSET;for (int l=0;l<3;l++) {xform_collider->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4248
4249 filter->collision_group = COLLISION_GROUP_DEFAULT;filter->collision_mask = COLLISION_GROUP_ALL;
4250 //log("%d) Added box [mass:%1.3f;hsize{%1.3f,%1.3f,%1.3f};pos{%1.3f,%1.3f,%1.3f}]\n",body,mass,hsizex,hsizey,hsizez,T->position[0],T->position[1],T->position[2]);
4251 //assert(getBoxColliderId(c->bodies.count-1)==collider);
4252
4253 body_recalculate_bounding_box(c,body); // new
4254
4255 return body;
4256}
4257unsigned add_box(context_t* c,float mass, float hsizex, float hsizey, float hsizez, const float* mMatrix16WithoutScaling, const float comOffset[3]) {
4258 if (!mMatrix16WithoutScaling) return add_box(c,mass,hsizex,hsizey,hsizez,(const Transform*)NULL);
4259 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_box(c,mass,hsizex,hsizey,hsizez,&T,comOffset);}
4260}
4261
4262unsigned add_sphere(context_t* c, float mass, float radius, const Transform* T, const float comOffset[3]) {
4263 unsigned body,collider;
4264 if (c->global_data.finalized_removed_bodies_count>0) {
4265 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4266 assert(sizeof(c->global_data.removed_bodies[0])==sizeof(body_type));
4267 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4268 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*sizeof(body_type));
4269 assert(body<c->bodies.count);
4270 const BodyLayout* bl = &c->bodies.layouts[body];
4271 assert(bl->first_box_index==-1);
4272 assert(bl->num_boxes==0);
4273 assert(bl->first_sphere_index==-1);
4274 assert(bl->num_spheres==0);
4275 }
4276 else {
4277 assert(c->bodies.count<c->MAX_NUM_BODIES && c->colliders.spheres.count<c->MAX_NUM_SPHERES); // Further spheres can't be added
4278 if (c->bodies.count == c->MAX_NUM_BODIES || c->colliders.spheres.count == c->MAX_NUM_SPHERES) return NUDGE_INVALID_BODY_ID;
4279 body = c->bodies.count++;
4280 }
4281
4282 BodyProperties* prop = &c->bodies.properties[body];
4283 Transform *xform = &c->bodies.transforms[body], *xform_collider = NULL;
4284 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
4285 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4286 collider = c->colliders.spheres.count++;
4287 filter->flags = mass>0?BF_IS_DYNAMIC:(mass<0?BF_IS_KINEMATIC:BF_IS_STATIC);
4288
4289 *xform = T ? (*T) : identity_transform; // transform
4290 xform->body = body; // body id
4291 memset(&c->bodies.momentum[body], 0, sizeof(c->bodies.momentum[body])); // no velocity/angular velocity
4292 memset(prop,0,sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY; // reset mass/inertia/friction
4293 if (mass<0) mass=-mass;
4294 if (mass>0) {prop->mass_inverse = 1.0f/mass;calculate_sphere_inertia_inverse(prop->inertia_inverse,mass,radius,comOffset);}
4295 c->bodies.idle_counters[body] = (filter->flags&BF_IS_DYNAMIC)?0:0xff;
4296
4297 //memset(info,0,sizeof(BodyInfo));
4298 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
4299 layout->num_boxes = 0;layout->num_spheres = 1;
4300 layout->first_box_index = -1;
4301 layout->first_sphere_index = (int16_t) collider;
4302 c->colliders.spheres.data[collider].radius = radius;
4303 xform_collider = &c->colliders.spheres.transforms[collider];
4304 *xform_collider = identity_transform;
4305 xform_collider->body = body;
4306 if (comOffset) {filter->flags|=BF_HAS_COM_OFFSET;for (int l=0;l<3;l++) {xform_collider->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4307
4308 filter->collision_group = COLLISION_GROUP_DEFAULT;filter->collision_mask = COLLISION_GROUP_ALL;
4309
4310 body_recalculate_bounding_box(c,body); // new
4311
4312 //log("%d) Added sphere [mass:%1.3f;radius=%1.3f;pos{%1.3f,%1.3f,%1.3f}]\n",body,mass,radius,T->position[0],T->position[1],T->position[2]);
4313 //assert(getSphereColliderId(c->bodies.count-1)==collider);
4314 return body;
4315}
4316unsigned add_sphere(context_t* c, float mass, float radius, const float* mMatrix16WithoutScaling, const float comOffset[3]) {
4317 if (!mMatrix16WithoutScaling) return add_sphere(c,mass,radius,(const Transform*)NULL);
4318 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_sphere(c,mass,radius,&T,comOffset);}
4319}
4320
4321unsigned add_compound(context_t* c, float mass, float inertia[3], unsigned num_boxes, const float* hsizeTriplets, const Transform* boxOffsetTransforms, unsigned num_spheres, const float* radii, const Transform* sphereOffsetTransforms, const Transform* T, const float comOffset[3],float* centerMeshAndRetrieveOldCenter3Out) {
4322 unsigned body = NUDGE_INVALID_BODY_ID;
4323 assert(num_boxes+num_spheres>0);
4324 assert(c->colliders.boxes.count+num_boxes<=c->MAX_NUM_BOXES);
4325 assert(c->colliders.spheres.count+num_spheres<=c->MAX_NUM_SPHERES);
4326 if (c->colliders.boxes.count+num_boxes>c->MAX_NUM_BOXES || c->colliders.spheres.count+num_spheres>c->MAX_NUM_SPHERES) return NUDGE_INVALID_BODY_ID;
4327 if (c->global_data.finalized_removed_bodies_count>0) {
4328 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4329 assert(sizeof(c->global_data.removed_bodies[0])==sizeof(body_type));
4330 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4331 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*sizeof(body_type));
4332 assert(body<c->bodies.count);
4333 const BodyLayout* bl = &c->bodies.layouts[body];
4334 assert(bl->first_box_index==-1);
4335 assert(bl->num_boxes==0);
4336 assert(bl->first_sphere_index==-1);
4337 assert(bl->num_spheres==0);
4338 }
4339 else {
4340 assert(c->bodies.count<c->MAX_NUM_BODIES); // Further bodies can't be added (it should never happen, since: c->MAX_NUM_BODIES=c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)
4341 if (c->bodies.count == c->MAX_NUM_BODIES) return NUDGE_INVALID_BODY_ID;
4342 body = c->bodies.count++;
4343 }
4344 BodyProperties* prop = &c->bodies.properties[body];
4345 Transform *xform = &c->bodies.transforms[body];
4346 BodyInfo* info = &c->bodies.infos[body];BodyFilter* filter = &c->bodies.filters[body];BodyLayout* layout = &c->bodies.layouts[body];
4347 if (comOffset && comOffset[0]==0.f && comOffset[1]==0.f && comOffset[2]==0.f) comOffset = NULL;
4348 //memset(info,0,sizeof(BodyInfo));
4349 info->com_offset[0]=info->com_offset[1]=info->com_offset[2]=0.f;
4350 filter->flags = mass>0?BF_IS_DYNAMIC:(mass<0?BF_IS_KINEMATIC:BF_IS_STATIC);
4351 if (comOffset) filter->flags|=BF_HAS_COM_OFFSET;
4352
4353 *xform = T ? (*T) : identity_transform; // transform
4354 xform->body = body; // body id
4355 memset(&c->bodies.momentum[body], 0, sizeof(c->bodies.momentum[body])); // no velocity/angular velocity
4356 memset(prop,0,sizeof(*prop));prop->friction = NUDGE_DEFAULT_FRICTION;prop->gravity[1]=NUDGE_DEFAULT_GRAVITY; // reset mass/inertia/friction
4357 if (mass<0) mass=-mass;
4358 if (mass>0) prop->mass_inverse = 1.0f/mass;
4359 c->bodies.idle_counters[body] = (filter->flags&BF_IS_DYNAMIC)?0:0xff;
4360
4361 for (unsigned i=0;i<num_boxes;i++) {
4362 unsigned collider = c->colliders.boxes.count++;
4363 BoxCollider* boxCollider = &c->colliders.boxes.data[collider];
4364 Transform* xf = &c->colliders.boxes.transforms[collider];
4365 for (int j=0;j<3;j++) boxCollider->size[j] = hsizeTriplets[3*i+j];
4366 *xf = boxOffsetTransforms ? boxOffsetTransforms[i] : identity_transform;
4367 if (comOffset && !centerMeshAndRetrieveOldCenter3Out) {for (int l=0;l<3;l++) {xf->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4368 xf->body = body;
4369 if (i==0) layout->first_box_index = collider;
4370 }
4371 layout->num_boxes = num_boxes;
4372
4373 for (unsigned i=0;i<num_spheres;i++) {
4374 unsigned collider = c->colliders.spheres.count++;
4375 assert(collider<c->MAX_NUM_SPHERES && collider<c->colliders.spheres.count);
4376 Transform* xf = &c->colliders.spheres.transforms[collider];
4377 c->colliders.spheres.data[collider].radius = radii[i];
4378 *xf = sphereOffsetTransforms ? sphereOffsetTransforms[i] : identity_transform;
4379 if (comOffset && !centerMeshAndRetrieveOldCenter3Out) {for (int l=0;l<3;l++) {xf->position[l]-=comOffset[l];info->com_offset[l]=comOffset[l];}}
4380 xf->body = body;
4381 if (i==0) layout->first_sphere_index = collider;
4382 }
4383 layout->num_spheres = num_spheres;
4384
4385 filter->collision_group = COLLISION_GROUP_DEFAULT;filter->collision_mask = COLLISION_GROUP_ALL;
4386
4387 body_recalculate_bounding_box(c,body); // new get aabb
4388
4389 float aabb_he[3] = {info->aabb_half_extents[0],info->aabb_half_extents[1],info->aabb_half_extents[2]};
4390 if (centerMeshAndRetrieveOldCenter3Out) {
4391 centerMeshAndRetrieveOldCenter3Out[0]=info->aabb_center[0];
4392 centerMeshAndRetrieveOldCenter3Out[1]=info->aabb_center[1];
4393 centerMeshAndRetrieveOldCenter3Out[2]=info->aabb_center[2];
4394 float offset[3]; // aabb_center + com_offset
4395 // calculate total offset and correct info->aabb_min_max
4396 for (int i=0;i<3;i++) {
4397 offset[i]=centerMeshAndRetrieveOldCenter3Out[i]+(comOffset?comOffset[i]:0.f);
4398 info->aabb_center[i]-=centerMeshAndRetrieveOldCenter3Out[i];
4399 }
4400 // remove offset from each collider transform and set info->com_offset to comOffset
4401 for (unsigned i=0;i<layout->num_boxes;i++) {
4402 Transform* xf = &c->colliders.boxes.transforms[layout->first_box_index+i];
4403 {for (int l=0;l<3;l++) {xf->position[l]-=offset[l];if (comOffset) info->com_offset[l]=comOffset[l];}}
4404 }
4405 for (unsigned i=0;i<layout->num_spheres;i++) {
4406 Transform* xf = &c->colliders.spheres.transforms[layout->first_sphere_index+i];
4407 {for (int l=0;l<3;l++) {xf->position[l]-=offset[l];if (comOffset) info->com_offset[l]=comOffset[l];}}
4408 }
4409 // recalculate info->aabb_enlarged_radius
4410 info->aabb_enlarged_radius = 0;
4411 const float* t = info->aabb_half_extents;float s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4412 t = info->aabb_center;s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4413 }
4414
4415 if (mass>0) {
4416 float tmp[3];
4417 if (!inertia) {
4418 // now that we can calculate a per-body aabb, we can assign a box inertia as default
4419 calculate_box_inertia(tmp,mass,aabb_he[0],aabb_he[1],aabb_he[2],comOffset); // here we simply use comOffset (not the additional offset to recenter the mesh): is this the default option? Yes: comOffset should not depend on the input mesh aabb center
4420 inertia=tmp;
4421 }
4422 assert(inertia);
4423 if (inertia) {for (int i=0;i<3;i++) prop->inertia_inverse[i] = inertia[i]!=0.f ? (1.0f / inertia[i]) : 0.f;}
4424 }
4425
4426 //log("%d) Added compound [mass:%1.3f;pos{%1.3f,%1.3f,%1.3f};num_boxes=%u;num_spheres=%u]\n",body,mass,T->position[0],T->position[1],T->position[2],num_boxes,num_spheres);
4427
4428 return body;
4429}
4430unsigned add_compound(context_t* c, float mass, float inertia[3], unsigned num_boxes, const float* hsizeTriplets, const float* boxOffsetMatrices16WithoutScaling, unsigned num_spheres, const float* radii, const float* sphereOffsetMatrices16WithoutScaling, const float* mMatrix16WithoutScaling, const float comOffset[3], float *centerMeshAndRetrieveOldCenter3Out) {
4431 Arena arena = c->arena;
4432 Transform* boxTransforms = allocate_array<Transform>(&arena, num_boxes+num_spheres, 32);
4433 Transform* sphereTransforms = &boxTransforms[num_boxes];
4434 for (unsigned i=0;i<num_boxes;i++) Mat4WithoutScalingToTransform(&boxTransforms[i],&boxOffsetMatrices16WithoutScaling[16*i]);
4435 for (unsigned i=0;i<num_spheres;i++) Mat4WithoutScalingToTransform(&sphereTransforms[i],&sphereOffsetMatrices16WithoutScaling[16*i]);
4436 Transform T = identity_transform;
4437 if (mMatrix16WithoutScaling) Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);
4438 return add_compound(c,mass,inertia,num_boxes,hsizeTriplets,boxTransforms,num_spheres,radii,sphereTransforms,&T,comOffset,centerMeshAndRetrieveOldCenter3Out);
4439}
4440
4441unsigned add_clone(context_t* c, unsigned body_to_clone, float mass, const Transform* T, float scale_factor, const float newComOffsetInPreScaledUnits[3]) {
4442 unsigned body = NUDGE_INVALID_BODY_ID;
4443 const unsigned srcbody = body_to_clone;
4444 assert(srcbody<c->bodies.count);
4445 assert(scale_factor!=0.f);
4446 const BodyLayout* srclayout = &c->bodies.layouts[srcbody];const uint16_t num_boxes = srclayout->num_boxes, num_spheres = srclayout->num_spheres;
4448 assert(num_boxes+num_spheres>0);
4449 assert(c->colliders.boxes.count+num_boxes<=c->MAX_NUM_BOXES);
4450 assert(c->colliders.spheres.count+num_spheres<=c->MAX_NUM_SPHERES);
4451 if (c->colliders.boxes.count+num_boxes>c->MAX_NUM_BOXES || c->colliders.spheres.count+num_spheres>c->MAX_NUM_SPHERES) return NUDGE_INVALID_BODY_ID;
4452 if (c->global_data.finalized_removed_bodies_count>0) {
4453 assert(c->global_data.finalized_removed_bodies_count<=c->global_data.removed_bodies_count);
4454 assert(sizeof(c->global_data.removed_bodies[0])==sizeof(body_type));
4455 body=c->global_data.removed_bodies[0];--c->global_data.finalized_removed_bodies_count;--c->global_data.removed_bodies_count;
4456 memmove(&c->global_data.removed_bodies[0],&c->global_data.removed_bodies[1],c->global_data.removed_bodies_count*sizeof(body_type));
4457 assert(body<c->bodies.count);
4458 const BodyLayout* bl = &c->bodies.layouts[body];
4459 assert(bl->first_box_index==-1);
4460 assert(bl->num_boxes==0);
4461 assert(bl->first_sphere_index==-1);
4462 assert(bl->num_spheres==0);
4463 }
4464 else {
4465 assert(c->bodies.count<c->MAX_NUM_BODIES); // Further bodies can't be added (it should never happen, since: c->MAX_NUM_BODIES=c->MAX_NUM_BOXES+c->MAX_NUM_SPHERES)
4466 if (c->bodies.count == c->MAX_NUM_BODIES) return NUDGE_INVALID_BODY_ID;
4467 body = c->bodies.count++;
4468 }
4469 const BodyProperties* srcprop = &c->bodies.properties[srcbody];BodyProperties* prop = &c->bodies.properties[body];
4470 const BodyInfo* srcinfo = &c->bodies.infos[srcbody];BodyInfo* info = &c->bodies.infos[body];
4471 const BodyFilter* srcfilter = &c->bodies.filters[srcbody];BodyFilter* filter = &c->bodies.filters[body];
4472 BodyLayout* layout = &c->bodies.layouts[body];
4473 Transform *xform = &c->bodies.transforms[body];
4474 *xform = T ? (*T) : identity_transform; // transform
4475 xform->body = body; // body id
4476 memset(&c->bodies.momentum[body], 0, sizeof(c->bodies.momentum[body])); // no velocity/angular velocity
4477 float com_delta[3] = {0.f,0.f,0.f};if (newComOffsetInPreScaledUnits) {for (int k=0;k<3;k++) com_delta[k] = newComOffsetInPreScaledUnits[k]-srcinfo->com_offset[k];}
4478 if (scale_factor<0.f) scale_factor=(srcinfo->aabb_half_extents[1]>=0.f)?(-scale_factor/srcinfo->aabb_half_extents[1]):-scale_factor;
4479 assert(scale_factor>0.f);
4480
4481 // clone almost everything (with some flags/scaling/com_offset adjustments)
4482 *prop=*srcprop;
4484 filter->flags|=(mass>0.f)?BF_IS_DYNAMIC:((mass<0.f)?BF_IS_KINEMATIC:BF_IS_STATIC);if (mass<0.f) mass=-mass;
4485 c->bodies.idle_counters[body]=(filter->flags&BF_IS_DYNAMIC)?0:0xFF;
4486 if (num_boxes) {
4487 assert(srclayout->first_box_index>=0 && (uint16_t)srclayout->first_box_index+num_boxes<=c->colliders.boxes.count);
4488 const Transform* srcT = &c->colliders.boxes.transforms[srclayout->first_box_index];
4489 const BoxCollider* srcC = &c->colliders.boxes.data[srclayout->first_box_index];
4490 layout->first_box_index = c->colliders.boxes.count;layout->num_boxes = num_boxes;c->colliders.boxes.count+=num_boxes;assert(c->colliders.boxes.count<=c->MAX_NUM_BOXES);
4491 Transform* T = &c->colliders.boxes.transforms[layout->first_box_index];
4492 BoxCollider* C = &c->colliders.boxes.data[layout->first_box_index];
4493 for (uint16_t i=0;i<num_boxes;i++) {
4494 T[i]=srcT[i];T[i].body=body;C[i]=srcC[i];
4495 for (int k=0;k<3;k++) {
4496 T[i].p[k]=scale_factor*(T[i].p[k]-com_delta[k]);
4497 C[i].size[k]*=scale_factor;
4498 }
4499 }
4500 }
4501 if (num_spheres) {
4502 assert(srclayout->first_sphere_index>=0 && (uint16_t)srclayout->first_sphere_index+num_spheres<=c->colliders.spheres.count);
4503 const Transform* srcT = &c->colliders.spheres.transforms[srclayout->first_sphere_index];
4504 const SphereCollider* srcC = &c->colliders.spheres.data[srclayout->first_sphere_index];
4505 layout->first_sphere_index = c->colliders.spheres.count;layout->num_spheres = num_spheres;c->colliders.spheres.count+=num_spheres;assert(c->colliders.spheres.count<=c->MAX_NUM_SPHERES);
4506 Transform* T = &c->colliders.spheres.transforms[layout->first_sphere_index];
4507 SphereCollider* C = &c->colliders.spheres.data[layout->first_sphere_index];
4508 for (uint16_t i=0;i<num_spheres;i++) {
4509 T[i]=srcT[i];T[i].body=body;C[i]=srcC[i];C[i].radius*=scale_factor;
4510 for (int k=0;k<3;k++) T[i].p[k]=scale_factor*(T[i].p[k]-com_delta[k]);
4511 }
4512 }
4513 // com offset adjustments
4514 if (newComOffsetInPreScaledUnits) {
4515 if (newComOffsetInPreScaledUnits[0]==0.f && newComOffsetInPreScaledUnits[1]==0.f && newComOffsetInPreScaledUnits[2]==0.f) memset(info->com_offset,0,3*sizeof(float));
4516 else {
4517 for (int k=0;k<3;k++) info->com_offset[k]=scale_factor*newComOffsetInPreScaledUnits[k];
4518 filter->flags|=BF_HAS_COM_OFFSET;
4519 }
4520 }
4521 else {
4522 // we must leave (and scale) srcinfo->com_offset
4523 if (srcinfo->com_offset[0]==0.f && srcinfo->com_offset[1]==0.f && srcinfo->com_offset[2]==0.f) {
4524 assert(!(srcfilter->flags&BF_HAS_COM_OFFSET));
4525 memset(info->com_offset,0,3*sizeof(float));
4526 }
4527 else {
4528 assert(srcfilter->flags&BF_HAS_COM_OFFSET);
4529 for (int k=0;k<3;k++) info->com_offset[k]=scale_factor*srcinfo->com_offset[k];
4530 filter->flags|=BF_HAS_COM_OFFSET;
4531 }
4532 }
4533 // recalculate aabb
4535 // scale mass and local_inertia
4536 assert(mass>=0);assert(srcprop->mass_inverse>=0.f);
4537 if (mass>0.f) {
4538 if (srcprop->mass_inverse>0.f) {
4539 // const float sc = scale_factor*scale_factor*mass*srcprop->mass_inverse;
4540 // I'xx = Ixx*sc => (1/I'xx) = (1/Ixx)*(1/sc) => (1/I'xx) = (1/Ixx)/sc => (1/I'xx)=(1/Ixx);(1/I'xx)/=sc; // valid for all the 3 components
4541 for (int k=0;k<3;k++) prop->inertia_inverse[k]/=scale_factor*scale_factor*mass*srcprop->mass_inverse;
4542 prop->mass_inverse=1.f/mass;
4543 }
4544 else {
4545 prop->mass_inverse=0.f;
4546 calculate_box_inertia_inverse(prop->inertia_inverse,mass,info->aabb_half_extents[0],info->aabb_half_extents[1],info->aabb_half_extents[2],info->com_offset);
4547 }
4548 }
4549
4550 return body;
4551}
4552unsigned add_clone(context_t* c, unsigned body_to_clone, float mass, const float* mMatrix16WithoutScaling, float scale_factor, const float newComOffsetInPreScaledUnits[3]) {
4553 Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_clone(c,body_to_clone,mass,&T,scale_factor,newComOffsetInPreScaledUnits);
4554}
4555
4556
4557namespace extra {
4558
// Default value used for 'box_lateral_side_shrinking' by add_compound_cylinder() (when spheres
// are present) and add_compound_capsule() when the caller passes a negative value.
// It can be overridden by defining it before this point.
#if !defined(NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK)
#   define NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK (1.f/3.5f)
#endif
4562
4563unsigned add_compound_prism(context_t* c, float mass, float radius, float hsize, unsigned num_lateral_faces, const Transform* T, AxisEnum axis, const float comOffset[3]) {
4564 if (num_lateral_faces==0) num_lateral_faces=8;
4565 if (num_lateral_faces<4) return NUDGE_INVALID_BODY_ID;
4566 if (num_lateral_faces==4) return add_box(c,mass,axis==AXIS_X?hsize:radius,axis==AXIS_Y?hsize:radius,axis==AXIS_Z?hsize:radius,T,comOffset);
4567 const int use_half_number_of_boxes = ((num_lateral_faces%2)==0);
4568 if (!use_half_number_of_boxes) return add_compound_hollow_cylinder(c,mass,0.f,radius,hsize,T,axis,num_lateral_faces,comOffset);
4569 const unsigned num_boxes = num_lateral_faces/2;
4570 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+64);
4571 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4572 float* box_hsizes = NULL;Transform* boxT=NULL;
4573 const float hsz = radius*tanf(M_PI/(float)num_lateral_faces);
4574 const float hln = radius;
4575 int axisi[3] = {0,1,2};
4576 if (axis==AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4577 else if (axis==AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4578 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4579 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4580 for (unsigned i=0;i<num_boxes;i++) {
4581 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4582 const float angle = (float)i*M_PI/(float)num_boxes;
4583 nm_QuatFromAngleAxis(t->q,-angle,axisv[0],axisv[1],axisv[2]);
4584 hs[axisi[0]]=hln;hs[axisi[1]]=hsize;hs[axisi[2]]=hsz;
4585 }
4586 float inertia[3]; calculate_cylinder_inertia(inertia,mass,radius,hsize,axis,comOffset);float stripped_center[3];
4587 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,0,NULL,NULL,T,comOffset,stripped_center);
4588}
4589
4590unsigned add_compound_cylinder(context_t* c,float mass,float radius,float hsize, const Transform* T,AxisEnum axis,unsigned num_boxes,unsigned num_spheres,const float comOffset[3],float box_lateral_side_shrinking) {
4591 const bool is_short_cylinder = (radius>=hsize);
4592 if (num_boxes==0 && num_spheres==0) {
4593 if (is_short_cylinder) {num_boxes=8;num_spheres=0;}
4594 else {num_boxes=1;num_spheres=3;}
4595 }
4596 if (is_short_cylinder) num_spheres = 0;
4597 if (box_lateral_side_shrinking<0.f) {
4598 if (num_spheres==0) box_lateral_side_shrinking=(num_boxes<=1)?0.f:(1.f-1.f/1.41f);
4599 else box_lateral_side_shrinking=NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK;
4600 }
4601 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+num_spheres*(1*sizeof(float)+sizeof(Transform))+128);
4602 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4603 float* box_hsizes = NULL;Transform* boxT=NULL;
4604 if (num_boxes>0) {
4605 const float offset = radius*box_lateral_side_shrinking; // box sides is radius-offset
4606 const float box_size = radius-offset;
4607 float angle = M_PI*0.5f/num_boxes;
4608 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4609 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4610 for (unsigned i=0;i<num_boxes;i++) {
4611 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4612 hs[0]=hs[1]=hs[2]=box_size;hs[axis]=hsize;
4613 nm_QuatFromAngleAxis(t->q,angle*i,axisv[0],axisv[1],axisv[2]);
4614 }
4615 }
4616 float* sphere_radii = NULL;Transform* sphereT = NULL;
4617 if (num_spheres>0) {
4618 sphere_radii = allocate_array<float>(&arena, num_boxes*1, 32);
4619 sphereT = allocate_array<Transform>(&arena, num_boxes, 32);
4620 for (unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
4621 if (num_spheres>=2) {
4622 Transform* t = &sphereT[0];t->p[axis]=-hsize+radius;
4623 t = &sphereT[1];t->p[axis]=hsize-radius;
4624 if (num_spheres>2) {
4625 // |---------------|----------------| // 1 => 1/2
4626 // |---------| |----------| // 2 => 1/3
4627 // |-------|-------|--------|-------| // 3 => 1/4
4628 const float dist = (2.f*hsize)/(float)(num_spheres+1);
4629 for (unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize+dist*i;}
4630 }
4631 }
4632 /*else if (mass==0) {
4633 assert(num_spheres==1);
4634 sphereT[i].p[axis]=-hsize+radius; // if it's static maybe we prefer a single sphere at the bottom?
4635 }*/
4636 }
4637 float inertia[3]; calculate_cylinder_inertia(inertia,mass,radius,hsize,axis,comOffset);float stripped_center[3];
4638 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
4639}
4640unsigned add_compound_capsule(context_t* c, float mass, float radius, float hsize, const Transform* T, AxisEnum axis, unsigned num_boxes, unsigned num_spheres, const float comOffset[3], float box_lateral_side_shrinking) {
4641 if (num_boxes==0 && num_spheres==0) {num_boxes=1;num_spheres=3;}
4642 assert(num_spheres>=2);
4643 if (box_lateral_side_shrinking<0.f) box_lateral_side_shrinking=NUDGE_EXTRA_RADIUS_ON_BOX_SHRINK;
4644 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+num_spheres*(1*sizeof(float)+sizeof(Transform))+128);
4645 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4646 float* box_hsizes = NULL;Transform* boxT=NULL;
4647 if (num_boxes>0) {
4648 const float offset = radius*box_lateral_side_shrinking; // box sides is radius-offset
4649 float angle = M_PI*0.5f/num_boxes;
4650 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4651 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4652 for (unsigned i=0;i<num_boxes;i++) {
4653 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4654 hs[0]=hs[1]=hs[2]=radius-offset;hs[axis]=hsize;
4655 nm_QuatFromAngleAxis(t->q,angle*i,axisv[0],axisv[1],axisv[2]);
4656 }
4657 }
4658 float* sphere_radii = NULL;Transform* sphereT = NULL;
4659 if (num_spheres>0) {
4660 sphere_radii = allocate_array<float>(&arena, num_boxes*1, 32);
4661 sphereT = allocate_array<Transform>(&arena, num_boxes, 32);
4662 for (unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
4663 Transform* t = &sphereT[0];t->p[axis]=-hsize;
4664 t = &sphereT[1];t->p[axis]=hsize;
4665 if (num_spheres>2) {
4666 // |---------------|----------------| // 1 => 1/2
4667 // |---------| |----------| // 2 => 1/3
4668 // |-------|-------|--------|-------| // 3 => 1/4
4669 const float dist = (2.f*(hsize+radius))/(float)(num_spheres+1);
4670 for (unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize-radius+dist*i;}
4671 }
4672 }
4673 float inertia[3]; calculate_capsule_inertia(inertia,mass,radius,hsize,axis,comOffset);float stripped_center[3];
4674 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
4675}
4676unsigned add_compound_hollow_cylinder(context_t* c,float mass,float min_radius,float max_radius,float hsize, const Transform* T,AxisEnum axis,unsigned num_boxes,const float comOffset[3]) {
4677 const unsigned num_spheres = 0;assert(min_radius<max_radius);if (num_boxes==0) num_boxes=8;
4678 const float radius = (max_radius+min_radius)*0.5f,inner_radius=(max_radius-min_radius)*0.5f;
4679 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+num_spheres*(1*sizeof(float)+sizeof(Transform))+128);
4680 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4681 int axisi[3] = {0,1,2};
4682 if (axis==AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4683 else if (axis==AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4684 float* box_hsizes = NULL;Transform* boxT=NULL;
4685 if (num_boxes>0) {
4686 const float box_length = max_radius*tanf(M_PI/(float)num_boxes);
4687 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4688 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4689 for (unsigned i=0;i<num_boxes;i++) {
4690 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4691 hs[axisi[0]]=box_length;hs[axisi[1]]=hsize;hs[axisi[2]]=inner_radius;
4692 const float angle = (float)i*2.f*M_PI/(float)num_boxes;
4693 const float sinAngle = sinf(angle), cosAngle = cosf(angle);
4694 nm_QuatFromAngleAxis(t->q,-angle,axisv[0],axisv[1],axisv[2]);
4695 t->p[axisi[0]]=(radius)*sinAngle;t->p[axisi[1]]=0.f;t->p[axisi[2]]=-(radius)*cosAngle;
4696 }
4697 }
4698 float* sphere_radii = NULL;Transform* sphereT = NULL;
4699 /*if (num_spheres>0) {
4700 sphere_radii = allocate_array<float>(&arena, num_boxes*1, 32);
4701 sphereT = allocate_array<Transform>(&arena, num_boxes, 32);
4702 for (unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
4703 if (num_spheres>=2) {
4704 Transform* t = &sphereT[0];t->p[axis]=-hsize+radius;
4705 t = &sphereT[1];t->p[axis]=hsize-radius;
4706 if (num_spheres>2) {
4707 // |---------------|----------------| // 1 => 1/2
4708 // |---------| |----------| // 2 => 1/3
4709 // |-------|-------|--------|-------| // 3 => 1/4
4710 const float dist = (2.f*hsize)/(float)(num_spheres+1);
4711 for (unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize+dist*i;}
4712 }
4713 }
4714 }*/
4715 float inertia[3]; calculate_hollow_cylinder_inertia(inertia,mass,max_radius,min_radius,hsize,axis,comOffset);float stripped_center[3];
4716 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
4717}
4718unsigned add_compound_torus(context_t* c,float mass,float radius,float inner_radius, const Transform* T,AxisEnum axis,unsigned num_boxes,const float comOffset[3]) {
4719 const unsigned num_spheres = 0;assert(inner_radius<=radius);if (num_boxes==0) num_boxes=8;
4720 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+num_spheres*(1*sizeof(float)+sizeof(Transform))+128);
4721 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4722 int axisi[3] = {0,1,2};
4723 if (axis==AXIS_X) {axisi[0]=2;axisi[1]=0;axisi[2]=1;}
4724 else if (axis==AXIS_Z) {axisi[0]=1;axisi[1]=2;axisi[2]=0;}
4725 float* box_hsizes = NULL;Transform* boxT=NULL;
4726 if (num_boxes>0) {
4727 const float box_length = (radius+inner_radius)*tanf(M_PI/(float)num_boxes);
4728 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4729 boxT = allocate_array<Transform>(&arena, num_boxes, 32);
4730 for (unsigned i=0;i<num_boxes;i++) {
4731 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4732 hs[axisi[0]]=box_length;hs[axisi[1]]=inner_radius;hs[axisi[2]]=inner_radius;
4733 const float angle = (float)i*2.f*M_PI/(float)num_boxes;
4734 const float sinAngle = sinf(angle), cosAngle = cosf(angle);
4735 nm_QuatFromAngleAxis(t->q,-angle,axisv[0],axisv[1],axisv[2]);
4736 t->p[axisi[0]]=(radius)*sinAngle;t->p[axisi[1]]=0.f;t->p[axisi[2]]=-(radius)*cosAngle;
4737 }
4738 }
4739 float* sphere_radii = NULL;Transform* sphereT = NULL;
4740 /*if (num_spheres>0) {
4741 sphere_radii = allocate_array<float>(&arena, num_boxes*1, 32);
4742 sphereT = allocate_array<Transform>(&arena, num_boxes, 32);
4743 for (unsigned i=0;i<num_spheres;i++) {sphere_radii[i]=radius;sphereT[i]=identity_transform;}
4744 if (num_spheres>=2) {
4745 Transform* t = &sphereT[0];t->p[axis]=-hsize+radius;
4746 t = &sphereT[1];t->p[axis]=hsize-radius;
4747 if (num_spheres>2) {
4748 // |---------------|----------------| // 1 => 1/2
4749 // |---------| |----------| // 2 => 1/3
4750 // |-------|-------|--------|-------| // 3 => 1/4
4751 const float dist = (2.f*hsize)/(float)(num_spheres+1);
4752 for (unsigned i=2;i<num_spheres;i++) {sphereT[i].p[axis]=-hsize+dist*i;}
4753 }
4754 }
4755 }*/
4756 float inertia[3]; calculate_torus_inertia(inertia,mass,radius,inner_radius,axis,comOffset);float stripped_center[3];
4757 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
4758}
4759unsigned add_compound_cone(context_t* c, float mass, float radius, float hheight, const Transform* T, AxisEnum axis, unsigned num_boxes, unsigned num_spheres, const float comOffset[3]) {
4760 if (num_boxes==0) num_boxes=4;
4761 if (num_spheres==0) num_spheres=3;
4762 Arena arena = c->arena;assert(arena.size>num_boxes*(3*sizeof(float)+sizeof(Transform))+num_spheres*(1*sizeof(float)+sizeof(Transform))+128);
4763 const float axisv[3] = {(axis==AXIS_X)?1.f:0.f,(axis==AXIS_Y)?1.f:0.f,(axis==AXIS_Z)?1.f:0.f};
4764 const float R=radius,HH=hheight,H=hheight*2.f,theta=atanf(R/H);//,phi=M_PI*0.5f-theta;
4765 float* box_hsizes = NULL;Transform* boxT=NULL;
4766 if (num_boxes>0) {
4767 box_hsizes = allocate_array<float>(&arena, num_boxes*3, 32);
4768 boxT = allocate_array<Transform>(&arena, num_boxes, 32);for (unsigned i=0;i<num_boxes;i++) boxT[i]=identity_transform;
4769 if (num_boxes>0) {
4770 unsigned num_group_boxes[4] = {};
4771 if (num_boxes>3) num_group_boxes[3]=1;
4772 if (num_boxes>5) {num_group_boxes[1]=2;}
4773 else if (num_boxes>4) {num_group_boxes[1]=1;}
4774 if (num_spheres>=3 && num_group_boxes[1]==1) {num_group_boxes[3]=0;num_group_boxes[1]=2;}
4775 num_group_boxes[0]=num_boxes-num_group_boxes[1]-num_group_boxes[2]-num_group_boxes[3];
4776 //log("%u+%u+%u+%u=%u\n",num_group_boxes[0],num_group_boxes[1],num_group_boxes[2],num_group_boxes[3],num_boxes);
4777 const float h_fracs[4]={0.1f,0.2f,0.7f,0.85f};
4778 unsigned num_box_offset=0;
4779 for (unsigned j=0;j<4;j++) {
4780 const unsigned group_boxes = num_group_boxes[j];if (group_boxes==0) continue;;
4781 const float dh = h_fracs[j]*HH,dr = R*(HH-dh)/(HH*1.41f);
4782 float angle = M_PI*0.5f/group_boxes, angle_offset=M_PI*0.25f*j;
4783 for (unsigned i=0;i<group_boxes;i++) {
4784 float* hs = &box_hsizes[3*(i+num_box_offset)];Transform* t = &boxT[i+num_box_offset];*t=identity_transform;
4785 t->p[axis]=-HH+dh;
4786 hs[0]=hs[1]=hs[2]=dr;hs[axis]=dh;
4787 nm_QuatFromAngleAxis(t->q,angle_offset+angle*i,axisv[0],axisv[1],axisv[2]);
4788 }
4789 num_box_offset+=group_boxes;
4790 }
4791 }
4792 }
4793 float* sphere_radii = NULL;Transform* sphereT = NULL;
4794 if (num_spheres>0) {
4795 sphere_radii = allocate_array<float>(&arena, num_spheres*1, 32);
4796 sphereT = allocate_array<Transform>(&arena, num_spheres, 32);
4797 for (unsigned i=0;i<num_spheres;i++) sphereT[i]=identity_transform;
4798 const float sin_theta = sinf(theta);
4799 const float r = H*sin_theta/(sin_theta+1);sphere_radii[0]=r;sphereT[0].p[axis]=-HH+r; // bigger sphere
4800 if (num_spheres>1) {
4801 const unsigned remaining_spheres = num_spheres-1;
4802 const float min_rad=0.2f*r;
4803 const float max_rad=num_spheres<=3?0.45f*r:(num_spheres>=6?0.85f*r:(0.45f*r+((0.85f*r-0.45f*r)*(num_spheres-3))/2));
4804 for (unsigned i=0;i<remaining_spheres;i++) {
4805 const float rtop = remaining_spheres==1?min_rad:(min_rad+((max_rad-min_rad)*i)/(remaining_spheres-1));//0.65f*r-0.2f*r*((float)(i+1)/(float)remaining_spheres);
4806 sphere_radii[i+1]=rtop;
4807 sphereT[i+1].p[axis]=HH-rtop/sinf(theta);
4808 }
4809 }
4810 }
4811 float inertia[3]; calculate_cone_inertia(inertia,mass,radius,hheight,axis,comOffset);float stripped_center[3];
4812 return add_compound(c,mass,inertia,num_boxes,box_hsizes,boxT,num_spheres,sphere_radii,sphereT,T,comOffset,stripped_center);
4813}
4814unsigned add_compound_prism(context_t* c, float mass, float radius, float hsize, unsigned num_lateral_faces, const float* mMatrix16WithoutScaling, AxisEnum axis, const float comOffset[3]) {
4815 if (!mMatrix16WithoutScaling) return add_compound_prism(c,mass,radius,hsize,num_lateral_faces,(const Transform*)NULL,axis,comOffset);
4816 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_prism(c,mass,radius,hsize,num_lateral_faces,&T,axis,comOffset);}
4817}
4818unsigned add_compound_cylinder(context_t* c, float mass, float radius, float hsize, const float* mMatrix16WithoutScaling, AxisEnum axis, unsigned num_boxes, unsigned num_spheres, const float comOffset[3], float box_lateral_side_shrinking) {
4819 if (!mMatrix16WithoutScaling) return add_compound_cylinder(c,mass,radius,hsize,(const Transform*)NULL,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);
4820 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_cylinder(c,mass,radius,hsize,&T,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);}
4821}
4822unsigned add_compound_capsule(context_t* c, float mass, float radius, float hsize, const float* mMatrix16WithoutScaling, AxisEnum axis, unsigned num_boxes, unsigned num_spheres, const float comOffset[3], float box_lateral_side_shrinking) {
4823 if (!mMatrix16WithoutScaling) return add_compound_capsule(c,mass,radius,hsize,(const Transform*)NULL,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);
4824 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_capsule(c,mass,radius,hsize,&T,axis,num_boxes,num_spheres,comOffset,box_lateral_side_shrinking);}
4825}
4826unsigned add_compound_hollow_cylinder(context_t* c,float mass,float min_radius,float max_radius,float hsize,const float* mMatrix16WithoutScaling,AxisEnum axis,unsigned num_boxes,const float comOffset[3]) {
4827 if (!mMatrix16WithoutScaling) return add_compound_hollow_cylinder(c,mass,min_radius,max_radius,hsize,(const Transform*)NULL,axis,num_boxes,comOffset);
4828 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_hollow_cylinder(c,mass,min_radius,max_radius,hsize,&T,axis,num_boxes,comOffset);}
4829}
4830unsigned add_compound_torus(context_t* c,float mass,float radius,float inner_radius, const float* mMatrix16WithoutScaling,AxisEnum axis,unsigned num_boxes,const float comOffset[3]) {
4831 if (!mMatrix16WithoutScaling) return add_compound_torus(c,mass,radius,inner_radius,(const Transform*)NULL,axis,num_boxes,comOffset);
4832 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_torus(c,mass,radius,inner_radius,&T,axis,num_boxes,comOffset);}
4833}
4834unsigned add_compound_cone(context_t* c, float mass, float radius, float hheight, const float* mMatrix16WithoutScaling, AxisEnum axis, unsigned num_boxes, unsigned num_spheres, const float comOffset[3]) {
4835 if (!mMatrix16WithoutScaling) return add_compound_cone(c,mass,radius,hheight,(const Transform*)NULL,axis,num_boxes,num_spheres,comOffset);
4836 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_cone(c,mass,radius,hheight,&T,axis,num_boxes,num_spheres,comOffset);}
4837}
4838
4839unsigned add_compound_staircase(context_t* c,float mass, float hdepth, float hheight, float hlength, unsigned num_steps, const Transform* T, int orientation_in_0_3, const float comOffset[3]) {
4840 if (num_steps<=0) num_steps=15;
4841 Arena arena = c->arena;assert(arena.size>num_steps*(3*sizeof(float)+sizeof(Transform))+128);
4842 int axisi[3] = {0,1,2};if (orientation_in_0_3<0) orientation_in_0_3=-orientation_in_0_3;orientation_in_0_3%=4;const float sign = orientation_in_0_3<2?-1.f:1.f;
4843 if (orientation_in_0_3%2==1) {axisi[0]=2;axisi[1]=1;axisi[2]=0;}
4844 float* box_hsizes = NULL;Transform* boxT=NULL;
4845 const float step_hheight = hheight/(float)(num_steps);
4846 const float step_hlen = hlength/(float)(num_steps);
4847 if (num_steps>0) {
4848 box_hsizes = allocate_array<float>(&arena, num_steps*3, 32);
4849 boxT = allocate_array<Transform>(&arena, num_steps, 32);
4850 for (unsigned i=0;i<num_steps;i++) {
4851 float* hs = &box_hsizes[3*i];Transform* t = &boxT[i];*t=identity_transform;
4852 hs[axisi[0]]=hdepth;hs[axisi[1]]=step_hheight;hs[axisi[2]]=hlength-(float)i*step_hlen;
4853 t->p[axisi[1]]=-hheight+step_hheight+step_hheight*2.f*(float)i;
4854 t->p[axisi[2]]=sign*step_hlen*(float)i; // invert sign to mirror staircase on the z-axis
4855 }
4856 }
4857 return add_compound(c,mass,NULL,num_steps,box_hsizes,boxT,0,NULL,NULL,T,comOffset,NULL);
4858}
4859unsigned add_compound_staircase(context_t* c, float mass, float hdepth, float hheight, float hlength, unsigned num_steps, const float* mMatrix16WithoutScaling, int orientation_in_0_3, const float comOffset[3]) {
4860 if (!mMatrix16WithoutScaling) return add_compound_staircase(c,mass,hdepth,hheight,hlength,num_steps,(const Transform*)NULL,orientation_in_0_3,comOffset);
4861 else {Transform T;Mat4WithoutScalingToTransform(&T,mMatrix16WithoutScaling);return add_compound_staircase(c,mass,hdepth,hheight,hlength,num_steps,&T,orientation_in_0_3,comOffset);}
4862
4863}
4864
4865} // namespace extra
4866// ------------
4867void body_recalculate_bounding_box(context_t* c,uint32_t body) {
4868 assert(c && body<c->bodies.count);
4869 float aabb_min[3]={0,0,0},aabb_max[3]={0,0,0};
4870 const BodyLayout* L = &c->bodies.layouts[body];
4871 if (L->num_spheres>0) {
4872 assert(L->first_sphere_index>=0 && (uint16_t)L->first_sphere_index+L->num_spheres<=c->colliders.spheres.count);
4873 const SphereCollider* S = &c->colliders.spheres.data[L->first_sphere_index];
4874 const Transform* T = &c->colliders.spheres.transforms[L->first_sphere_index];
4875 for (int i=0;i<3;i++) {aabb_min[i]=T[0].p[i]-S[0].radius;aabb_max[i]=T[0].p[i]+S[0].radius;} // assign first aabb_min_max
4876 // process the remaining spheres and enlarge aabb_min_max
4877 for (int j=1;j<L->num_spheres;j++) {
4878 const Transform* t = &T[j];const float r = S[j].radius;
4879 for (int i=0;i<3;i++) {
4880 if (aabb_min[i]>t->p[i]-r) aabb_min[i]=t->p[i]-r;
4881 if (aabb_max[i]<t->p[i]+r) aabb_max[i]=t->p[i]+r; // 'else' at the beginning is wrong!
4882 }
4883 }
4884 }
4885 if (L->num_boxes>0) {
4886 assert(L->first_box_index>=0 && (uint16_t)L->first_box_index+L->num_boxes<=c->colliders.boxes.count);
4887 const BoxCollider* B = &c->colliders.boxes.data[L->first_box_index];
4888 const Transform* T = &c->colliders.boxes.transforms[L->first_box_index];
4889 for (int j=0;j<L->num_boxes;j++) {
4890 const float* hs = &B[j].size[0];const Transform* t = &T[j];
4891 if (t->q[0]==0.f && t->q[1]==0.f && t->q[2]==0.f && t->q[3]==1.f) {
4892 // fast code path
4893 if (j==0 && L->num_spheres==0) {
4894 for (int i=0;i<3;i++) {aabb_min[i]=t->p[i]-hs[i];aabb_max[i]=t->p[i]+hs[i];} // assign first aabb_min_max
4895 continue;
4896 }
4897 // enlarge aabb_min_max with t and hs
4898 for (int i=0;i<3;i++) {
4899 if (aabb_min[i]>t->p[i]-hs[i]) aabb_min[i]=t->p[i]-hs[i];
4900 if (aabb_max[i]<t->p[i]+hs[i]) aabb_max[i]=t->p[i]+hs[i]; // 'else' at the beginning is wrong!
4901 }
4902 continue;
4903 }
4904 else {
4905 // slow code path
4906 float m[9];nm_Mat3FromQuat(m,t->q); // rotation matrix of t->q
4907 // calculate result based on m[9], t->p[] and hs[]
4908 for (int i=0;i<3;i++) {
4909 float vmin,vmax; // min and max of the (i) component of the aabb of the j-th box
4910 const float hd=fabsf(m[i]*hs[0])+fabsf(m[3+i]*hs[1])+fabsf(m[6+i]*hs[2]);
4911 vmin = vmax = t->p[i]; vmin-= hd;vmax+= hd;
4912 if (j==0 && L->num_spheres==0) {
4913 aabb_min[i]=vmin;aabb_max[i]=vmax; // assign first aabb_min_max
4914 continue;
4915 }
4916 // enlarge (aabb_min_max) with vmin and vmax
4917 if (aabb_min[i]>vmin) aabb_min[i]=vmin;
4918 if (aabb_max[i]<vmax) aabb_max[i]=vmax; // 'else' at the beginning is wrong!
4919 }
4920 }
4921 }
4922 }
4923 // convert aabb_min/aabb_max to aabb_center/aabb_extents
4924 const int stripComOffset=0;
4925 BodyInfo* info = &c->bodies.infos[body];
4926 for (int i=0;i<3;i++) {
4927 info->aabb_center[i]= (aabb_max[i]+aabb_min[i])*0.5f;
4928 info->aabb_half_extents[i]=(aabb_max[i]-aabb_min[i])*0.5f;
4929 if (stripComOffset) info->aabb_center[i]+=info->com_offset[i];
4930 }
4931 // calculate info->aabb_enlarged_radius
4932 info->aabb_enlarged_radius = 0;
4933 const float* t = info->aabb_half_extents;float s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4934 t = info->aabb_center;s = t[0]*t[0] + t[1]*t[1] + t[2]*t[2];if (s>NM_EPSILON) info->aabb_enlarged_radius+=sqrtf(s);
4935}
4936
4937void body_change_motion_state(nudge::context_t* c,unsigned body,nudge::FlagMask new_motion_state,float mass_fallback) {
4938 using namespace nudge;assert(c && body<c->bodies.count);
4939 new_motion_state&=BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC;assert(new_motion_state); // wrong input argument
4940 BodyFilter* bf = &c->bodies.filters[body];if (bf->flags&new_motion_state) return;
4941 BodyProperties* bp = &c->bodies.properties[body];BodyMomentum* bm = &c->bodies.momentum[body];
4942 if (new_motion_state&BF_IS_DYNAMIC) {
4943 if (bp->mass_inverse!=0) {assert(bp->mass_inverse>0.f);/*if (bp->mass_inverse<0) {bp->mass_inverse=-bp->mass_inverse;mass_fallback = 1.f/bp->mass_inverse;}*/}
4944 else {if (mass_fallback<0.f) {mass_fallback=-mass_fallback;} bp->mass_inverse=1.f/mass_fallback;}
4945 if (bp->inertia_inverse[0]==0.f && bp->inertia_inverse[1]==0.f && bp->inertia_inverse[2]==0.f) {BodyInfo* bi = &c->bodies.infos[body];float* he=bi->aabb_half_extents;calculate_box_inertia_inverse(bp->inertia_inverse,mass_fallback,he[0],he[1],he[2],bi->com_offset);}
4946 else {assert(!(bp->inertia_inverse[0]<0.f) && !(bp->inertia_inverse[1]<0.f) && !(bp->inertia_inverse[2]<0.f));
4947 /*for (int k=0;k<3;k++) {if (bp->inertia_inverse[k]<0.f) bp->inertia_inverse[k]=-bp->inertia_inverse[k];}*/
4948 }
4949 }
4950 memset(bm->velocity,0,3*sizeof(bm->velocity));memset(bm->angular_velocity,0,3*sizeof(bm->angular_velocity));
4951 bf->flags&=~BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC;bf->flags|=new_motion_state;
4952 c->bodies.idle_counters[body]=(new_motion_state&BF_IS_DYNAMIC)?0:0xFF; // wake up or put to sleep
4953}
4954
// Scales 'body': its mass/inertia (bp) and the positions and sizes of its box and sphere colliders.
//  - scale_factor: must be non-zero (asserted). A negative value is converted to
//    -scale_factor/aabb_half_extents[1] before use.
//  - mass_scale_factor: >0 divides mass_inverse by it; <0 means its absolute value is the new mass;
//    ==0 replaces it with scale_factor^3 (see the review note below).
// NOTE(review): some statements of this function are absent from this listing (e.g. the
// declarations of 'T' and 'C' used in the collider loops, and the code that follows the
// "scale aabb and com_offset" comment); the comments here describe only the visible lines.
4955void body_scale(nudge::context_t* c,unsigned body,float scale_factor,float mass_scale_factor) {
4956 assert(c && body<c->bodies.count);
4957 assert(scale_factor!=0.f);
4958 if (scale_factor<0.f) {
4959 const float hey = c->bodies.infos[body].aabb_half_extents[1];assert(hey>0.f);
4960 scale_factor = -scale_factor/hey;
4961 }
4962 // scale mass and local_inertia
4963 BodyProperties* bp = &c->bodies.properties[body];
// NOTE(review): in the ==0 case mass_scale_factor is computed, but neither of the two branches
// below runs, so mass_inverse and inertia_inverse are left untouched by the visible code - confirm this is intended.
4964 if (mass_scale_factor==0.f) mass_scale_factor = scale_factor*scale_factor*scale_factor;
4965 else if (mass_scale_factor<0.f) {
4966 mass_scale_factor=-mass_scale_factor; // this is our new mass
// NOTE(review): the divisor contains bp->mass_inverse; when that is 0 the divisor is 0
// (0/0 for a zero inertia_inverse) - confirm the value expected by the check at the end of the function.
4967 for (int k=0;k<3;k++) bp->inertia_inverse[k]/=scale_factor*scale_factor*mass_scale_factor*bp->mass_inverse;
4968 bp->mass_inverse=1.f/mass_scale_factor;
4969 }
4970 else {
4971 bp->mass_inverse/=mass_scale_factor;
4972 for (int k=0;k<3;k++) bp->inertia_inverse[k]/=(scale_factor*scale_factor*mass_scale_factor);
4973 }
4974 // scale colliders
4975 BodyLayout* bl = &c->bodies.layouts[body];
4976 if (bl->num_boxes>0) {
4977 assert(bl->first_box_index>=0 && (uint16_t)bl->first_box_index+bl->num_boxes<=c->colliders.boxes.count);
// box positions and half sizes are both multiplied by scale_factor
4980 for (uint16_t i=0;i<bl->num_boxes;i++) {for (int k=0;k<3;k++) {T[i].p[k]*=scale_factor;C[i].size[k]*=scale_factor;}}
4981 }
4982 if (bl->num_spheres>0) {
4983 assert(bl->first_sphere_index>=0 && (uint16_t)bl->first_sphere_index+bl->num_spheres<=c->colliders.spheres.count);
// sphere positions and radii are both multiplied by scale_factor
4986 for (uint16_t i=0;i<bl->num_spheres;i++) {Transform* t = &T[i];C[i].radius*=scale_factor;for (int k=0;k<3;k++) t->p[k]*=scale_factor;}
4987 }
4988 // scale aabb and com_offset
4990
4991 if (bp->mass_inverse && (bp->inertia_inverse[0]==0 && bp->inertia_inverse[1]==0 && bp->inertia_inverse[2]==0)) {
4992 // this case might indeed happen, when a static body with zero mass and inertia is scaled with a negative 'mass_scaling_factor'
4993 // the body remains static (flag+sleeping state), but it's better to set a valid inertia to it (or to just reset its mass to zero)
4994 const BodyInfo* bi = &c->bodies.infos[body];
4996 }
4997}
4998
// Advances the kinematic animations stored in c->kinematic_data by 'timeStep' seconds.
// For every animation 'ka' that references a valid body:
//  - bodies flagged BF_IS_DISABLED_OR_REMOVED are skipped (removed ones are either deleted from
//    the list or get NUDGE_INVALID_BODY_ID, depending on
//    NUDGE_DELETE_KINEMATIC_ANIMATIONS_REFERENCING_REMOVED_BODIES);
//  - if 'ka' is playing, has non-zero speed and its body is BF_IS_KINEMATIC, ka->play_time is
//    advanced by timeStep*ka->speed, the key frame interval containing the resulting time is
//    searched (forwards, or backwards when 'mustReverse' is set) and the interpolated transform
//    (TransformSlerp) is handed to TransformAssignToBody;
//  - when the last key frame is passed, the final key frame is assigned, playback stops and
//    play_time is reset to offset_time.
// NOTE(review): several statements of this function are absent from this listing (e.g. the
// declarations of 'ka', 'pkfT' and 'pkfMode', and some 'if' headers), so the brace structure
// below looks unbalanced; the comments describe only what the visible lines show.
4999static void simulate_kinematic_animations(context_t* c,float timeStep) {
5000 // This code is too long... can we compress it better?
5001 for (int j=0,j_sz=(int)c->kinematic_data.animations_count;j<j_sz;j++) {
5003 if (ka->body>=c->bodies.count) continue;
5004 const uint32_t flags = c->bodies.filters[ka->body].flags;
5005 if (flags&BF_IS_DISABLED_OR_REMOVED) {
5006 if (flags&BF_IS_REMOVED) {
5007# ifdef NUDGE_DELETE_KINEMATIC_ANIMATIONS_REFERENCING_REMOVED_BODIES
5008 // Delete KinematicData::Animation
5009 // |------|-----|-----|-----|
5010 // 0 1 2 3 4
5011 // j count
// one entry was removed: revisit index j and shrink both the local and the stored count
5013 --j;--j_sz;--c->kinematic_data.animations_count;
5014# else // NUDGE_DELETE_KINEMATIC_ANIMATIONS_REFERENCING_REMOVED_BODIES
5015 ka->body = NUDGE_INVALID_BODY_ID;assert(ka->body>=c->bodies.count);
5016# endif // NUDGE_DELETE_KINEMATIC_ANIMATIONS_REFERENCING_REMOVED_BODIES
5017 }
5018 continue;
5019 }
5020 const float absSpeed = fabsf(ka->speed); //ka->speed<0 ? -ka->speed : ka->speed;
5021 if (ka->playing && absSpeed!=0 && ka->body<c->bodies.count
5022 && flags&BF_IS_KINEMATIC
5023 ) {
5024 const float deltaTime = timeStep;
5025 Transform T = identity_transform;
5026 //assert(ka->body<c->bodies.count);
5027 //assert(c->bodies.properties[ka->body].mass_inverse<0.f);
// a negative total_time means "not computed yet": it is set to the sum of the key frame times
5031 if (ka->total_time<0.f) {ka->total_time = 0;for (uint32_t l=0;l<ka->key_frame_count;l++) ka->total_time+=pkfT[l] .time;}
5032 //if (ka->play_time<0.f) play_time = ka->offset_time; // Nope: this must be set together with ka->playing to start playback
5033 ka->play_time+=deltaTime*ka->speed;
5034 bool mustReverse = ka->play_time < 0;
5035 float absPlayTime = mustReverse ? -ka->play_time : ka->play_time;
5036 float curTime(0);
5037
5039 const float totalTime = ka->total_time;
5040 if (totalTime<=0) continue;
// fractionTimeUL = number of whole animation lengths already elapsed; its parity flips the
// playback direction in the block below, and the whole lengths are then removed from absPlayTime
5041 const float fractionTime = absPlayTime/totalTime;
5042 const unsigned long fractionTimeUL = (unsigned long)fractionTime;
5044 if (mustReverse) mustReverse = fractionTimeUL%2==0;
5045 else mustReverse = fractionTimeUL%2==1;
5046 }
5047 absPlayTime-=(totalTime*fractionTimeUL);
5048 }
5049 const int ksz=(int)ka->key_frame_count;
5050 if (!mustReverse || ksz<=1) {
// forward playback: accumulate key frame durations until the interval containing absPlayTime is found
5051 for (int keyFrameIndex=0;keyFrameIndex<ksz;keyFrameIndex++) {
5052 const Transform* kfT = &pkfT[keyFrameIndex];
5053 const KinematicData::TimeMode* kfMode = &pkfMode[keyFrameIndex];
5054 if (kfT->time<=0) continue;
5055 curTime+=kfT->time;
5056 if (absPlayTime <= curTime) {
// factor is the position inside the current interval: 0 at its start, 1 at its end
5057 float factor = float(1)-(curTime-absPlayTime)/kfT->time;
5058 if (*kfMode == KinematicData::TM_ACCELERATE) factor*=factor;
5059 else if (*kfMode == KinematicData::TM_DECELERATE && factor>0) factor=sqrtf(factor);
5060
5061 if (keyFrameIndex>0) {
5062 const Transform* kfTp = &pkfT[keyFrameIndex-1];
5063 T = TransformSlerp(*kfTp,*kfT,factor);
5064 if (ka->use_baseT) T = ka->baseT*T;
5065 }
5066 else {
5068 T = TransformSlerp(c->bodies.transforms[ka->body],ka->use_baseT ? (ka->baseT*(*kfT)) : (*kfT),factor);
5069 }
5070 else {
5071 const Transform* startT = &pkfT[ksz-1];
5072 T = TransformSlerp(*startT,*kfT,factor);
5073 if (ka->use_baseT) T = ka->baseT*T;
5074 }
5075 }
5076 TransformAssignToBody(c,ka->body,T,deltaTime);
5077 break;
5078 }
5079 else if (keyFrameIndex == ksz-1) {
5081 TransformAssignToBody(c,ka->body,ka->use_baseT ? (ka->baseT*(*kfT)) : (*kfT),deltaTime);
5082 ka->playing = false;ka->play_time=ka->offset_time; //TODO: fire end event here?
5083 }
5084 }
5085 }
5086 }
5087 else {
5088 // Play animations backwards
5089 // ksz > 1 here
5090 for (int keyFrameIndex=ksz-1;keyFrameIndex>=0;keyFrameIndex--) {
5091 const Transform* kfT = &pkfT[keyFrameIndex];
5092 const bool isFirstKeyFrame = (keyFrameIndex==ksz-1);
// going backwards, the duration and time mode come from the following key frame (from key frame 0 for the last one)
5093 const float& kfTime = isFirstKeyFrame ? pkfT[0].time : pkfT[keyFrameIndex+1].time;
5094 const KinematicData::TimeMode timeMode = isFirstKeyFrame ? pkfMode[0] : pkfMode[keyFrameIndex+1];
5095 if (kfTime<=0) continue; //MMMhhh, empty frame array will break both autoFlags and endEvents here....
5096 curTime+=kfTime;
5097 if (absPlayTime <= curTime) {
5098 float factor = float(1)-(curTime-absPlayTime)/kfTime;
// the two easing modes are swapped with respect to forward playback
5099 if (timeMode == KinematicData::TM_DECELERATE ) factor*=factor;
5100 else if (timeMode == KinematicData::TM_ACCELERATE && factor>0) factor=sqrtf(factor);
5101
5102 if (!isFirstKeyFrame) {
5103 const Transform* kfTp = &pkfT[keyFrameIndex+1];
5104 T = TransformSlerp(*kfTp,*kfT,factor);
5105 if (ka->use_baseT) T = ka->baseT*T;
5106 }
5107 else {
5109 T = TransformSlerp(c->bodies.transforms[ka->body],ka->use_baseT ? (ka->baseT*(*kfT)) : (*kfT),factor);
5110 }
5111 else {
5112 const Transform* startT = &pkfT[0];
5113 T = TransformSlerp(*startT,*kfT,factor);
5114 if (ka->use_baseT) T = ka->baseT*T;
5115 }
5116 }
5117 TransformAssignToBody(c,ka->body,T,deltaTime);
5118 break;
5119 }
5120 else if (keyFrameIndex == 0) {
5122 TransformAssignToBody(c,ka->body,ka->use_baseT ? (ka->baseT*(*kfT)) : (*kfT),deltaTime);
5123 ka->playing = false;ka->play_time=ka->offset_time; //TODO: fire end event here?
5124 }
5125 }
5126 }
5127 }
5128 }
5129 }
5130}
5131
5132
// Fills pModelMatrix16Out (16 floats, must not be NULL) with the matrix used to render 'body'
// and returns pModelMatrix16Out.
// The physic transform of the body is copied; when
// c->simulation_params.time_step_minus_remaining_time>0 and the body is neither excluded by
// 'exclude_flags', nor static, nor a sleeping dynamic body, the copy is moved BACK in time by
// that amount using the body's linear and angular velocity. The copy is then converted with
// TransformToMat4 and, if BF_HAS_COM_OFFSET is set, the rotated com_offset is subtracted from
// the translation (elements 12..14).
// NOTE(review): 'exclude_flags' is used below but its declaration is not part of this listing.
5133float* calculate_graphic_transform_for_body(context_t* c,unsigned body,float* pModelMatrix16Out) {
5134 assert(body<c->bodies.count);
5135 assert(pModelMatrix16Out);
5136 const Transform* T = &c->bodies.transforms[body];assert(T->body==body);
5137 //const float mass_inverse = c->bodies.properties[T->body].mass_inverse<0.f;
5138 const uint32_t flags = c->bodies.filters[T->body].flags;
// an idle counter of 0xff marks the body as sleeping
5140 const int isSleeping = c->bodies.idle_counters[T->body]==0xff;
5141 const float timeStepMinusRemainingTime=c->simulation_params.time_step_minus_remaining_time;
5142 Transform Tn;memcpy(&Tn,T,sizeof(Transform));
5143
5144 int mustSmoothTransform = timeStepMinusRemainingTime>0 && !(exclude_flags&flags);
5145 if (mustSmoothTransform && (flags&BF_IS_STATIC || (flags&BF_IS_DYNAMIC && isSleeping))) mustSmoothTransform=0;
5146
5147 if (mustSmoothTransform) {
5148 // same work done in advance(...) here AFAIK
5149 assert(T==&c->bodies.transforms[T->body]);
5150 /*
5151 // Actually this is the correct code to advance graphic transforms...
5152 // ...but we DON'T want to do it
5153 const float* linvel = c->bodies.momentum[T->body].velocity;
5154 const float* angvel = c->bodies.momentum[T->body].angular_velocity;
5155 // advance Tn based on T, linvel and angvel
5156 for (int l=0;l<3;l++) {Tn.position[l]+=linvel[l]*remainingTime;}
5157 nm_QuatAdvance(Tn.rotation,T->rotation,angvel,remainingTime*0.5f);
5158 */
5159 // Instead we want to move graphic transforms backwards!
5160 // This is what we really want (even is we add a 'timeStep' delay)!
5161 // In fact simulate(...) calculates linvel and angvel per body and then uses them to advance bodies in advance(...).
5162 // That's why advancing bodies further is NOT correct, but going back is CORRECT (note that we couldn't just advance a fraction in advance(...): that's the PHYSIC transform and it would affect simulation).
5163
5164 const float* linvel = c->bodies.momentum[T->body].velocity;
5165 const float* angvel = c->bodies.momentum[T->body].angular_velocity;
5166 // retrace Tn based on T, linvel and angvel
5167 for (int l=0;l<3;l++) {
5168 Tn.position[l]-=linvel[l]*timeStepMinusRemainingTime;
5169 //Tn.rotation[l]-=angvel[l]*timeStepMinusRemainingTime; // approximate but faster (mmmh, spheres "pulse" with this)
5170 }
5171 const float angvelinv[3] = {-angvel[0],-angvel[1],-angvel[2]}; // good but slower
5172 nm_QuatAdvance(Tn.rotation,T->rotation,angvelinv,timeStepMinusRemainingTime*0.5f);
5173 }
5174
5175 // we must convert the physic Transform Tn to the graphic mMatrix (16 floats) used for rendering
5176 if (pModelMatrix16Out) TransformToMat4(pModelMatrix16Out,&Tn);
5177 // graphic transform (pModelMatrix16Out) must be updated if the body has a COM offset
5178 if ((flags&BF_HAS_COM_OFFSET)) {
5179 // comOffset must be subtracted
5180 const float* comOffset = c->bodies.infos[T->body].com_offset;
5181 //for (int l=0;l<3;l++) Tn.position[l]-= ???;
// subtracts (upper-left 3x3 of the matrix) * comOffset from the translation stored at [12..14]
5182 for (int l=0;l<3;l++) pModelMatrix16Out[12+l] -= pModelMatrix16Out[l]*comOffset[0]+pModelMatrix16Out[4+l]*comOffset[1]+pModelMatrix16Out[8+l]*comOffset[2];
5183 }
5184 return pModelMatrix16Out;
5185}
5186
5187void calculate_graphic_transforms(context_t* c,float* pModelMatricesOut,unsigned modelMatrixStrideInFloatUnits,int loopActiveBodiesOnly) {
5188 if (modelMatrixStrideInFloatUnits<16) modelMatrixStrideInFloatUnits=16;
5189 const unsigned bodies_count = loopActiveBodiesOnly ? : c->bodies.count;
5190
5191 for (uint32_t i=0;i<bodies_count;i++) {
5192 const uint32_t body = loopActiveBodiesOnly ? c->active_bodies.indices[i] : i;
5193 calculate_graphic_transform_for_body(c,body,&pModelMatricesOut[body*modelMatrixStrideInFloatUnits]);
5194 }
5195}
5196
// (The first line of this function's signature - return type, name and first parameter - is not
// part of this listing; the visible code uses a context pointer named 'c'.)
// For the contact stored at 'contact_data_index', looks up which box or sphere collider of body
// 'a' and of body 'b' is involved: bits 32..47 of the 64-bit contact tag are compared with the
// collider tags of body a, bits 48..63 with those of body b.
// Every non-NULL output pointer receives the index of the matching collider, or -1 if none
// matches; with use_relative_values_for_output_indices!=0 the index is relative to the body's
// first box/sphere collider, otherwise it is the index into c->colliders.
// NOTE(review): the asserts right after each search loop require a match whenever the body owns
// at least one collider of that kind; for a body that owns both boxes and spheres one of the two
// searches presumably finds nothing - confirm these asserts against such bodies.
5198 unsigned contact_data_index,
5199 int16_t* box_collider_index_for_body_a,
5200 int16_t* sphere_collider_index_for_body_a,
5201 int16_t* box_collider_index_for_body_b,
5202 int16_t* sphere_collider_index_for_body_b,
5203 int use_relative_values_for_output_indices
5204 ) {
5205 assert(c && contact_data_index<c->contact_data.count);
5206 const ContactData* cc = &c->contact_data;
5207 const uint64_t tag = cc->tags[contact_data_index]; // each 64-bit tag is a combination of the 2 16-bit tags of the colliders inside the two bodies and a 32-bit(?) tag of the contact feature (e.g. EDGE-EDGE, etc.)
5208 const BodyPair* bp = &cc->bodies[contact_data_index];
5209 const unsigned a = bp->a;assert(a<c->bodies.count);
5210 const unsigned b = bp->b;assert(b<c->bodies.count);
5211 const uint64_t a_tag = (tag&0x0000FFFF00000000ULL)>>(2ULL*16ULL);
5212 const uint64_t b_tag = (tag&0xFFFF000000000000ULL)>>(3ULL*16ULL);
// per-body working set: collider tag to search for, collider ranges of the body and output pointers
5213 struct coll_t {unsigned body;uint64_t tag;int16_t first_box_index;int16_t* box_colliding_index;uint16_t num_boxes;int16_t first_sphere_index;int16_t* sphere_colliding_index;uint16_t num_spheres;};
5214 struct coll_t coll[2]=
5215 {{a,a_tag,c->bodies.layouts[a].first_box_index,box_collider_index_for_body_a,c->bodies.layouts[a].num_boxes,c->bodies.layouts[a].first_sphere_index,sphere_collider_index_for_body_a,c->bodies.layouts[a].num_spheres},
5216 {b,b_tag,c->bodies.layouts[b].first_box_index,box_collider_index_for_body_b,c->bodies.layouts[b].num_boxes,c->bodies.layouts[b].first_sphere_index,sphere_collider_index_for_body_b,c->bodies.layouts[b].num_spheres}};
// t==0 handles body a, t==1 handles body b
5217 for (int t=0;t<2;t++) {
5218 struct coll_t* cl = &coll[t];
5219 assert(cl->num_boxes || cl->num_spheres);
5220 assert(cl->tag<NUDGE_START_SPHERE_TAG+c->MAX_NUM_SPHERES);
5221 if (cl->box_colliding_index) {
5222 *cl->box_colliding_index=-1;
5223 if (cl->num_boxes>0) {
5224 assert(cl->first_box_index>=0);
5225 assert((unsigned)cl->first_box_index+cl->num_boxes<=c->colliders.boxes.count);
5226 for (uint16_t ci=cl->first_box_index;ci<cl->first_box_index+cl->num_boxes;ci++) {
5227 assert(c->colliders.boxes.transforms[ci].body==cl->body);
5228 if (c->colliders.boxes.tags[ci]==cl->tag) {*cl->box_colliding_index=use_relative_values_for_output_indices?(ci-cl->first_box_index):ci;break;}
5229 }
5230 assert(*cl->box_colliding_index>=0);
5231 }
5232 }
5233 if (cl->sphere_colliding_index) {
5234 *cl->sphere_colliding_index=-1;
5235 if (cl->num_spheres>0) {
5236 assert(cl->first_sphere_index>=0);
5237 assert((unsigned)cl->first_sphere_index+cl->num_spheres<=c->colliders.spheres.count);
5238 for (uint16_t ci=cl->first_sphere_index;ci<cl->first_sphere_index+cl->num_spheres;ci++) {
5239 assert(c->colliders.spheres.transforms[ci].body==cl->body);
5240 if (c->colliders.spheres.tags[ci]==cl->tag) {*cl->sphere_colliding_index=use_relative_values_for_output_indices?(ci-cl->first_sphere_index):ci;break;}
5241 }
5242 assert(*cl->sphere_colliding_index>=0);
5243 }
// when both outputs were requested, exactly one of them must hold a valid index
5244 if (cl->box_colliding_index && cl->sphere_colliding_index) {
5245 assert(*cl->box_colliding_index>=0 || *cl->sphere_colliding_index>=0);
5246 assert(*cl->box_colliding_index==-1 || *cl->sphere_colliding_index==-1);
5247 // we've correctly found the subshape involved in the collision!
5248 }
5249 }
5250 }
5251}
5252
5253
// pre_simulation_step: adds the elapsed time to the simulation's time accumulator and
// returns 'sp->num_substeps_in_last_frame'.
//  - 'elapsedSecondsFromLastCall' is clamped to 0 when negative.
//  - 'sp->numsubsteps_overflow_in_last_frame' is set to 1 when the loop below was left through
//    the "max_num_substeps reached" branch, and to 0 otherwise.
// NOTE(review): the inner listing numbers jump over 5257, 5261-5262, 5264, 5274-5275 and 5279, so
// some statements of this function are not visible here (the braces below do not balance as shown).
// Presumably they reset/advance 'num_substeps_in_last_frame', test it against 'max_num_substeps'
// and consume 'time_step' from the accumulator -- confirm against the complete file.
5254unsigned pre_simulation_step(context_t* c,double elapsedSecondsFromLastCall) {
5255 struct SimulationParams* sp = &c->simulation_params;
5256 unsigned sim_is_burning_time = 0;
5258 if (elapsedSecondsFromLastCall<0) elapsedSecondsFromLastCall=0;
5259 sp->remaining_time_in_seconds+=elapsedSecondsFromLastCall;
// Loops while at least one whole 'time_step' is still stored in the accumulator.
5260 while (sp->remaining_time_in_seconds>=sp->time_step) {
5263//# ifndef NDEBUG
// The message is logged when the warning mode is non-zero, or when the flag stored by the
// previous call (see the end of this function) is still set.
5265 const int must_warn = sp->numsubsteps_overflow_warning_mode!=0 || sp->numsubsteps_overflow_in_last_frame>0;
5266 if (must_warn) log("[PhysicFrame: %llu] max_num_substeps=%u reached:\tBurnt remaining_time=%1.3f (on time_step=%1.3f)\n",sp->num_frames,sp->max_num_substeps,elapsedSecondsFromLastCall,sp->time_step);
5267 }
5268//# endif
5269 // setting a bigger 'sp->max_num_substeps' can help if you see this message every frame, but if you only see
5270 // it every now and then it's absolutely normal.
5271 sim_is_burning_time = 1;
5272 break;
5273 }
5276 }
5277 //assert(sp->remaining_time_in_seconds<sp->time_step);
5278 //if (sp->remaining_time_in_seconds>sp->time_step) sp->remaining_time_in_seconds=0; // It might happen when we pause the game
5280
// Remembered for the next call, where it feeds 'must_warn'.
5281 sp->numsubsteps_overflow_in_last_frame = sim_is_burning_time;
5282 // Good to debug 'sp->num_substeps_in_last_frame':
5283 //log("[PhysicFrame: %llu] num_substeps_in_last_frame=%d time_step=%f remaining_time=%f\n",sp->num_frames,sp->num_substeps_in_last_frame,sp->time_step,sp->remaining_time_in_seconds);
5284 return sp->num_substeps_in_last_frame;
5285}
5286
5287
5288extern uintptr_t get_required_arena_size_for_setup_contact_constraints(context_t* c);
5289void simulate(context_t* c,float timeStep, unsigned numSubSteps, unsigned numIterations) {
5290
5291 finalize_removed_bodies(c);
5292
5293# define NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5294# ifdef NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5295 if (numSubSteps>0) simulate_kinematic_animations(c,timeStep*numSubSteps);
5296# endif
5297
5298 for (unsigned n = 0; n < numSubSteps; ++n) {
5299 // Kinematic objects (or outside this loop?)
5300# ifndef NUDGE_KINEMATIC_ANIMATION_QUALITY_LOW
5301 simulate_kinematic_animations(c,timeStep);
5302# endif
5303
5304 uintptr_t required_arena_size = get_required_arena_size_for_setup_contact_constraints(c);
5305 // TODO: we should estimate all the other memory contributions, since 'setup_contact_constraints' is not the only called function that allocates... so we do:
5306 required_arena_size = required_arena_size+required_arena_size;
5307
5308 if (c->arena.size<required_arena_size) {
5309 _mm_free(c->arena.data);c->arena.data=0;
5310 const uintptr_t new_size = required_arena_size+c->arena.size/2;
5311 c->arena.data = _mm_malloc(new_size,NUDGE_ARENA_SIZE_ALIGNMENT);memset(c->arena.data,0,new_size);
5312 log("[nudge_frame: %llu] Resized Arena from %lu to %lu [consider redefining NUDGE_ARENA_SIZE]\n",c->simulation_params.num_frames,c->arena.size,new_size);flush();
5313 c->arena.size = new_size;
5314 }
5315
5316
5317 // Find contacts.
5318 BodyConnections connections = {}; // NOTE: Custom constraints should be added as body connections.
5319 collide(c, connections);
5320
5321 Arena temporary = c->arena;
5322
5323 // NOTE: Custom contacts can be added here, e.g., against the static environment.
5324
5325 // Apply gravity and damping.
5326 float damping_linear = 1.0f - timeStep*c->simulation_params.linear_damping;
5327 float damping_angular = 1.0f - timeStep*c->simulation_params.angular_damping;
5328 const float* pGravity[2] = {c->global_data.gravity,NULL};
5329 const int gravityIdx = (c->global_data.flags&GF_USE_GLOBAL_GRAVITY) ? 0 : 1;
5330 const int must_reset_aux_bodies = (c->global_data.flags&GF_DONT_RESET_AUX_BODIES) ? 0 : 1;
5331 for (unsigned i = 0; i < c->active_bodies.count; ++i) {
5332 const unsigned index = c->active_bodies.indices[i];
5333 //const BodyFilter* filter = &c->bodies.filters[index];
5334 const FlagMask flags = c->bodies.filters[index].flags;
5335 if (flags&BF_IS_DYNAMIC) {
5336 // Apply gravity and damping
5337 BodyMomentum* momentum = &c->bodies.momentum[index];
5338 pGravity[1] = c->bodies.properties[index].gravity;
5339 const float* gravity = pGravity[(flags&BF_HAS_DIFFERENT_GRAVITY_MODE)?(!gravityIdx):gravityIdx];
5340 for (int l=0;l<3;l++) {
5341 momentum->velocity[l] += gravity[l] * timeStep;
5342 momentum->velocity[l] *= damping_linear;
5343 momentum->angular_velocity[l] *= damping_angular;
5344 }
5345 if (flags&BF_NEVER_SLEEPING) c->bodies.idle_counters[index]=0; // prevents sleeping
5346
5347# if NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES>0
5348 const int must_reset = (flags&BF_HAS_DIFFERENT_AUX_BODIES_RESET_MODE)?(!must_reset_aux_bodies):must_reset_aux_bodies;
5349 if (must_reset) memset(&c->bodies.infos[index].aux_bodies,0xFF,NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES*sizeof(int16_t)); // sets all components to -1
5350# endif // NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
5351 }
5352 }
5353
5354 // Read previous impulses from contact cache.
5355 ContactImpulseData* contact_impulses = read_cached_impulses(c->contact_cache, c->contact_data, &temporary);
5356
5357 // Setup contact constraints and apply the initial impulses.
5358 ContactConstraintData* contact_constraints = setup_contact_constraints(c,/*c->active_bodies, c->contact_data, c->bodies,*/ contact_impulses, &temporary);
5359
5360 // Apply contact impulses. Increasing the number of iterations will improve stability.
5361 for (unsigned i = 0; i < numIterations; ++i) {
5362 apply_impulses(contact_constraints, c->bodies);
5363 // NOTE: Custom constraint impulses should be applied here.
5364 }
5365
5366 // Update contact impulses.
5367 update_cached_impulses(contact_constraints, contact_impulses);
5368
5369 // Write the updated contact impulses to the cache.
5370 write_cached_impulses(&c->contact_cache, c->contact_data, contact_impulses);
5371
5372 // Move active bodies.
5373 advance(c, timeStep);
5374
5376 }
5377
5378}
5379
5380void simulation_step(context_t* c) {
5381 struct SimulationParams* sp = &c->simulation_params;
5382 if (sp->num_substeps_in_last_frame>0) {
5383 assert(sp->time_step>0 && sp->num_iterations_per_substep>0);
5385 ++sp->num_frames;
5386 }
5387}
5388
5389#ifndef NUDGE_NO_STDIO
// save_context: serializes the context 'c' to the stream 'f'.
// Every section is a small text header written with fprintf() (a label plus counts and/or sizeof
// values), immediately followed by the raw in-memory bytes of the matching arrays/structs written
// with fwrite(). Only 'count' elements are written; capacities are skipped.
// The element counts returned by fwrite() are checked with assert() only.
// load_context() reads the sections back in this same order.
// NOTE(review): raw bytes follow the text headers, so 'f' presumably has to be opened in binary
// mode -- confirm at the call sites.
// NOTE(review): the listing skips inner line 5451 (after the GlobalData section).
5390void save_context(FILE* f,const context_t* c) {
5391 size_t rv = 0;
5392 assert(f && c);
5393 // max values
5394 fprintf(f,"MAX_NUM_BOXES:\n%u\n",c->MAX_NUM_BOXES);
5395 fprintf(f,"MAX_NUM_SPHERES:\n%u\n",c->MAX_NUM_SPHERES);
5396 // c->bodies
5397 const BodyData* bd = &c->bodies;
// Stays 0 when BodyInfo is compiled without its 'user' member.
5398 uint32_t size_of_BodyInfo_user = 0;
5399# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
5400 size_of_BodyInfo_user = (uint32_t) sizeof(BodyInfo::user);
5401# endif
// Header: body count plus the struct sizes, which load_context() compares with its own build.
5402 fprintf(f,"BodyData:\ncount: %u\nsizeof(BodyData): %u\nsizeof(Transform): %u\nsizeof(BodyProperties): %u\nsizeof(BodyMomentum): %u\n"
5403 "sizeof(BodyFilter): %u\nsizeof(BodyInfo): %u\nsizeof(BodyInfo::user): %u\nnum_aux_bodies: %u\n",bd->count,(uint32_t)sizeof(BodyData),(uint32_t)sizeof(Transform),(uint32_t)sizeof(BodyProperties),(uint32_t)sizeof(BodyMomentum),(uint32_t)sizeof(BodyFilter),(uint32_t)sizeof(BodyInfo),size_of_BodyInfo_user,NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES);
5404 rv=fwrite(bd->transforms,sizeof(Transform),bd->count,f);assert(rv==bd->count);
5405 rv=fwrite(bd->properties,sizeof(BodyProperties),bd->count,f);assert(rv==bd->count);
5406 rv=fwrite(bd->momentum,sizeof(BodyMomentum),bd->count,f);assert(rv==bd->count);
5407 rv=fwrite(bd->filters,sizeof(BodyFilter),bd->count,f);assert(rv==bd->count);
5408 rv=fwrite(bd->layouts,sizeof(BodyLayout),bd->count,f);assert(rv==bd->count);
5409 rv=fwrite(bd->idle_counters,sizeof(uint8_t),bd->count,f);assert(rv==bd->count);
5410 rv=fwrite(bd->infos,sizeof(BodyInfo),bd->count,f);assert(rv==bd->count);
5411 // c->colliders
5412 const ColliderData* cd = &c->colliders;
5413 fprintf(f,"\nColliderData::boxes:\n%u\n",cd->boxes.count);
5414 rv=fwrite(cd->boxes.tags,sizeof(uint16_t),cd->boxes.count,f);assert(rv==cd->boxes.count);
5415 rv=fwrite(cd->boxes.data,sizeof(BoxCollider),cd->boxes.count,f);assert(rv==cd->boxes.count);
5416 rv=fwrite(cd->boxes.transforms,sizeof(Transform),cd->boxes.count,f);assert(rv==cd->boxes.count);
5417 fprintf(f,"\nColliderData::spheres:\n%u\n",cd->spheres.count);
5418 rv=fwrite(cd->spheres.tags,sizeof(uint16_t),cd->spheres.count,f);assert(rv==cd->spheres.count);
5419 rv=fwrite(cd->spheres.data,sizeof(SphereCollider),cd->spheres.count,f);assert(rv==cd->spheres.count);
5420 rv=fwrite(cd->spheres.transforms,sizeof(Transform),cd->spheres.count,f);assert(rv==cd->spheres.count);
5421 // c->contact_data
5422 const ContactData* td = &c->contact_data;
5423 fprintf(f,"\nContactData::count\n%u\n",td->count); // we skip td->capacity
5424 rv=fwrite(td->data,sizeof(Contact),td->count,f);assert(rv==td->count);
5425 rv=fwrite(td->bodies,sizeof(BodyPair),td->count,f);assert(rv==td->count);
5426 rv=fwrite(td->tags,sizeof(uint64_t),td->count,f);assert(rv==td->count);
5427 fprintf(f,"\nContactData::sleeping_count\n%u\n",td->sleeping_count);
5428 rv=fwrite(td->sleeping_pairs,sizeof(uint32_t),td->sleeping_count,f);assert(rv==td->sleeping_count);
5429 // c->contact_cache
5430 const ContactCache* tc = &c->contact_cache;
5431 fprintf(f,"\nContactCache::count\n%u\n",tc->count); // we skip tc->capacity
5432 rv=fwrite(tc->tags,sizeof(uint64_t),tc->count,f);assert(rv==tc->count);
5433 rv=fwrite(tc->data,sizeof(CachedContactImpulse),tc->count,f);assert(rv==tc->count);
5434 // c->active_bodies
5435 const ActiveBodies* ab = &c->active_bodies;
5436 fprintf(f,"\nActiveBodies::count\n%u\n",ab->count); // we skip ab->capacity
5437 rv=fwrite(ab->indices,sizeof(uint16_t),ab->count,f);assert(rv==ab->count);
5438 // c->kinematic_data
5439 const KinematicData* kd = &c->kinematic_data;
5440 fprintf(f,"\nKinematicData::key_frame_count\n%u\n",kd->key_frame_count);
5441 rv=fwrite(kd->key_frame_transforms,sizeof(Transform),kd->key_frame_count,f);assert(rv==kd->key_frame_count);
5442 rv=fwrite(kd->key_frame_modes,sizeof(KinematicData::TimeMode),kd->key_frame_count,f);assert(rv==kd->key_frame_count);
5443 fprintf(f,"\nKinematicData::animations_count\n%u\n",kd->animations_count);
5444 rv=fwrite(kd->animations,sizeof(KinematicData::Animation),kd->animations_count,f);assert(rv==kd->animations_count);
5445 // c->simulation_params
5446 fprintf(f,"\nsizeof(SimulationParams)\n%u\n",(uint32_t)sizeof(SimulationParams));
5447 rv=fwrite(&c->simulation_params,sizeof(SimulationParams),1,f);assert(rv==1);
5448 // c->global_data
// The whole GlobalData struct is written as raw bytes, including its 'removed_bodies' pointer
// value (load_context() puts its own pointer back after reading).
5449 fprintf(f,"\nsizeof(GlobalData)\n%u\n",(uint32_t)sizeof(GlobalData));
5450 rv=fwrite(&c->global_data,sizeof(GlobalData),1,f);assert(rv==1);
5452 // c->user (only when the context is compiled with user data)
5453# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
5454 fprintf(f,"\nsizeof(c->user)\n%u\n",(uint32_t)sizeof(c->user));
5455 rv=fwrite(&c->user,sizeof(c->user),1,f);assert(rv==1);
5456# endif // NUDGE_CONTEXT_STRUCT_NO_USER_DATA
5457}
5458void load_context(FILE* f,context_t* c) {
5459 size_t rv = 0;uint32_t tmp[8]={};
5460 assert(f && c);
5461 // max values
5462 unsigned num_saved_boxes=0,num_saved_spheres=0;
5463 rv=fscanf(f,"MAX_NUM_BOXES:\n%u\n",&num_saved_boxes);assert(rv==1);
5464 rv=fscanf(f,"MAX_NUM_SPHERES:\n%u\n",&num_saved_spheres);assert(rv==1);
5465 assert(c->MAX_NUM_BOXES>=num_saved_boxes); // TODO?: free and reallocate 'c' before continuing?
5466 assert(c->MAX_NUM_SPHERES>=num_saved_spheres); // TODO?: free and reallocate 'c' before continuing?
5467 // Must be limit current context? Yes...
5468 *((unsigned*)&c->MAX_NUM_BOXES) = num_saved_boxes;*((unsigned*)&c->MAX_NUM_SPHERES) = num_saved_spheres;*((unsigned*)&c->MAX_NUM_BODIES) = num_saved_boxes+num_saved_spheres;
5469 // c->bodies
5470 BodyData* bd = &c->bodies;assert(bd->count<=c->MAX_NUM_BODIES);
5471 rv=fscanf(f,"BodyData:\ncount: %u\nsizeof(BodyData): %u\nsizeof(Transform): %u\nsizeof(BodyProperties): %u\nsizeof(BodyMomentum): %u\n"
5472 "sizeof(BodyFilter): %u\nsizeof(BodyInfo): %u\nsizeof(BodyInfo::user): %u\nnum_aux_bodies: %u\n",&bd->count,&tmp[0],&tmp[1],&tmp[2],&tmp[3],&tmp[4],&tmp[5],&tmp[6],&tmp[7]);assert(rv==9);
5473 assert(tmp[0]==sizeof(BodyData));
5474 assert(tmp[1]==sizeof(Transform));
5475 assert(tmp[2]==sizeof(BodyProperties));
5476 assert(tmp[3]==sizeof(BodyMomentum));
5477 assert(tmp[4]==sizeof(BodyFilter));
5478 assert(tmp[5]==sizeof(BodyInfo));
5479# ifndef NUDGE_BODYINFO_STRUCT_NO_USER_DATA
5480 assert(tmp[6]==sizeof(BodyInfo::user));
5481# else
5482 assert(tmp[6]==0);
5483# endif
5485 rv=fread(bd->transforms,sizeof(Transform),bd->count,f);assert(rv==bd->count);
5486 rv=fread(bd->properties,sizeof(BodyProperties),bd->count,f);assert(rv==bd->count);
5487 rv=fread(bd->momentum,sizeof(BodyMomentum),bd->count,f);assert(rv==bd->count);
5488 rv=fread(bd->filters,sizeof(BodyFilter),bd->count,f);assert(rv==bd->count);
5489 rv=fread(bd->layouts,sizeof(BodyLayout),bd->count,f);assert(rv==bd->count);
5490 rv=fread(bd->idle_counters,sizeof(uint8_t),bd->count,f);assert(rv==bd->count);
5491 rv=fread(bd->infos,sizeof(BodyInfo),bd->count,f);assert(rv==bd->count);
5492 // c->colliders
5493 ColliderData* cd = &c->colliders;
5494 rv=fscanf(f,"\nColliderData::boxes:\n%u\n",&cd->boxes.count);assert(rv==1);assert(cd->boxes.count<=c->MAX_NUM_BOXES);
5495 rv=fread(cd->boxes.tags,sizeof(uint16_t),cd->boxes.count,f);assert(rv==cd->boxes.count);
5496 rv=fread(cd->boxes.data,sizeof(BoxCollider),cd->boxes.count,f);assert(rv==cd->boxes.count);
5497 rv=fread(cd->boxes.transforms,sizeof(Transform),cd->boxes.count,f);assert(rv==cd->boxes.count);
5498 rv=fscanf(f,"\nColliderData::spheres:\n%u\n",&cd->spheres.count);assert(rv==1);assert(cd->spheres.count<=c->MAX_NUM_BOXES);
5499 rv=fread(cd->spheres.tags,sizeof(uint16_t),cd->spheres.count,f);assert(rv==cd->spheres.count);
5500 rv=fread(cd->spheres.data,sizeof(SphereCollider),cd->spheres.count,f);assert(rv==cd->spheres.count);
5501 rv=fread(cd->spheres.transforms,sizeof(Transform),cd->spheres.count,f);assert(rv==cd->spheres.count);
5502 // c->contact_data
5503 ContactData* td = &c->contact_data;
5504 rv=fscanf(f,"\nContactData::count\n%u\n",&td->count);assert(rv==1);assert(td->count<=td->capacity);
5505 rv=fread(td->data,sizeof(Contact),td->count,f);assert(rv==td->count);
5506 rv=fread(td->bodies,sizeof(BodyPair),td->count,f);assert(rv==td->count);
5507 rv=fread(td->tags,sizeof(uint64_t),td->count,f);assert(rv==td->count);
5508 rv=fscanf(f,"\nContactData::sleeping_count\n%u\n",&td->sleeping_count);assert(rv==1);
5509 rv=fread(td->sleeping_pairs,sizeof(uint32_t),td->sleeping_count,f);assert(rv==td->sleeping_count);
5510 // c->contact_cache
5511 ContactCache* tc = &c->contact_cache;
5512 rv=fscanf(f,"\nContactCache::count\n%u\n",&tc->count);assert(tc->count<=tc->capacity);assert(rv==1);
5513 rv=fread(tc->tags,sizeof(uint64_t),tc->count,f);assert(rv==tc->count);
5514 rv=fread(tc->data,sizeof(CachedContactImpulse),tc->count,f);assert(rv==tc->count);
5515 // c->active_bodies
5516 ActiveBodies* ab = &c->active_bodies;
5517 rv=fscanf(f,"\nActiveBodies::count\n%u\n",&ab->count);assert(rv==1);assert(ab->count<=ab->capacity);
5518 rv=fread(ab->indices,sizeof(uint16_t),ab->count,f);assert(rv==ab->count);
5519 // c->kinematic_data
5520 KinematicData* kd = &c->kinematic_data;
5521 rv=fscanf(f,"\nKinematicData::key_frame_count\n%u\n",&kd->key_frame_count);assert(rv==1);
5522 if (kd->key_frame_count>kd->key_frame_capacity) kinematic_data_reserve_key_frames(kd,kd->key_frame_count); // TODO: we should reset unuset space between count and capacity
5523 rv=fread(kd->key_frame_transforms,sizeof(Transform),kd->key_frame_count,f);assert(rv==kd->key_frame_count);
5524 rv=fread(kd->key_frame_modes,sizeof(KinematicData::TimeMode),kd->key_frame_count,f);assert(rv==kd->key_frame_count);
5525 rv=fscanf(f,"\nKinematicData::animations_count\n%u\n",&kd->animations_count);assert(kd->animations_count<=kd->animations_capacity);assert(rv==1);
5526 if (kd->animations_count>kd->animations_capacity) kinematic_data_reserve_animations(kd,kd->animations_count); // TODO: we should reset unuset space between count and capacity
5527 rv=fread(kd->animations,sizeof(KinematicData::Animation),kd->animations_count,f);assert(rv==kd->animations_count);
5528 // c->simulation_params
5529 const SimulationParams old_simulation_params = c->simulation_params;
5530 uint32_t simulation_params_size=0;
5531 rv=fscanf(f,"\nsizeof(SimulationParams)\n%u\n",&simulation_params_size);assert(simulation_params_size==(uint32_t)sizeof(SimulationParams));assert(rv==1);
5532 rv=fread(&c->simulation_params,sizeof(SimulationParams),1,f);assert(rv==1);
5533 // c->global_data
5534 uint32_t* host_ptr = c->global_data.removed_bodies; // we can't overwrite this!
5535 uint32_t global_data_size=0;
5536 rv=fscanf(f,"\nsizeof(GlobalData)\n%u\n",&global_data_size);assert(global_data_size==(uint32_t)sizeof(GlobalData));assert(rv==1);
5537 rv=fread(&c->global_data,sizeof(GlobalData),1,f);assert(rv==1);
5538 c->global_data.removed_bodies = host_ptr;
5540 // c->userUint64
5541# ifndef NUDGE_CONTEXT_STRUCT_NO_USER_DATA
5542 uint32_t user_size=0;
5543 rv=fscanf(f,"\nsizeof(c->user)\n%u\n",&user_size);assert(user_size==(uint32_t)sizeof(c->user));assert(rv==1);
5544 rv=fread(&c->user,sizeof(c->user),1,f);assert(rv==1);
5545# endif // NUDGE_CONTEXT_STRUCT_NO_USER_DATA
5546 // Which fields of 'old_simulation_params' must we keep? I'd say nothing, all or 'num_frames' and 'num_total_substeps', but what to choose?
5547 c->simulation_params.num_frames = old_simulation_params.num_frames;
5548 c->simulation_params.num_total_substeps = old_simulation_params.num_total_substeps;
5549 // -----------------------------------------------------
5550
5551 // But now we must reassign the remaining tags
5552 {
5553 //for (uint16_t i=0;i<c->MAX_NUM_BOXES;i++) {c->colliders.boxes.tags[i] = i;}
5554 //for (uint16_t i=0;i<c->MAX_NUM_SPHERES;i++) {c->colliders.spheres.tags[i] = NUDGE_START_SPHERE_TAG+i;}
5555 Arena arena = c->arena;
5556 const unsigned required_size = c->MAX_NUM_BOXES>c->MAX_NUM_SPHERES?c->MAX_NUM_BOXES:c->MAX_NUM_SPHERES;assert(arena.size>=required_size*sizeof(bool));
5557 bool* check_array = allocate_array<bool>(&arena, required_size, 32);assert(check_array);
5558 {
5559 bool* box_checks = check_array;
5560 for (unsigned i=0;i<c->MAX_NUM_BOXES;i++) box_checks[i]=false;
5561 for (uint32_t i=0;i<c->colliders.boxes.count;i++) {
5562 const uint16_t tag = c->colliders.boxes.tags[i];
5563 assert(tag<c->MAX_NUM_BOXES);
5564 assert(box_checks[tag]==false);
5565 box_checks[tag]=true;
5566 }
5567 uint32_t starti = c->colliders.boxes.count;
5568 for (uint32_t i=0;i<c->MAX_NUM_BOXES;i++) {
5569 if (!box_checks[i]) {
5570 assert(starti<c->MAX_NUM_BOXES);
5571 c->colliders.boxes.tags[starti++]=(uint16_t)i;
5572 }
5573 }
5574 assert(starti==c->MAX_NUM_BOXES);
5575 }
5576 {
5577 bool* sphere_checks = check_array;
5578 for (unsigned i=0;i<c->MAX_NUM_SPHERES;i++) sphere_checks[i]=false;
5579 for (uint32_t i=0;i<c->colliders.spheres.count;i++) {
5580 const uint16_t tag = c->colliders.spheres.tags[i];
5581 assert(tag>=NUDGE_START_SPHERE_TAG && tag<NUDGE_START_SPHERE_TAG+c->MAX_NUM_SPHERES);
5582 assert(sphere_checks[tag-NUDGE_START_SPHERE_TAG]==false);
5583 sphere_checks[tag-NUDGE_START_SPHERE_TAG]=true;
5584 }
5585 uint32_t starti = c->colliders.spheres.count;
5586 for (uint32_t i=0;i<c->MAX_NUM_SPHERES;i++) {
5587 if (!sphere_checks[i]) {
5588 assert(starti<c->MAX_NUM_SPHERES);
5589 c->colliders.spheres.tags[starti++]=(uint16_t)(NUDGE_START_SPHERE_TAG+i);
5590 }
5591 }
5592 assert(starti==c->MAX_NUM_SPHERES);
5593 }
5594 }
5595
5596 // And we should reset
5597}
5598
5599# ifdef NUDGE_USE_TIME_CONTEXT
5600void save_time_context(FILE* f,const time_context_t* c) {
5601 assert(f && c);
5602 fprintf(f,"\nsizeof(time_context_t)\n%u\n",(uint32_t)sizeof(time_context_t));
5603 fwrite(c,sizeof(time_context_t),1,f);
5604}
5605void load_time_context(FILE* f,time_context_t* c) {
5606 assert(f && c);
5607 uint32_t time_context_size=0;
5608 fscanf(f,"\nsizeof(time_context_t)\n%u\n",&time_context_size);assert(time_context_size==(uint32_t)sizeof(time_context_t));
5609 fread(c,sizeof(time_context_t),1,f);
5610}
5611# endif //NUDGE_USE_TIME_CONTEXT
5612
5613#endif //NUDGE_NO_STDIO
5614
5615// -----------------------------------------------------------------------------------
5616
5617static unsigned box_box_collide(uint32_t* pairs, unsigned pair_count, BoxCollider* colliders, Transform* transforms, Contact* contacts, BodyPair* bodies, uint64_t* tags, const BodyProperties* properties, Arena temporary) {
5618 // TODO: We may want to batch/chunk this for better cache behavior for repeatedly accessed data.
5619 // TODO: We should make use of 8-wide SIMD here as well.
5620
5621 float* feature_penetrations = allocate_array<float>(&temporary, pair_count + 7, 32); // Padding is required.
5622 uint32_t* features = allocate_array<uint32_t>(&temporary, pair_count + 7, 32);
5623
5624 unsigned count = 0;
5625
5626 // Determine most separating face and reject pairs separated by a face.
5627 {
5628 pairs[pair_count+0] = 0; // Padding.
5629 pairs[pair_count+1] = 0;
5630 pairs[pair_count+2] = 0;
5631
5632 unsigned added = 0;
5633
5634 // Transform each box into the local space of the other in order to quickly determine per-face penetration.
5635 for (unsigned i = 0; i < pair_count; i += 4) {
5636 // Load pairs.
5637 unsigned pair0 = pairs[i+0];
5638 unsigned pair1 = pairs[i+1];
5639 unsigned pair2 = pairs[i+2];
5640 unsigned pair3 = pairs[i+3];
5641
5642 unsigned a0_index = pair0 & 0xffff;
5643 unsigned b0_index = pair0 >> 16;
5644
5645 unsigned a1_index = pair1 & 0xffff;
5646 unsigned b1_index = pair1 >> 16;
5647
5648 unsigned a2_index = pair2 & 0xffff;
5649 unsigned b2_index = pair2 >> 16;
5650
5651 unsigned a3_index = pair3 & 0xffff;
5652 unsigned b3_index = pair3 >> 16;
5653
5654 // Load rotations.
5655 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
5656 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
5657 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
5658 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
5659
5660 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
5661 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
5662 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
5663 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
5664
5665 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
5666 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
5667
5668 // Determine quaternion for rotation from a to b.
5669 simd4_float t_x, t_y, t_z;
5670 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5671
5672 simd4_float relative_rotation_x = a_rotation_x * b_rotation_s - b_rotation_x * a_rotation_s - t_x;
5673 simd4_float relative_rotation_y = a_rotation_y * b_rotation_s - b_rotation_y * a_rotation_s - t_y;
5674 simd4_float relative_rotation_z = a_rotation_z * b_rotation_s - b_rotation_z * a_rotation_s - t_z;
5675 simd4_float relative_rotation_s = (a_rotation_x * b_rotation_x +
5676 a_rotation_y * b_rotation_y +
5677 a_rotation_z * b_rotation_z +
5678 a_rotation_s * b_rotation_s);
5679
5680 // Compute the corresponding matrix.
5681 // Note that the b to a matrix is simply the transpose of a to b.
5682 simd4_float kx = relative_rotation_x + relative_rotation_x;
5683 simd4_float ky = relative_rotation_y + relative_rotation_y;
5684 simd4_float kz = relative_rotation_z + relative_rotation_z;
5685
5686 simd4_float xx = kx * relative_rotation_x;
5687 simd4_float yy = ky * relative_rotation_y;
5688 simd4_float zz = kz * relative_rotation_z;
5689 simd4_float xy = kx * relative_rotation_y;
5690 simd4_float xz = kx * relative_rotation_z;
5691 simd4_float yz = ky * relative_rotation_z;
5692 simd4_float sx = kx * relative_rotation_s;
5693 simd4_float sy = ky * relative_rotation_s;
5694 simd4_float sz = kz * relative_rotation_s;
5695
5696 simd4_float one = simd_float::make4(1.0f);
5697
5698 simd4_float vx_x = one - yy - zz;
5699 simd4_float vx_y = xy + sz;
5700 simd4_float vx_z = xz - sy;
5701
5702 simd4_float vy_x = xy - sz;
5703 simd4_float vy_y = one - xx - zz;
5704 simd4_float vy_z = yz + sx;
5705
5706 simd4_float vz_x = xz + sy;
5707 simd4_float vz_y = yz - sx;
5708 simd4_float vz_z = one - xx - yy;
5709
5710 // Load sizes.
5711 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
5712 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
5713 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
5714 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
5715
5716 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
5717 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
5718 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
5719 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
5720
5721 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
5722 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
5723
5724 // Compute the penetration.
5725 vx_x = simd_float::abs(vx_x);
5726 vx_y = simd_float::abs(vx_y);
5727 vx_z = simd_float::abs(vx_z);
5728
5729 vy_x = simd_float::abs(vy_x);
5730 vy_y = simd_float::abs(vy_y);
5731 vy_z = simd_float::abs(vy_z);
5732
5733 vz_x = simd_float::abs(vz_x);
5734 vz_y = simd_float::abs(vz_y);
5735 vz_z = simd_float::abs(vz_z);
5736
5737 simd4_float pax = b_size_x + vx_x*a_size_x + vy_x*a_size_y + vz_x*a_size_z;
5738 simd4_float pay = b_size_y + vx_y*a_size_x + vy_y*a_size_y + vz_y*a_size_z;
5739 simd4_float paz = b_size_z + vx_z*a_size_x + vy_z*a_size_y + vz_z*a_size_z;
5740
5741 simd4_float pbx = a_size_x + vx_x*b_size_x + vx_y*b_size_y + vx_z*b_size_z;
5742 simd4_float pby = a_size_y + vy_x*b_size_x + vy_y*b_size_y + vy_z*b_size_z;
5743 simd4_float pbz = a_size_z + vz_x*b_size_x + vz_y*b_size_y + vz_z*b_size_z;
5744
5745 // Load positions.
5746 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
5747 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
5748 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
5749 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
5750
5751 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
5752 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
5753 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
5754 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
5755
5756 // Compute relative positions and offset the penetrations.
5757 simd4_float delta_x = a_position_x - b_position_x;
5758 simd4_float delta_y = a_position_y - b_position_y;
5759 simd4_float delta_z = a_position_z - b_position_z;
5760 simd4_float delta_w = a_position_w - b_position_w;
5761
5762 simd128::transpose32(delta_x, delta_y, delta_z, delta_w);
5763
5764 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, delta_x, delta_y, delta_z, t_x, t_y, t_z);
5765 t_x += t_x;
5766 t_y += t_y;
5767 t_z += t_z;
5768
5769 simd4_float u_x, u_y, u_z;
5770 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
5771
5772 simd4_float a_offset_x = u_x + delta_x - b_rotation_s * t_x;
5773 simd4_float a_offset_y = u_y + delta_y - b_rotation_s * t_y;
5774 simd4_float a_offset_z = u_z + delta_z - b_rotation_s * t_z;
5775
5776 pax -= simd_float::abs(a_offset_x);
5777 pay -= simd_float::abs(a_offset_y);
5778 paz -= simd_float::abs(a_offset_z);
5779
5780 simd_soa::cross(delta_x, delta_y, delta_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5781 t_x += t_x;
5782 t_y += t_y;
5783 t_z += t_z;
5784
5785 simd_soa::cross(a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
5786
5787 simd4_float b_offset_x = u_x - delta_x - a_rotation_s * t_x;
5788 simd4_float b_offset_y = u_y - delta_y - a_rotation_s * t_y;
5789 simd4_float b_offset_z = u_z - delta_z - a_rotation_s * t_z;
5790
5791 pbx -= simd_float::abs(b_offset_x);
5792 pby -= simd_float::abs(b_offset_y);
5793 pbz -= simd_float::abs(b_offset_z);
5794
5795 // Reduce face penetrations.
5796 simd4_float payz = simd_float::min(pay, paz);
5797 simd4_float pbyz = simd_float::min(pby, pbz);
5798
5799 simd4_float pa = simd_float::min(pax, payz);
5800 simd4_float pb = simd_float::min(pbx, pbyz);
5801
5802 simd4_float p = simd_float::min(pa, pb);
5803
5804 // Determine the best aligned face for each collider.
5805 simd4_float aymf = simd_float::cmp_eq(payz, pa);
5806 simd4_float azmf = simd_float::cmp_eq(paz, pa);
5807
5808 simd4_float bymf = simd_float::cmp_eq(pbyz, pb);
5809 simd4_float bzmf = simd_float::cmp_eq(pbz, pb);
5810
5811 simd4_int32 aymi = simd::bitwise_and(simd_float::asint(aymf), simd_int32::make4(1));
5812 simd4_int32 azmi = simd::bitwise_and(simd_float::asint(azmf), simd_int32::make4(1));
5813
5814 simd4_int32 bymi = simd::bitwise_and(simd_float::asint(bymf), simd_int32::make4(1));
5815 simd4_int32 bzmi = simd::bitwise_and(simd_float::asint(bzmf), simd_int32::make4(1));
5816
5817 simd4_int32 aface = simd_int32::add(aymi, azmi);
5818 simd4_int32 bface = simd_int32::add(bymi, bzmi);
5819
5820 // Swap so that collider a has the most separating face.
5821 simd4_float swap = simd_float::cmp_eq(pa, p);
5822
5823 simd4_float pair_a_b = simd_int32::asfloat(simd_int32::load4((const int32_t*)(pairs + i)));
5824 simd4_float pair_b_a = simd_int32::asfloat(simd::bitwise_or(simd_int32::shift_left<16>(simd_float::asint(pair_a_b)), simd_int32::shift_right<16>(simd_float::asint(pair_a_b))));
5825
5826 simd4_float face = simd::blendv32(simd_int32::asfloat(bface), simd_int32::asfloat(aface), swap);
5827 simd4_float pair = simd::blendv32(pair_a_b, pair_b_a, swap);
5828
5829 // Store data for pairs with positive penetration.
5830 unsigned mask = simd::signmask32(simd_float::cmp_gt(p, simd_float::zero4()));
5831
5832 NUDGE_ALIGNED(16) float face_penetration_array[4];
5833 NUDGE_ALIGNED(16) uint32_t face_array[4];
5834 NUDGE_ALIGNED(16) uint32_t pair_array[4];
5835
5836 simd_float::store4(face_penetration_array, p);
5837 simd_float::store4((float*)face_array, face);
5838 simd_float::store4((float*)pair_array, pair);
5839
5840 while (mask) {
5841 unsigned index = first_set_bit(mask);
5842 mask &= mask-1;
5843
5844 feature_penetrations[added] = face_penetration_array[index];
5845 features[added] = face_array[index];
5846 pairs[added] = pair_array[index];
5847
5848 ++added;
5849 }
5850 }
5851
5852 // Erase padding.
5853 while (added && !pairs[added-1])
5854 --added;
5855
5856 pair_count = added;
5857 }
5858
5859 // Check if edge pairs are more separating.
5860 // Do face-face test if not.
5861 {
5862 pairs[pair_count+0] = 0; // Padding.
5863 pairs[pair_count+1] = 0;
5864 pairs[pair_count+2] = 0;
5865
5866 feature_penetrations[pair_count+0] = 0.0f;
5867 feature_penetrations[pair_count+1] = 0.0f;
5868 feature_penetrations[pair_count+2] = 0.0f;
5869
5870 unsigned added = 0;
5871
5872 for (unsigned pair_offset = 0; pair_offset < pair_count; pair_offset += 4) {
5873 // Load pairs.
5874 unsigned pair0 = pairs[pair_offset+0];
5875 unsigned pair1 = pairs[pair_offset+1];
5876 unsigned pair2 = pairs[pair_offset+2];
5877 unsigned pair3 = pairs[pair_offset+3];
5878
5879 unsigned a0_index = pair0 & 0xffff;
5880 unsigned b0_index = pair0 >> 16;
5881
5882 unsigned a1_index = pair1 & 0xffff;
5883 unsigned b1_index = pair1 >> 16;
5884
5885 unsigned a2_index = pair2 & 0xffff;
5886 unsigned b2_index = pair2 >> 16;
5887
5888 unsigned a3_index = pair3 & 0xffff;
5889 unsigned b3_index = pair3 >> 16;
5890
5891 // Load rotations.
5892 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
5893 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
5894 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
5895 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
5896
5897 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
5898 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
5899 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
5900 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
5901
5902 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
5903 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
5904
5905 // Determine quaternion for rotation from a to b.
5906 simd4_float t_x, t_y, t_z;
5907 simd_soa::cross(b_rotation_x, b_rotation_y, b_rotation_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5908
5909 simd4_float relative_rotation_x = a_rotation_x * b_rotation_s - b_rotation_x * a_rotation_s - t_x;
5910 simd4_float relative_rotation_y = a_rotation_y * b_rotation_s - b_rotation_y * a_rotation_s - t_y;
5911 simd4_float relative_rotation_z = a_rotation_z * b_rotation_s - b_rotation_z * a_rotation_s - t_z;
5912 simd4_float relative_rotation_s = (a_rotation_x * b_rotation_x +
5913 a_rotation_y * b_rotation_y +
5914 a_rotation_z * b_rotation_z +
5915 a_rotation_s * b_rotation_s);
5916
5917 // Compute the corresponding matrix.
5918 // Note that the b to a matrix is simply the transpose of a to b.
5919 simd4_float kx = relative_rotation_x + relative_rotation_x;
5920 simd4_float ky = relative_rotation_y + relative_rotation_y;
5921 simd4_float kz = relative_rotation_z + relative_rotation_z;
5922
5923 simd4_float xx = kx * relative_rotation_x;
5924 simd4_float yy = ky * relative_rotation_y;
5925 simd4_float zz = kz * relative_rotation_z;
5926 simd4_float xy = kx * relative_rotation_y;
5927 simd4_float xz = kx * relative_rotation_z;
5928 simd4_float yz = ky * relative_rotation_z;
5929 simd4_float sx = kx * relative_rotation_s;
5930 simd4_float sy = ky * relative_rotation_s;
5931 simd4_float sz = kz * relative_rotation_s;
5932
5933 simd4_float one = simd_float::make4(1.0f);
5934
5935 simd4_float vx_x = one - yy - zz;
5936 simd4_float vx_y = xy + sz;
5937 simd4_float vx_z = xz - sy;
5938
5939 simd4_float vy_x = xy - sz;
5940 simd4_float vy_y = one - xx - zz;
5941 simd4_float vy_z = yz + sx;
5942
5943 simd4_float vz_x = xz + sy;
5944 simd4_float vz_y = yz - sx;
5945 simd4_float vz_z = one - xx - yy;
5946
5947 NUDGE_ALIGNED(16) float a_to_b[4*9];
5948
5949 simd_float::store4(a_to_b + 0, vx_x);
5950 simd_float::store4(a_to_b + 4, vx_y);
5951 simd_float::store4(a_to_b + 8, vx_z);
5952
5953 simd_float::store4(a_to_b + 12, vy_x);
5954 simd_float::store4(a_to_b + 16, vy_y);
5955 simd_float::store4(a_to_b + 20, vy_z);
5956
5957 simd_float::store4(a_to_b + 24, vz_x);
5958 simd_float::store4(a_to_b + 28, vz_y);
5959 simd_float::store4(a_to_b + 32, vz_z);
5960
5961 // Load sizes.
5962 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
5963 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
5964 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
5965 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
5966
5967 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
5968 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
5969 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
5970 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
5971
5972 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
5973 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
5974
5975 // Load positions.
5976 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
5977 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
5978 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
5979 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
5980
5981 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
5982 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
5983 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
5984 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
5985
5986 // Compute relative positions and offset the penetrations.
5987 simd4_float delta_x = a_position_x - b_position_x;
5988 simd4_float delta_y = a_position_y - b_position_y;
5989 simd4_float delta_z = a_position_z - b_position_z;
5990 simd4_float delta_w = a_position_w - b_position_w;
5991
5992 simd128::transpose32(delta_x, delta_y, delta_z, delta_w);
5993
5994 simd_soa::cross(delta_x, delta_y, delta_z, a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z);
5995 t_x += t_x;
5996 t_y += t_y;
5997 t_z += t_z;
5998
5999 simd4_float u_x, u_y, u_z;
6000 simd_soa::cross(a_rotation_x, a_rotation_y, a_rotation_z, t_x, t_y, t_z, u_x, u_y, u_z);
6001
6002 simd4_float b_offset_x = u_x - delta_x - a_rotation_s * t_x;
6003 simd4_float b_offset_y = u_y - delta_y - a_rotation_s * t_y;
6004 simd4_float b_offset_z = u_z - delta_z - a_rotation_s * t_z;
6005
6006 NUDGE_ALIGNED(16) float b_offset_array[3*4];
6007
6008 simd_float::store4(b_offset_array + 0, b_offset_x);
6009 simd_float::store4(b_offset_array + 4, b_offset_y);
6010 simd_float::store4(b_offset_array + 8, b_offset_z);
6011
6012 simd4_float face_penetration = simd_float::load4(feature_penetrations + pair_offset);
6013
6014 // Is an edge pair more separating?
6015 NUDGE_ALIGNED(16) float edge_penetration_a[4*9];
6016 NUDGE_ALIGNED(16) float edge_penetration_b[4*9];
6017
6018 for (unsigned i = 0; i < 3; ++i) {
6019 simd4_float acx = simd_float::load4(a_to_b + (0*3 + i)*4);
6020 simd4_float acy = simd_float::load4(a_to_b + (1*3 + i)*4);
6021 simd4_float acz = simd_float::load4(a_to_b + (2*3 + i)*4);
6022
6023 simd4_float bcx = simd_float::load4(a_to_b + (i*3 + 0)*4);
6024 simd4_float bcy = simd_float::load4(a_to_b + (i*3 + 1)*4);
6025 simd4_float bcz = simd_float::load4(a_to_b + (i*3 + 2)*4);
6026
6027 simd4_float ac2x = acx*acx;
6028 simd4_float ac2y = acy*acy;
6029 simd4_float ac2z = acz*acz;
6030
6031 simd4_float bc2x = bcx*bcx;
6032 simd4_float bc2y = bcy*bcy;
6033 simd4_float bc2z = bcz*bcz;
6034
6035 simd4_float aacx = simd_float::abs(acx);
6036 simd4_float aacy = simd_float::abs(acy);
6037 simd4_float aacz = simd_float::abs(acz);
6038
6039 simd4_float abcx = simd_float::abs(bcx);
6040 simd4_float abcy = simd_float::abs(bcy);
6041 simd4_float abcz = simd_float::abs(bcz);
6042
6043 simd4_float r_a0 = ac2y + ac2z;
6044 simd4_float r_a1 = ac2z + ac2x;
6045 simd4_float r_a2 = ac2x + ac2y;
6046
6047 simd4_float r_b0 = bc2y + bc2z;
6048 simd4_float r_b1 = bc2z + bc2x;
6049 simd4_float r_b2 = bc2x + bc2y;
6050
6051 simd4_float nan_threshold = simd_float::make4(1e-3f);
6052
6053 r_a0 = simd::bitwise_or(simd_float::rsqrt(r_a0), simd_float::cmp_le(r_a0, nan_threshold));
6054 r_a1 = simd::bitwise_or(simd_float::rsqrt(r_a1), simd_float::cmp_le(r_a1, nan_threshold));
6055 r_a2 = simd::bitwise_or(simd_float::rsqrt(r_a2), simd_float::cmp_le(r_a2, nan_threshold));
6056
6057 r_b0 = simd::bitwise_or(simd_float::rsqrt(r_b0), simd_float::cmp_le(r_b0, nan_threshold));
6058 r_b1 = simd::bitwise_or(simd_float::rsqrt(r_b1), simd_float::cmp_le(r_b1, nan_threshold));
6059 r_b2 = simd::bitwise_or(simd_float::rsqrt(r_b2), simd_float::cmp_le(r_b2, nan_threshold));
6060
6061 simd4_float pa0 = aacy*a_size_z + aacz*a_size_y;
6062 simd4_float pa1 = aacz*a_size_x + aacx*a_size_z;
6063 simd4_float pa2 = aacx*a_size_y + aacy*a_size_x;
6064
6065 simd4_float pb0 = abcy*b_size_z + abcz*b_size_y;
6066 simd4_float pb1 = abcz*b_size_x + abcx*b_size_z;
6067 simd4_float pb2 = abcx*b_size_y + abcy*b_size_x;
6068
6069 simd4_float o0 = simd_float::abs(acy*b_offset_z - acz*b_offset_y);
6070 simd4_float o1 = simd_float::abs(acz*b_offset_x - acx*b_offset_z);
6071 simd4_float o2 = simd_float::abs(acx*b_offset_y - acy*b_offset_x);
6072
6073 simd_float::store4(edge_penetration_a + (i*3 + 0)*4, (pa0 - o0) * r_a0);
6074 simd_float::store4(edge_penetration_a + (i*3 + 1)*4, (pa1 - o1) * r_a1);
6075 simd_float::store4(edge_penetration_a + (i*3 + 2)*4, (pa2 - o2) * r_a2);
6076
6077 simd_float::store4(edge_penetration_b + (i*3 + 0)*4, pb0 * r_b0);
6078 simd_float::store4(edge_penetration_b + (i*3 + 1)*4, pb1 * r_b1);
6079 simd_float::store4(edge_penetration_b + (i*3 + 2)*4, pb2 * r_b2);
6080 }
6081
6082 simd4_int32 a_edge = simd_int32::make4(0);
6083 simd4_int32 b_edge = simd_int32::make4(0);
6084
6085 simd4_float penetration = face_penetration;
6086
6087 for (unsigned i = 0; i < 3; ++i) {
6088 for (unsigned j = 0; j < 3; ++j) {
6089 simd4_float p = simd_float::load4(edge_penetration_a + (i*3 + j)*4) + simd_float::load4(edge_penetration_b + (j*3 + i)*4);
6090
6091 simd4_float mask = simd_float::cmp_gt(penetration, p);
6092
6093 penetration = simd_float::min(penetration, p); // Note: First operand is returned on NaN.
6094 a_edge = simd::blendv32(a_edge, simd_int32::make4(j), simd_float::asint(mask));
6095 b_edge = simd::blendv32(b_edge, simd_int32::make4(i), simd_float::asint(mask));
6096 }
6097 }
6098
6099 simd4_float face_bias = simd_float::make4(1e-3f);
6100
6101 unsigned edge = simd::signmask32(simd_float::cmp_gt(face_penetration, penetration + face_bias));
6102 unsigned overlapping = simd::signmask32(simd_float::cmp_gt(penetration, simd_float::zero4()));
6103
6104 unsigned face = ~edge;
6105
6106 edge &= overlapping;
6107 face &= overlapping;
6108
6109 NUDGE_ALIGNED(16) float penetration_array[4];
6110 NUDGE_ALIGNED(16) int32_t a_edge_array[4];
6111 NUDGE_ALIGNED(16) int32_t b_edge_array[4];
6112
6113 simd_float::store4(penetration_array, penetration);
6114 simd_int32::store4(a_edge_array, a_edge);
6115 simd_int32::store4(b_edge_array, b_edge);
6116
6117 // Do face-face tests.
6118 while (face) {
6119 unsigned index = first_set_bit(face);
6120 face &= face-1;
6121
6122 unsigned pair = pairs[pair_offset + index];
6123 unsigned a_face = features[pair_offset + index];
6124
6125 unsigned a_index = pair & 0xffff;
6126 unsigned b_index = pair >> 16;
6127
6128 // Gather.
6129 simd4_float dirs = simd_float::make4(a_to_b[(a_face*3 + 0)*4 + index],
6130 a_to_b[(a_face*3 + 1)*4 + index],
6131 a_to_b[(a_face*3 + 2)*4 + index],
6132 0.0f);
6133
6134 simd4_float c0 = simd_float::make4(a_to_b[(0*3 + 0)*4 + index],
6135 a_to_b[(1*3 + 0)*4 + index],
6136 a_to_b[(2*3 + 0)*4 + index],
6137 0.0f);
6138
6139 simd4_float c1 = simd_float::make4(a_to_b[(0*3 + 1)*4 + index],
6140 a_to_b[(1*3 + 1)*4 + index],
6141 a_to_b[(2*3 + 1)*4 + index],
6142 0.0f);
6143
6144 simd4_float c2 = simd_float::make4(a_to_b[(0*3 + 2)*4 + index],
6145 a_to_b[(1*3 + 2)*4 + index],
6146 a_to_b[(2*3 + 2)*4 + index],
6147 0.0f);
6148
6149 simd4_float b_offset = simd_float::make4(b_offset_array[0*4 + index],
6150 b_offset_array[1*4 + index],
6151 b_offset_array[2*4 + index],
6152 0.0f);
6153
6154 // Load sizes.
6155 simd4_float a_size = simd_float::load4(colliders[a_index].size);
6156 simd4_float b_size = simd_float::load4(colliders[b_index].size);
6157
6158 // Find most aligned face of b.
6159 dirs = simd_float::abs(dirs);
6160
6161 simd4_float max_dir = simd_float::max(simd128::shuffle32<0,2,1,3>(dirs), simd128::shuffle32<0,0,0,0>(dirs));
6162
6163 unsigned dir_mask = simd::signmask32(simd_float::cmp_ge(dirs, max_dir));
6164
6165 // Compute the coordinates of the two quad faces.
6166 c0 *= simd128::shuffle32<0,0,0,0>(b_size);
6167 c1 *= simd128::shuffle32<1,1,1,1>(b_size);
6168 c2 *= simd128::shuffle32<2,2,2,2>(b_size);
6169
6170 unsigned b_face = 0;
6171
6172 if (dir_mask & 4) {
6173 simd4_float t = c0;
6174 c0 = c2;
6175 c2 = c1;
6176 c1 = t;
6177 b_face = 2;
6178 }
6179 else if (dir_mask & 2) {
6180 simd4_float t = c0;
6181 c0 = c1;
6182 c1 = c2;
6183 c2 = t;
6184 b_face = 1;
6185 }
6186
6187 simd4_float c = c0;
6188 simd4_float dx = c1;
6189 simd4_float dy = c2;
6190
6191 unsigned b_positive_face_bit = simd::signmask32(simd::bitwise_xor(b_offset, c)) & (1 << a_face);
6192 unsigned b_offset_neg = simd::signmask32(b_offset) & (1 << a_face);
6193
6194 if (!b_positive_face_bit)
6195 c = -c;
6196
6197 c += b_offset;
6198
6199 // Quad coordinate packing:
6200 // Size of quad a, center of quad b, x-axis of quad b, y-axis of quad b.
6201 // a.size.x, c.x, dx.x, dy.x
6202 // a.size.y, c.y, dx.y, dy.y
6203 // a.size.z, c.z, dx.z, dy.z
6204 NUDGE_ALIGNED(16) float quads[4*3];
6205
6206 simd4_float q0 = simd128::unpacklo32(a_size, c);
6207 simd4_float q1 = simd128::unpackhi32(a_size, c);
6208 simd4_float q2 = simd128::unpacklo32(dx, dy);
6209 simd4_float q3 = simd128::unpackhi32(dx, dy);
6210
6211 simd_float::store4(quads + 0, simd128::concat2x32<0,1,0,1>(q0, q2));
6212 simd_float::store4(quads + 4, simd128::concat2x32<2,3,2,3>(q0, q2));
6213 simd_float::store4(quads + 8, simd128::concat2x32<0,1,0,1>(q1, q3));
6214
6215 // Transform so that overlap testing can be done in two dimensions.
6216 const float* transformed_x = quads + 4*((a_face+1) % 3);
6217 const float* transformed_y = quads + 4*((a_face+2) % 3);
6218 const float* transformed_z = quads + 4*a_face;
6219
6220 // Find support points for the overlap between the quad faces in two dimensions.
6221 NUDGE_ALIGNED(32) float support[16*3];
6222 NUDGE_ALIGNED(32) uint32_t support_tags[16];
6223 unsigned mask; // Indicates valid points.
6224 {
6225 float* support_x = support + 0;
6226 float* support_y = support + 16;
6227
6228 simd4_float tx = simd_float::load4(transformed_x);
6229 simd4_float ty = simd_float::load4(transformed_y);
6230
6231 simd4_float sxycxy = simd128::unpacklo32(tx, ty);
6232 simd4_float dxy = simd128::unpackhi32(tx, ty);
6233
6234 simd4_float sx = simd128::shuffle32<0,0,0,0>(sxycxy);
6235 simd4_float sy = simd128::shuffle32<1,1,1,1>(sxycxy);
6236 simd4_float cx = simd128::shuffle32<2,2,2,2>(sxycxy);
6237 simd4_float cy = simd128::shuffle32<3,3,3,3>(sxycxy);
6238
6239 simd4_float sign_npnp = simd_float::make4(-0.0f, 0.0f, -0.0f, 0.0f);
6240
6241 // Add corner points to the support if they are part of the intersection.
6242 __m128i corner_mask;
6243 __m128i edge_mask;
6244 {
6245 simd4_float sign_pnpn = simd_float::make4(0.0f, -0.0f, 0.0f, -0.0f);
6246 simd4_float sign_nnpp = simd_float::make4(-0.0f, -0.0f, 0.0f, 0.0f);
6247
6248 simd4_float corner0x = simd::bitwise_xor(sx, sign_pnpn);
6249 simd4_float corner0y = simd::bitwise_xor(sy, sign_nnpp);
6250
6251 simd4_float corner1x = cx + simd::bitwise_xor(simd128::shuffle32<0,0,0,0>(dxy), sign_npnp) + simd::bitwise_xor(simd128::shuffle32<2,2,2,2>(dxy), sign_nnpp);
6252 simd4_float corner1y = cy + simd::bitwise_xor(simd128::shuffle32<1,1,1,1>(dxy), sign_npnp) + simd::bitwise_xor(simd128::shuffle32<3,3,3,3>(dxy), sign_nnpp);
6253
6254 simd4_float k = (simd128::concat2x32<2,2,0,0>(sxycxy, dxy) * simd128::shuffle32<3,1,3,1>(dxy) -
6255 simd128::concat2x32<3,3,1,1>(sxycxy, dxy) * simd128::shuffle32<2,0,2,0>(dxy));
6256
6257 simd4_float ox = simd128::shuffle32<0,0,0,0>(k);
6258 simd4_float oy = simd128::shuffle32<1,1,1,1>(k);
6259 simd4_float delta_max = simd_float::abs(simd128::shuffle32<2,2,2,2>(k));
6260
6261 simd4_float sdxy = dxy * simd128::shuffle32<1,0,1,0>(sxycxy);
6262
6263 simd4_float delta_x = ox + simd::bitwise_xor(simd128::shuffle32<2,2,2,2>(sdxy), sign_nnpp) + simd::bitwise_xor(simd128::shuffle32<3,3,3,3>(sdxy), sign_npnp);
6264 simd4_float delta_y = oy + simd::bitwise_xor(simd128::shuffle32<0,0,0,0>(sdxy), sign_nnpp) + simd::bitwise_xor(simd128::shuffle32<1,1,1,1>(sdxy), sign_npnp);
6265
6266 simd4_float inside_x = simd_float::cmp_le(simd_float::abs(corner1x), sx);
6267 simd4_float inside_y = simd_float::cmp_le(simd_float::abs(corner1y), sy);
6268
6269 simd4_float mask0 = simd_float::cmp_le(simd_float::max(simd_float::abs(delta_x), simd_float::abs(delta_y)), delta_max);
6270 simd4_float mask1 = simd::bitwise_and(inside_x, inside_y);
6271
6272 corner_mask = _mm_packs_epi32(simd_float::asint(mask0), simd_float::asint(mask1));
6273
6274 // Don't allow edge intersections if both vertices are inside.
6275 edge_mask = _mm_packs_epi32(simd_float::asint(simd::bitwise_and(simd128::shuffle32<3,2,0,2>(mask0), simd128::shuffle32<1,0,1,3>(mask0))),
6276 simd_float::asint(simd::bitwise_and(simd128::shuffle32<1,3,2,3>(mask1), simd128::shuffle32<0,2,0,1>(mask1))));
6277
6278 simd_float::store4(support_x + 0, corner0x);
6279 simd_float::store4(support_y + 0, corner0y);
6280 simd_float::store4(support_x + 4, corner1x);
6281 simd_float::store4(support_y + 4, corner1y);
6282 }
6283
6284 // Find additional support points by intersecting the edges of the second quad against the bounds of the first.
6285 unsigned edge_axis_near;
6286 unsigned edge_axis_far;
6287 {
6288 simd4_float one = simd_float::make4(1.0f);
6289 simd4_float rdxy = one/dxy;
6290
6291 simd4_float offset_x = simd128::shuffle32<0,0,2,2>(dxy);
6292 simd4_float offset_y = simd128::shuffle32<1,1,3,3>(dxy);
6293
6294 simd4_float pivot_x = cx + simd::bitwise_xor(simd128::shuffle32<2,2,0,0>(dxy), sign_npnp);
6295 simd4_float pivot_y = cy + simd::bitwise_xor(simd128::shuffle32<3,3,1,1>(dxy), sign_npnp);
6296
6297 simd4_float sign_mask = simd_float::make4(-0.0f);
6298 simd4_float pos_x = simd::bitwise_or(simd::bitwise_and(offset_x, sign_mask), sx); // Copy sign.
6299 simd4_float pos_y = simd::bitwise_or(simd::bitwise_and(offset_y, sign_mask), sy);
6300
6301 simd4_float rx = simd128::shuffle32<0,0,2,2>(rdxy);
6302 simd4_float ry = simd128::shuffle32<1,1,3,3>(rdxy);
6303
6304 simd4_float near_x = (pos_x + pivot_x) * rx;
6305 simd4_float far_x = (pos_x - pivot_x) * rx;
6306
6307 simd4_float near_y = (pos_y + pivot_y) * ry;
6308 simd4_float far_y = (pos_y - pivot_y) * ry;
6309
6310 simd4_float a = simd_float::min(one, near_x); // First operand is returned on NaN.
6311 simd4_float b = simd_float::min(one, far_x);
6312
6313 edge_axis_near = simd::signmask32(simd_float::cmp_gt(a, near_y));
6314 edge_axis_far = simd::signmask32(simd_float::cmp_gt(b, far_y));
6315
6316 a = simd_float::min(a, near_y);
6317 b = simd_float::min(b, far_y);
6318
6319 simd4_float ax = pivot_x - offset_x * a;
6320 simd4_float ay = pivot_y - offset_y * a;
6321 simd4_float bx = pivot_x + offset_x * b;
6322 simd4_float by = pivot_y + offset_y * b;
6323
6324 simd4_float mask = simd_float::cmp_gt(a + b, simd_float::zero4()); // Make sure -a < b.
6325
6326 simd4_float mask_a = simd_float::cmp_neq(a, one);
6327 simd4_float mask_b = simd_float::cmp_neq(b, one);
6328
6329 mask_a = simd::bitwise_and(mask_a, mask);
6330 mask_b = simd::bitwise_and(mask_b, mask);
6331
6332 edge_mask = simd::bitwise_notand(edge_mask, _mm_packs_epi32(simd_float::asint(mask_a), simd_float::asint(mask_b)));
6333
6334 simd_float::store4(support_x + 8, ax);
6335 simd_float::store4(support_y + 8, ay);
6336 simd_float::store4(support_x + 12, bx);
6337 simd_float::store4(support_y + 12, by);
6338 }
6339
6340 mask = _mm_movemask_epi8(_mm_packs_epi16(corner_mask, edge_mask));
6341
6342 // Calculate and store vertex labels.
6343 // The 8 vertices are tagged using the sign bit of each axis.
6344 // Bit rotation is used to "transform" the coordinates.
6345 unsigned a_sign_face_bit = b_offset_neg ? (1 << a_face) : 0;
6346 unsigned b_sign_face_bit = b_positive_face_bit ? 0 : (1 << b_face);
6347
6348 unsigned a_vertices = 0x12003624 >> (3 - a_face); // Rotates all vertices in parallel.
6349 unsigned b_vertices = 0x00122436 >> (3 - b_face);
6350
6351 unsigned a_face_bits = 0xffff0000 | a_sign_face_bit;
6352 unsigned b_face_bits = 0x0000ffff | (b_sign_face_bit << 16);
6353
6354 support_tags[0] = ((a_vertices >> 0) & 0x7) | a_face_bits;
6355 support_tags[1] = ((a_vertices >> 8) & 0x7) | a_face_bits;
6356 support_tags[2] = ((a_vertices >> 16) & 0x7) | a_face_bits;
6357 support_tags[3] = ((a_vertices >> 24) & 0x7) | a_face_bits;
6358
6359 support_tags[4] = ((b_vertices << 16) & 0x70000) | b_face_bits;
6360 support_tags[5] = ((b_vertices << 8) & 0x70000) | b_face_bits;
6361 support_tags[6] = ((b_vertices >> 0) & 0x70000) | b_face_bits;
6362 support_tags[7] = ((b_vertices >> 8) & 0x70000) | b_face_bits;
6363
6364 // Calculate edge numbers in the local coordinate frame.
6365 unsigned edge_axis_winding = simd::signmask32(dxy);
6366
6367 unsigned y_near0 = (edge_axis_near >> 0) & 1;
6368 unsigned y_near1 = (edge_axis_near >> 1) & 1;
6369 unsigned y_near2 = (edge_axis_near >> 2) & 1;
6370 unsigned y_near3 = (edge_axis_near >> 3) & 1;
6371
6372 unsigned y_far0 = (edge_axis_far >> 0) & 1;
6373 unsigned y_far1 = (edge_axis_far >> 1) & 1;
6374 unsigned y_far2 = (edge_axis_far >> 2) & 1;
6375 unsigned y_far3 = (edge_axis_far >> 3) & 1;
6376
6377 unsigned a_near_edge0 = y_near0*2 + ((edge_axis_winding >> (0 + y_near0)) & 1);
6378 unsigned a_near_edge1 = y_near1*2 + ((edge_axis_winding >> (0 + y_near1)) & 1);
6379 unsigned a_near_edge2 = y_near2*2 + ((edge_axis_winding >> (2 + y_near2)) & 1);
6380 unsigned a_near_edge3 = y_near3*2 + ((edge_axis_winding >> (2 + y_near3)) & 1);
6381
6382 edge_axis_winding ^= 0xf;
6383
6384 unsigned a_far_edge0 = y_far0*2 + ((edge_axis_winding >> (0 + y_far0)) & 1);
6385 unsigned a_far_edge1 = y_far1*2 + ((edge_axis_winding >> (0 + y_far1)) & 1);
6386 unsigned a_far_edge2 = y_far2*2 + ((edge_axis_winding >> (2 + y_far2)) & 1);
6387 unsigned a_far_edge3 = y_far3*2 + ((edge_axis_winding >> (2 + y_far3)) & 1);
6388
6389 // Map local edges to labels (so that faces can share an edge).
6390 // The 12 edges are tagged using two ordered points.
6391 // We use the same trick as the vertex transform but do it for pairs of vertices (in correct order).
6392 uint64_t a_edge_map = 0x1200362424003612llu >> (3 - a_face);
6393 uint64_t b_edge_map = 0x2400361212003624llu >> (3 - b_face);
6394
6395 unsigned face_bits = a_sign_face_bit | (a_sign_face_bit << 8) | (b_sign_face_bit << 16) | (b_sign_face_bit << 24);
6396
6397 unsigned b_edge0 = ((unsigned)((b_edge_map >> (0<<4)) & 0x0707) << 16) | face_bits;
6398 unsigned b_edge1 = ((unsigned)((b_edge_map >> (1<<4)) & 0x0707) << 16) | face_bits;
6399 unsigned b_edge2 = ((unsigned)((b_edge_map >> (2<<4)) & 0x0707) << 16) | face_bits;
6400 unsigned b_edge3 = ((unsigned)((b_edge_map >> (3<<4)) & 0x0707) << 16) | face_bits;
6401
6402 support_tags[ 8] = (unsigned)((a_edge_map >> (a_near_edge0<<4)) & 0x0707) | b_edge0;
6403 support_tags[ 9] = (unsigned)((a_edge_map >> (a_near_edge1<<4)) & 0x0707) | b_edge1;
6404 support_tags[10] = (unsigned)((a_edge_map >> (a_near_edge2<<4)) & 0x0707) | b_edge2;
6405 support_tags[11] = (unsigned)((a_edge_map >> (a_near_edge3<<4)) & 0x0707) | b_edge3;
6406
6407 support_tags[12] = (unsigned)((a_edge_map >> (a_far_edge0<<4)) & 0x0707) | b_edge0;
6408 support_tags[13] = (unsigned)((a_edge_map >> (a_far_edge1<<4)) & 0x0707) | b_edge1;
6409 support_tags[14] = (unsigned)((a_edge_map >> (a_far_edge2<<4)) & 0x0707) | b_edge2;
6410 support_tags[15] = (unsigned)((a_edge_map >> (a_far_edge3<<4)) & 0x0707) | b_edge3;
6411 }
6412
6413 // Compute z-plane through face b and calculate z for the support points.
6414 simd4_float a_size_transformed = simd_float::load4(transformed_x);
6415 simd4_float c_transformed = simd_float::load4(transformed_y);
6416 simd4_float dx_transformed = simd_float::load4(transformed_z);
6417 simd4_float dy_transformed = simd_float::zero4();
6418
6419 simd128::transpose32(a_size_transformed, c_transformed, dx_transformed, dy_transformed);
6420
6421 simd4_float zn = simd_aos::cross(dx_transformed, dy_transformed);
6422 simd4_float plane = simd128::concat2x32<0,1,0,1>(simd::bitwise_xor(zn, simd_float::make4(-0.0f)), simd_aos::dot(c_transformed, zn));
6423 plane *= simd_float::make4(1.0f)/simd128::shuffle32<2,2,2,2>(zn);
6424
6425 NUDGE_ALIGNED(32) float penetrations[16];
6426
6427 simdv_float z_sign = simd_float::zerov();
6428
6429 if (b_offset_neg)
6430 z_sign = simd_float::makev(-0.0f);
6431
6432#if NUDGE_SIMDV_WIDTH == 256
6433 simdv_float penetration_offset = simd256::broadcast(simd128::shuffle32<2,2,2,2>(a_size_transformed));
6434 simdv_float plane256 = simd256::broadcast(plane);
6435#else
6436 simdv_float penetration_offset = simd128::shuffle32<2,2,2,2>(a_size_transformed);
6437#endif
6438 unsigned penetration_mask = 0;
6439
6440 for (unsigned i = 0; i < 16; i += simdv_width32) {
6441#if NUDGE_SIMDV_WIDTH == 256
6442 simdv_float plane = plane256;
6443#endif
6444
6445 simdv_float x = simd_float::loadv(support + 0 + i);
6446 simdv_float y = simd_float::loadv(support + 16 + i);
6447 simdv_float z = x*simd128::shuffle32<0,0,0,0>(plane) + y*simd128::shuffle32<1,1,1,1>(plane) + simd128::shuffle32<2,2,2,2>(plane);
6448
6449 simdv_float penetration = penetration_offset - simd::bitwise_xor(z, z_sign);
6450
6451 z += penetration * simd::bitwise_xor(simd_float::makev(0.5f), z_sign);
6452
6453 penetration_mask |= simd::signmask32(simd_float::cmp_gt(penetration, simd_float::zerov())) << i;
6454
6455 simd_float::storev(penetrations + i, penetration);
6456 simd_float::storev(support + 32 + i, z);
6457 }
6458
6459 mask &= penetration_mask;
6460
6461 // Inverse transform.
6462 unsigned a_face_inverse = (a_face ^ 1) ^ (a_face >> 1);
6463
6464 const float* support_x = support + 16*((a_face_inverse+1) % 3);
6465 const float* support_y = support + 16*((a_face_inverse+2) % 3);
6466 const float* support_z = support + 16*a_face_inverse;
6467
6468 // Setup rotation matrix from a to world.
6469 simd4_float a_to_world0, a_to_world1, a_to_world2;
6470 {
6471 simd4_float qx_qy_qz_qs = simd_float::load4(transforms[a_index].rotation);
6472 simd4_float kx_ky_kz_ks = qx_qy_qz_qs + qx_qy_qz_qs;
6473
6474 // Make ks negative so that we can create +sx from kx*qs and -sx from ks*qx.
6475 kx_ky_kz_ks = simd::bitwise_xor(kx_ky_kz_ks, simd_float::make4(0.0f, 0.0f, 0.0f, -0.0f));
6476
6477 // 1.0f - yy - zz, xy + sz, xz - sy
6478 a_to_world0 = (simd128::shuffle32<1,0,0,3>(kx_ky_kz_ks) * simd128::shuffle32<1,1,2,3>(qx_qy_qz_qs) +
6479 simd128::shuffle32<2,2,3,3>(kx_ky_kz_ks) * simd128::shuffle32<2,3,1,3>(qx_qy_qz_qs));
6480
6481 // xy - sz, 1.0f - zz - xx, yz + sx
6482 a_to_world1 = (simd128::shuffle32<0,2,1,3>(kx_ky_kz_ks) * simd128::shuffle32<1,2,2,3>(qx_qy_qz_qs) +
6483 simd128::shuffle32<3,0,0,3>(kx_ky_kz_ks) * simd128::shuffle32<2,0,3,3>(qx_qy_qz_qs));
6484
6485 // xz + sy, yz - sx, 1.0f - xx - yy
6486 a_to_world2 = (simd128::shuffle32<0,1,0,3>(kx_ky_kz_ks) * simd128::shuffle32<2,2,0,3>(qx_qy_qz_qs) +
6487 simd128::shuffle32<1,3,1,3>(kx_ky_kz_ks) * simd128::shuffle32<3,0,1,3>(qx_qy_qz_qs));
6488
6489 a_to_world0 = a_to_world0 - simd_float::make4(1.0f, 0.0f, 0.0f, 0.0f);
6490 a_to_world1 = a_to_world1 - simd_float::make4(0.0f, 1.0f, 0.0f, 0.0f);
6491 a_to_world2 = a_to_world2 - simd_float::make4(0.0f, 0.0f, 1.0f, 0.0f);
6492
6493 a_to_world0 = simd::bitwise_xor(a_to_world0, simd_float::make4(-0.0f, 0.0f, 0.0f, 0.0f));
6494 a_to_world1 = simd::bitwise_xor(a_to_world1, simd_float::make4(0.0f, -0.0f, 0.0f, 0.0f));
6495 a_to_world2 = simd::bitwise_xor(a_to_world2, simd_float::make4(0.0f, 0.0f, -0.0f, 0.0f));
6496 }
6497
6498 // Add valid support points as contacts.
6499 simd4_float wn = a_face == 0 ? a_to_world0 : (a_face == 1 ? a_to_world1 : a_to_world2);
6500
6501 if (b_offset_neg)
6502 wn = simd::bitwise_xor(wn, simd_float::make4(-0.0f));
6503
6504 simd4_float a_position = simd_float::load4(transforms[a_index].position);
6505
6506 uint16_t a_body = (uint16_t)transforms[a_index].body;
6507 uint16_t b_body = (uint16_t)transforms[b_index].body;
6508
6509 a_index = transforms[a_index].body >> 16;
6510 b_index = transforms[b_index].body >> 16;
6511
6512 unsigned tag_swap = 0;
6513
6514 if (b_index > a_index) {
6515 unsigned tc = a_index;
6516 uint16_t tb = a_body;
6517
6518 a_index = b_index;
6519 b_index = tc;
6520
6521 a_body = b_body;
6522 b_body = tb;
6523
6524 tag_swap = 16;
6525
6526 wn = simd::bitwise_xor(wn, simd_float::make4(-0.0f));;
6527 }
6528
6529 uint64_t high_tag = ((uint64_t)a_index << 32) | ((uint64_t)b_index << 48);
6530
6531 while (mask) {
6532 unsigned index = first_set_bit(mask);
6533 mask &= mask-1;
6534
6535 simd4_float wp = (a_to_world0 * simd_float::broadcast_load4(support_x + index) +
6536 a_to_world1 * simd_float::broadcast_load4(support_y + index) +
6537 a_to_world2 * simd_float::broadcast_load4(support_z + index) + a_position);
6538
6539 float penetration = penetrations[index];
6540
6541 simd_float::store4(contacts[count].position, wp);
6542 simd_float::store4(contacts[count].normal, wn);
6543
6544 contacts[count].penetration = penetration;
6545 contacts[count].friction = NUDGE_FRICTION_MODEL(properties[a_body].friction,properties[b_body].friction); // this works!
6546 bodies[count].a = a_body;
6547 bodies[count].b = b_body;
6548 tags[count] = (uint32_t)(support_tags[index] >> tag_swap) | (uint32_t)(support_tags[index] << tag_swap) | high_tag;
6549
6550 ++count;
6551 }
6552 }
6553
6554 // Batch edge pairs.
6555 // Note: We need to output the edge pairs after handling the faces since we read from the pairs array during face processing.
6556 while (edge) {
6557 unsigned index = first_set_bit(edge);
6558 edge &= edge-1;
6559
6560 unsigned pair = pairs[pair_offset + index];
6561 unsigned edge_a = a_edge_array[index];
6562 unsigned edge_b = b_edge_array[index];
6563
6564 unsigned a = pair & 0xffff;
6565 unsigned b = pair >> 16;
6566
6567 a = transforms[a].body >> 16;
6568 b = transforms[b].body >> 16;
6569
6570 feature_penetrations[added] = penetration_array[index];
6571 features[added] = a > b ? edge_a | (edge_b << 16) : edge_b | (edge_a << 16);
6572 pairs[added] = a > b ? pair : (pair >> 16) | (pair << 16);
6573
6574 ++added;
6575 }
6576 }
6577
6578 assert(!added || pairs[added-1]); // There should be no padding.
6579
6580 pair_count = added;
6581 }
6582
6583 // Do edge-edge tests.
6584 {
6585 pairs[pair_count+0] = 0; // Padding.
6586 pairs[pair_count+1] = 0;
6587 pairs[pair_count+2] = 0;
6588
6589 features[pair_count+0] = 0;
6590 features[pair_count+1] = 0;
6591 features[pair_count+2] = 0;
6592
6593 feature_penetrations[pair_count+0] = 0.0f;
6594 feature_penetrations[pair_count+1] = 0.0f;
6595 feature_penetrations[pair_count+2] = 0.0f;
6596
6597 for (unsigned i = 0; i < pair_count; i += 4) {
6598 // Load pairs.
6599 unsigned pair0 = pairs[i + 0];
6600 unsigned pair1 = pairs[i + 1];
6601 unsigned pair2 = pairs[i + 2];
6602 unsigned pair3 = pairs[i + 3];
6603
6604 unsigned a0_index = pair0 & 0xffff;
6605 unsigned b0_index = pair0 >> 16;
6606
6607 unsigned a1_index = pair1 & 0xffff;
6608 unsigned b1_index = pair1 >> 16;
6609
6610 unsigned a2_index = pair2 & 0xffff;
6611 unsigned b2_index = pair2 >> 16;
6612
6613 unsigned a3_index = pair3 & 0xffff;
6614 unsigned b3_index = pair3 >> 16;
6615
6616 // Load rotations.
6617 simd4_float a_rotation_x = simd_float::load4(transforms[a0_index].rotation);
6618 simd4_float a_rotation_y = simd_float::load4(transforms[a1_index].rotation);
6619 simd4_float a_rotation_z = simd_float::load4(transforms[a2_index].rotation);
6620 simd4_float a_rotation_s = simd_float::load4(transforms[a3_index].rotation);
6621
6622 simd4_float b_rotation_x = simd_float::load4(transforms[b0_index].rotation);
6623 simd4_float b_rotation_y = simd_float::load4(transforms[b1_index].rotation);
6624 simd4_float b_rotation_z = simd_float::load4(transforms[b2_index].rotation);
6625 simd4_float b_rotation_s = simd_float::load4(transforms[b3_index].rotation);
6626
6627 simd128::transpose32(a_rotation_x, a_rotation_y, a_rotation_z, a_rotation_s);
6628 simd128::transpose32(b_rotation_x, b_rotation_y, b_rotation_z, b_rotation_s);
6629
6630 // Compute rotation matrices.
6631 simd4_float a_basis_xx, a_basis_xy, a_basis_xz;
6632 simd4_float a_basis_yx, a_basis_yy, a_basis_yz;
6633 simd4_float a_basis_zx, a_basis_zy, a_basis_zz;
6634 {
6635 simd4_float kx = a_rotation_x + a_rotation_x;
6636 simd4_float ky = a_rotation_y + a_rotation_y;
6637 simd4_float kz = a_rotation_z + a_rotation_z;
6638
6639 simd4_float xx = kx*a_rotation_x;
6640 simd4_float yy = ky*a_rotation_y;
6641 simd4_float zz = kz*a_rotation_z;
6642 simd4_float xy = kx*a_rotation_y;
6643 simd4_float xz = kx*a_rotation_z;
6644 simd4_float yz = ky*a_rotation_z;
6645 simd4_float sx = kx*a_rotation_s;
6646 simd4_float sy = ky*a_rotation_s;
6647 simd4_float sz = kz*a_rotation_s;
6648
6649 a_basis_xx = simd_float::make4(1.0f) - yy - zz;
6650 a_basis_xy = xy + sz;
6651 a_basis_xz = xz - sy;
6652
6653 a_basis_yx = xy - sz;
6654 a_basis_yy = simd_float::make4(1.0f) - xx - zz;
6655 a_basis_yz = yz + sx;
6656
6657 a_basis_zx = xz + sy;
6658 a_basis_zy = yz - sx;
6659 a_basis_zz = simd_float::make4(1.0f) - xx - yy;
6660 }
6661
6662 simd4_float b_basis_xx, b_basis_xy, b_basis_xz;
6663 simd4_float b_basis_yx, b_basis_yy, b_basis_yz;
6664 simd4_float b_basis_zx, b_basis_zy, b_basis_zz;
6665 {
6666 simd4_float kx = b_rotation_x + b_rotation_x;
6667 simd4_float ky = b_rotation_y + b_rotation_y;
6668 simd4_float kz = b_rotation_z + b_rotation_z;
6669
6670 simd4_float xx = kx*b_rotation_x;
6671 simd4_float yy = ky*b_rotation_y;
6672 simd4_float zz = kz*b_rotation_z;
6673 simd4_float xy = kx*b_rotation_y;
6674 simd4_float xz = kx*b_rotation_z;
6675 simd4_float yz = ky*b_rotation_z;
6676 simd4_float sx = kx*b_rotation_s;
6677 simd4_float sy = ky*b_rotation_s;
6678 simd4_float sz = kz*b_rotation_s;
6679
6680 b_basis_xx = simd_float::make4(1.0f) - yy - zz;
6681 b_basis_xy = xy + sz;
6682 b_basis_xz = xz - sy;
6683
6684 b_basis_yx = xy - sz;
6685 b_basis_yy = simd_float::make4(1.0f) - xx - zz;
6686 b_basis_yz = yz + sx;
6687
6688 b_basis_zx = xz + sy;
6689 b_basis_zy = yz - sx;
6690 b_basis_zz = simd_float::make4(1.0f) - xx - yy;
6691 }
6692
6693 // Load edges.
6694 simd4_int32 edge = simd_int32::load4((const int32_t*)(features + i));
6695
6696 // Select edge directions.
6697#ifdef NUDGE_NATIVE_BLENDV32
6698 simd4_int32 a_select_y = simd_int32::shift_left<32-1>(edge); // Shifts the relevant bit to the top.
6699 simd4_int32 a_select_z = simd_int32::shift_left<32-2>(edge);
6700
6701 simd4_int32 b_select_y = simd_int32::shift_left<16-1>(edge);
6702 simd4_int32 b_select_z = simd_int32::shift_left<16-2>(edge);
6703
6704 simd4_float u_x = simd::blendv32(a_basis_xx, a_basis_yx, simd_int32::asfloat(a_select_y));
6705 simd4_float u_y = simd::blendv32(a_basis_xy, a_basis_yy, simd_int32::asfloat(a_select_y));
6706 simd4_float u_z = simd::blendv32(a_basis_xz, a_basis_yz, simd_int32::asfloat(a_select_y));
6707
6708 simd4_float v_x = simd::blendv32(b_basis_xx, b_basis_yx, simd_int32::asfloat(b_select_y));
6709 simd4_float v_y = simd::blendv32(b_basis_xy, b_basis_yy, simd_int32::asfloat(b_select_y));
6710 simd4_float v_z = simd::blendv32(b_basis_xz, b_basis_yz, simd_int32::asfloat(b_select_y));
6711
6712 u_x = simd::blendv32(u_x, a_basis_zx, simd_int32::asfloat(a_select_z));
6713 u_y = simd::blendv32(u_y, a_basis_zy, simd_int32::asfloat(a_select_z));
6714 u_z = simd::blendv32(u_z, a_basis_zz, simd_int32::asfloat(a_select_z));
6715
6716 v_x = simd::blendv32(v_x, b_basis_zx, simd_int32::asfloat(b_select_z));
6717 v_y = simd::blendv32(v_y, b_basis_zy, simd_int32::asfloat(b_select_z));
6718 v_z = simd::blendv32(v_z, b_basis_zz, simd_int32::asfloat(b_select_z));
6719#else
6720 simd4_int32 a_edge = simd::bitwise_and(edge, simd_int32::make4(0xffff));
6721 simd4_int32 b_edge = simd_int32::shift_right<16>(edge);
6722
6723 simd4_float a_select_x = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::zero4()));
6724 simd4_float a_select_y = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::make4(1)));
6725 simd4_float a_select_z = simd_int32::asfloat(simd_int32::cmp_eq(a_edge, simd_int32::make4(2)));
6726
6727 simd4_float b_select_x = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::zero4()));
6728 simd4_float b_select_y = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::make4(1)));
6729 simd4_float b_select_z = simd_int32::asfloat(simd_int32::cmp_eq(b_edge, simd_int32::make4(2)));
6730
6731 simd4_float u_x = simd::bitwise_and(a_basis_xx, a_select_x);
6732 simd4_float u_y = simd::bitwise_and(a_basis_xy, a_select_x);
6733 simd4_float u_z = simd::bitwise_and(a_basis_xz, a_select_x);
6734
6735 simd4_float v_x = simd::bitwise_and(b_basis_xx, b_select_x);
6736 simd4_float v_y = simd::bitwise_and(b_basis_xy, b_select_x);
6737 simd4_float v_z = simd::bitwise_and(b_basis_xz, b_select_x);
6738
6739 u_x = simd::bitwise_or(u_x, simd::bitwise_and(a_basis_yx, a_select_y));
6740 u_y = simd::bitwise_or(u_y, simd::bitwise_and(a_basis_yy, a_select_y));
6741 u_z = simd::bitwise_or(u_z, simd::bitwise_and(a_basis_yz, a_select_y));
6742
6743 v_x = simd::bitwise_or(v_x, simd::bitwise_and(b_basis_yx, b_select_y));
6744 v_y = simd::bitwise_or(v_y, simd::bitwise_and(b_basis_yy, b_select_y));
6745 v_z = simd::bitwise_or(v_z, simd::bitwise_and(b_basis_yz, b_select_y));
6746
6747 u_x = simd::bitwise_or(u_x, simd::bitwise_and(a_basis_zx, a_select_z));
6748 u_y = simd::bitwise_or(u_y, simd::bitwise_and(a_basis_zy, a_select_z));
6749 u_z = simd::bitwise_or(u_z, simd::bitwise_and(a_basis_zz, a_select_z));
6750
6751 v_x = simd::bitwise_or(v_x, simd::bitwise_and(b_basis_zx, b_select_z));
6752 v_y = simd::bitwise_or(v_y, simd::bitwise_and(b_basis_zy, b_select_z));
6753 v_z = simd::bitwise_or(v_z, simd::bitwise_and(b_basis_zz, b_select_z));
6754#endif
6755
6756 // Compute axis.
6757 simd4_float n_x, n_y, n_z;
6758 simd_soa::cross(u_x, u_y, u_z, v_x, v_y, v_z, n_x, n_y, n_z);
6759
6760 // Load positions.
6761 simd4_float a_position_x = simd_float::load4(transforms[a0_index].position);
6762 simd4_float a_position_y = simd_float::load4(transforms[a1_index].position);
6763 simd4_float a_position_z = simd_float::load4(transforms[a2_index].position);
6764 simd4_float a_position_w = simd_float::load4(transforms[a3_index].position);
6765
6766 simd4_float b_position_x = simd_float::load4(transforms[b0_index].position);
6767 simd4_float b_position_y = simd_float::load4(transforms[b1_index].position);
6768 simd4_float b_position_z = simd_float::load4(transforms[b2_index].position);
6769 simd4_float b_position_w = simd_float::load4(transforms[b3_index].position);
6770
6771 simd128::transpose32(a_position_x, a_position_y, a_position_z, a_position_w);
6772 simd128::transpose32(b_position_x, b_position_y, b_position_z, b_position_w);
6773
6774 // Compute relative position.
6775 simd4_float delta_x = b_position_x - a_position_x;
6776 simd4_float delta_y = b_position_y - a_position_y;
6777 simd4_float delta_z = b_position_z - a_position_z;
6778
6779 // Flip normal?
6780 simd4_float sign_mask = simd_float::make4(-0.0f);
6781 simd4_float flip_sign = simd::bitwise_and(n_x*delta_x + n_y*delta_y + n_z*delta_z, sign_mask);
6782
6783 n_x = simd::bitwise_xor(n_x, flip_sign);
6784 n_y = simd::bitwise_xor(n_y, flip_sign);
6785 n_z = simd::bitwise_xor(n_z, flip_sign);
6786
6787 // Load sizes.
6788 simd4_float a_size_x = simd_float::load4(colliders[a0_index].size);
6789 simd4_float a_size_y = simd_float::load4(colliders[a1_index].size);
6790 simd4_float a_size_z = simd_float::load4(colliders[a2_index].size);
6791 simd4_float a_size_w = simd_float::load4(colliders[a3_index].size);
6792
6793 simd4_float b_size_x = simd_float::load4(colliders[b0_index].size);
6794 simd4_float b_size_y = simd_float::load4(colliders[b1_index].size);
6795 simd4_float b_size_z = simd_float::load4(colliders[b2_index].size);
6796 simd4_float b_size_w = simd_float::load4(colliders[b3_index].size);
6797
6798 simd128::transpose32(a_size_x, a_size_y, a_size_z, a_size_w);
6799 simd128::transpose32(b_size_x, b_size_y, b_size_z, b_size_w);
6800
6801 // Compute direction to the edge.
6802 simd4_float a_sign_x = a_basis_xx*n_x + a_basis_xy*n_y + a_basis_xz*n_z;
6803 simd4_float a_sign_y = a_basis_yx*n_x + a_basis_yy*n_y + a_basis_yz*n_z;
6804 simd4_float a_sign_z = a_basis_zx*n_x + a_basis_zy*n_y + a_basis_zz*n_z;
6805
6806 simd4_float b_sign_x = b_basis_xx*n_x + b_basis_xy*n_y + b_basis_xz*n_z;
6807 simd4_float b_sign_y = b_basis_yx*n_x + b_basis_yy*n_y + b_basis_yz*n_z;
6808 simd4_float b_sign_z = b_basis_zx*n_x + b_basis_zy*n_y + b_basis_zz*n_z;
6809
6810 a_sign_x = simd::bitwise_and(a_sign_x, sign_mask);
6811 a_sign_y = simd::bitwise_and(a_sign_y, sign_mask);
6812 a_sign_z = simd::bitwise_and(a_sign_z, sign_mask);
6813
6814 b_sign_x = simd::bitwise_and(b_sign_x, sign_mask);
6815 b_sign_y = simd::bitwise_and(b_sign_y, sign_mask);
6816 b_sign_z = simd::bitwise_and(b_sign_z, sign_mask);
6817
6818 simd4_int32 edge_x = simd::bitwise_or(simd_int32::shift_right<31-0>(simd_float::asint(a_sign_x)), simd_int32::shift_right<31-16>(simd_float::asint(simd::bitwise_xor(b_sign_x, simd_float::make4(-0.0f)))));
6819 simd4_int32 edge_y = simd::bitwise_or(simd_int32::shift_right<31-1>(simd_float::asint(a_sign_y)), simd_int32::shift_right<31-17>(simd_float::asint(simd::bitwise_xor(b_sign_y, simd_float::make4(-0.0f)))));
6820 simd4_int32 edge_z = simd::bitwise_or(simd_int32::shift_right<31-2>(simd_float::asint(a_sign_z)), simd_int32::shift_right<31-18>(simd_float::asint(simd::bitwise_xor(b_sign_z, simd_float::make4(-0.0f)))));
6821 simd4_int32 edge_w = _mm_add_epi16(_mm_add_epi16(edge, _mm_set1_epi16(1)), _mm_srli_epi16(edge, 1)); // Calculates 1 << edge (valid for 0-2).
6822
6823 simd4_int32 edge_xy = simd::bitwise_or(edge_x, edge_y);
6824 simd4_int32 edge_zw = simd::bitwise_or(edge_z, edge_w);
6825
6826 simd4_int32 tag_hi = simd::bitwise_or(edge_xy, edge_zw);
6827 simd4_int32 tag_lo = simd::bitwise_notand(edge_w, tag_hi);
6828 tag_hi = simd_int32::shift_left<8>(tag_hi);
6829
6830 simd4_int32 tag = simd::bitwise_or(tag_lo, tag_hi);
6831
6832 a_size_x = simd::bitwise_xor(a_size_x, a_sign_x);
6833 a_size_y = simd::bitwise_xor(a_size_y, a_sign_y);
6834 a_size_z = simd::bitwise_xor(a_size_z, a_sign_z);
6835
6836 b_size_x = simd::bitwise_xor(b_size_x, b_sign_x);
6837 b_size_y = simd::bitwise_xor(b_size_y, b_sign_y);
6838 b_size_z = simd::bitwise_xor(b_size_z, b_sign_z);
6839
6840 a_basis_xx *= a_size_x;
6841 a_basis_xy *= a_size_x;
6842 a_basis_xz *= a_size_x;
6843
6844 a_basis_yx *= a_size_y;
6845 a_basis_yy *= a_size_y;
6846 a_basis_yz *= a_size_y;
6847
6848 a_basis_zx *= a_size_z;
6849 a_basis_zy *= a_size_z;
6850 a_basis_zz *= a_size_z;
6851
6852 b_basis_xx *= b_size_x;
6853 b_basis_xy *= b_size_x;
6854 b_basis_xz *= b_size_x;
6855
6856 b_basis_yx *= b_size_y;
6857 b_basis_yy *= b_size_y;
6858 b_basis_yz *= b_size_y;
6859
6860 b_basis_zx *= b_size_z;
6861 b_basis_zy *= b_size_z;
6862 b_basis_zz *= b_size_z;
6863
6864 simd4_float ca_x = a_basis_xx + a_basis_yx + a_basis_zx + a_position_x;
6865 simd4_float ca_y = a_basis_xy + a_basis_yy + a_basis_zy + a_position_y;
6866 simd4_float ca_z = a_basis_xz + a_basis_yz + a_basis_zz + a_position_z;
6867
6868 simd4_float cb_x = b_basis_xx + b_basis_yx + b_basis_zx - b_position_x; // Note that cb really is negated to save some operations.
6869 simd4_float cb_y = b_basis_xy + b_basis_yy + b_basis_zy - b_position_y;
6870 simd4_float cb_z = b_basis_xz + b_basis_yz + b_basis_zz - b_position_z;
6871
6872 // Calculate closest point between the two lines.
6873 simd4_float o_x = ca_x + cb_x;
6874 simd4_float o_y = ca_y + cb_y;
6875 simd4_float o_z = ca_z + cb_z;
6876
6877 simd4_float ia = u_x*u_x + u_y*u_y + u_z*u_z;
6878 simd4_float ib = u_x*v_x + u_y*v_y + u_z*v_z;
6879 simd4_float ic = v_x*v_x + v_y*v_y + v_z*v_z;
6880 simd4_float id = o_x*u_x + o_y*u_y + o_z*u_z;
6881 simd4_float ie = o_x*v_x + o_y*v_y + o_z*v_z;
6882
6883 simd4_float half = simd_float::make4(0.5f);
6884 simd4_float ir = half / (ia*ic - ib*ib);
6885
6886 simd4_float sa = (ib*ie - ic*id) * ir;
6887 simd4_float sb = (ia*ie - ib*id) * ir;
6888
6889 simd4_float p_x = (ca_x - cb_x)*half + u_x*sa + v_x*sb;
6890 simd4_float p_y = (ca_y - cb_y)*half + u_y*sa + v_y*sb;
6891 simd4_float p_z = (ca_z - cb_z)*half + u_z*sa + v_z*sb;
6892
6893 simd_soa::normalize(n_x, n_y, n_z);
6894
6895 simd4_float p_w = simd_float::load4(feature_penetrations + i);
6896 simd4_float n_w = simd_float::make4(0.5f);
6897
6898 simd128::transpose32(p_x, p_y, p_z, p_w);
6899 simd128::transpose32(n_x, n_y, n_z, n_w);
6900
6901 simd_float::store4(contacts[count + 0].position, p_x);
6902 simd_float::store4(contacts[count + 0].normal, n_x);
6903 simd_float::store4(contacts[count + 1].position, p_y);
6904 simd_float::store4(contacts[count + 1].normal, n_y);
6905 simd_float::store4(contacts[count + 2].position, p_z);
6906 simd_float::store4(contacts[count + 2].normal, n_z);
6907 simd_float::store4(contacts[count + 3].position, p_w);
6908 simd_float::store4(contacts[count + 3].normal, n_w);
6909
6910 simd4_float body_pair = simd::bitwise_or(simd::bitwise_and(a_position_w, simd_int32::asfloat(simd_int32::make4(0xffff))), simd_int32::asfloat(simd_int32::shift_left<16>(simd_float::asint(b_position_w))));
6911 simd_float::storeu4((float*)(bodies + count), body_pair);
6912
6913 simd4_int32 pair = simd_float::asint(simd::bitwise_or(simd::bitwise_and(b_position_w, simd_int32::asfloat(simd_int32::make4(0xffff0000))), simd_int32::asfloat(simd_int32::shift_right<16>(simd_float::asint(a_position_w)))));
6914
6915 simd_int32::storeu4((int32_t*)tags + count*2 + 0, simd128::unpacklo32(tag, pair));
6916 simd_int32::storeu4((int32_t*)tags + count*2 + 4, simd128::unpackhi32(tag, pair));
6917
6918 count += 4;
6919 }
6920
6921 // Get rid of padding.
6922 while (count && bodies[count-1].a == bodies[count-1].b)
6923 --count;
6924 }
6925
6926 return count;
6927}
6928
6929static inline unsigned sphere_sphere_collide(SphereCollider a, SphereCollider b, Transform a_transform, Transform b_transform, Contact* contacts, BodyPair* bodies,float friction) {
6930 float r = a.radius + b.radius;
6931
6932 float3 dp = make_float3(b_transform.position) - make_float3(a_transform.position);
6933 float l2 = length2(dp);
6934
6935 if (l2 > r*r)
6936 return 0;
6937
6938 float3 n;
6939 float l = sqrtf(l2);
6940
6941 if (l2 > 1e-4f)
6942 n = dp * (1.0f / l);
6943 else
6944 n = make_float3(1.0f, 0.0f, 0.0f);
6945
6946 float3 p = make_float3(a_transform.position) + n * (l - b.radius);
6947
6948 contacts[0].position[0] = p.x;
6949 contacts[0].position[1] = p.y;
6950 contacts[0].position[2] = p.z;
6951 contacts[0].penetration = r - l;
6952 contacts[0].normal[0] = n.x;
6953 contacts[0].normal[1] = n.y;
6954 contacts[0].normal[2] = n.z;
6955 contacts[0].friction = friction; // doesn't work (try setting it to 100000.f: nothing changes!)
6956
6957 bodies[0].a = (uint16_t)a_transform.body;
6958 bodies[0].b = (uint16_t)b_transform.body;
6959
6960 return 1;
6961}
6962
6963static inline unsigned box_sphere_collide(BoxCollider a, SphereCollider b, Transform a_transform, Transform b_transform, Contact* contacts, BodyPair* bodies, float friction) {
6964 Rotation a_to_world = make_rotation(a_transform.rotation);
6965 Rotation world_to_a = inverse(a_to_world);
6966 float3 offset_b = world_to_a * (make_float3(b_transform.position) - make_float3(a_transform.position));
6967
6968 float dx = fabsf(offset_b.x);
6969 float dy = fabsf(offset_b.y);
6970 float dz = fabsf(offset_b.z);
6971
6972 float w = a.size[0] + b.radius;
6973 float h = a.size[1] + b.radius;
6974 float d = a.size[2] + b.radius;
6975
6976 if (dx >= w || dy >= h || dz >= d)
6977 return 0;
6978
6979 float3 n;
6980 float penetration;
6981
6982 float r = b.radius;
6983
6984 unsigned outside_x = dx > a.size[0];
6985 unsigned outside_y = dy > a.size[1];
6986 unsigned outside_z = dz > a.size[2];
6987
6988 if (outside_x + outside_y + outside_z >= 2) {
6989 float3 corner = {
6990 outside_x ? (offset_b.x > 0.0f ? a.size[0] : -a.size[0]) : offset_b.x,
6991 outside_y ? (offset_b.y > 0.0f ? a.size[1] : -a.size[1]) : offset_b.y,
6992 outside_z ? (offset_b.z > 0.0f ? a.size[2] : -a.size[2]) : offset_b.z,
6993 };
6994
6995 float3 dp = offset_b - corner;
6996 float l2 = length2(dp);
6997
6998 if (l2 > r*r)
6999 return 0;
7000
7001 float l = sqrtf(l2);
7002 float m = 1.0f / l;
7003
7004 n = dp * m;
7005 penetration = r - l;
7006 }
7007 else if (w - dx < h - dy && w - dx < d - dz) {
7008 n.x = offset_b.x > 0.0f ? 1.0f : -1.0f;
7009 n.y = 0.0f;
7010 n.z = 0.0f;
7011 penetration = w - dx;
7012 }
7013 else if (h - dy < d - dz) {
7014 n.x = 0.0f;
7015 n.y = offset_b.y > 0.0f ? 1.0f : -1.0f;
7016 n.z = 0.0f;
7017 penetration = h - dy;
7018 }
7019 else {
7020 n.x = 0.0f;
7021 n.y = 0.0f;
7022 n.z = offset_b.z > 0.0f ? 1.0f : -1.0f;
7023 penetration = d - dz;
7024 }
7025
7026 float3 p = offset_b - n*r;
7027
7028 p = a_to_world * p + make_float3(a_transform.position);
7029 n = a_to_world * n;
7030
7031 contacts[0].position[0] = p.x;
7032 contacts[0].position[1] = p.y;
7033 contacts[0].position[2] = p.z;
7034 contacts[0].penetration = penetration;
7035 contacts[0].normal[0] = n.x;
7036 contacts[0].normal[1] = n.y;
7037 contacts[0].normal[2] = n.z;
7038 contacts[0].friction = friction; // affects only the box shape (try setting it to 100000.f: nothing changes for the sphere!)
7039
7040 bodies[0].a = (uint16_t)a_transform.body;
7041 bodies[0].b = (uint16_t)b_transform.body;
7042
7043 return 1;
7044}
7045
7046template<unsigned offset>
7047static inline void dilate_3(simdv_int32 x, simdv_int32& lo32, simdv_int32& hi32) {
7048 simdv_int32 mask0 = simd_int32::makev(0xff);
7049 simdv_int32 mask1 = simd_int32::makev(0x0f00f00f);
7050 simdv_int32 mask2 = simd_int32::makev(0xc30c30c3);
7051 simdv_int32 mask3 = simd_int32::makev(0x49249249);
7052
7053 simdv_int32 lo24 = x;
7054 simdv_int32 hi24 = simd_int32::shift_right<8>(x);
7055 lo24 = simd::bitwise_and(lo24, mask0);
7056 hi24 = simd::bitwise_and(hi24, mask0);
7057
7058 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<8>(lo24));
7059 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<8>(hi24));
7060 lo24 = simd::bitwise_and(lo24, mask1);
7061 hi24 = simd::bitwise_and(hi24, mask1);
7062
7063 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<4>(lo24));
7064 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<4>(hi24));
7065 lo24 = simd::bitwise_and(lo24, mask2);
7066 hi24 = simd::bitwise_and(hi24, mask2);
7067
7068 lo24 = simd::bitwise_or(lo24, simd_int32::shift_left<2>(lo24));
7069 hi24 = simd::bitwise_or(hi24, simd_int32::shift_left<2>(hi24));
7070 lo24 = simd::bitwise_and(lo24, mask3);
7071 hi24 = simd::bitwise_and(hi24, mask3);
7072
7073 lo32 = simd::bitwise_or(simd_int32::shift_left<offset>(lo24), simd_int32::shift_left<24+offset>(hi24));
7074 hi32 = simd_int32::shift_right<8-offset>(hi24);
7075}
7076
7077static inline void morton(simdv_int32 x, simdv_int32 y, simdv_int32 z, simdv_int32& lo32, simdv_int32& hi32) {
7078 simdv_int32 lx, hx, ly, hy, lz, hz;
7079 dilate_3<2>(x, lx, hx);
7080 dilate_3<1>(y, ly, hy);
7081 dilate_3<0>(z, lz, hz);
7082
7083 lo32 = simd::bitwise_or(simd::bitwise_or(lx, ly), lz);
7084 hi32 = simd::bitwise_or(simd::bitwise_or(hx, hy), hz);
7085}
7086
7087static inline void radix_sort_uint64_low48(uint64_t* data, unsigned count, Arena temporary) {
7088 uint64_t* temp = allocate_array<uint64_t>(&temporary, count, 16);
7089
7090 unsigned buckets0[257] = {};
7091 unsigned buckets1[257] = {};
7092 unsigned buckets2[257] = {};
7093 unsigned buckets3[257] = {};
7094 unsigned buckets4[257] = {};
7095 unsigned buckets5[257] = {};
7096
7097 unsigned* histogram0 = buckets0+1;
7098 unsigned* histogram1 = buckets1+1;
7099 unsigned* histogram2 = buckets2+1;
7100 unsigned* histogram3 = buckets3+1;
7101 unsigned* histogram4 = buckets4+1;
7102 unsigned* histogram5 = buckets5+1;
7103
7104 for (unsigned i = 0; i < count; ++i) {
7105 uint64_t d = data[i];
7106
7107 ++histogram0[(d >> (0 << 3)) & 0xff];
7108 ++histogram1[(d >> (1 << 3)) & 0xff];
7109 ++histogram2[(d >> (2 << 3)) & 0xff];
7110 ++histogram3[(d >> (3 << 3)) & 0xff];
7111 ++histogram4[(d >> (4 << 3)) & 0xff];
7112 ++histogram5[(d >> (5 << 3)) & 0xff];
7113 }
7114
7115 for (unsigned i = 1; i < 256; ++i) {
7116 buckets0[i] += buckets0[i-1];
7117 buckets1[i] += buckets1[i-1];
7118 buckets2[i] += buckets2[i-1];
7119 buckets3[i] += buckets3[i-1];
7120 buckets4[i] += buckets4[i-1];
7121 buckets5[i] += buckets5[i-1];
7122 }
7123
7124 for (unsigned i = 0; i < count; ++i) {
7125 uint64_t d = data[i];
7126 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
7127 temp[index] = d;
7128 }
7129
7130 for (unsigned i = 0; i < count; ++i) {
7131 uint64_t d = temp[i];
7132 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
7133 data[index] = d;
7134 }
7135
7136 for (unsigned i = 0; i < count; ++i) {
7137 uint64_t d = data[i];
7138 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
7139 temp[index] = d;
7140 }
7141
7142 for (unsigned i = 0; i < count; ++i) {
7143 uint64_t d = temp[i];
7144 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
7145 data[index] = d;
7146 }
7147
7148 for (unsigned i = 0; i < count; ++i) {
7149 uint64_t d = data[i];
7150 unsigned index = buckets4[(d >> (4 << 3)) & 0xff]++;
7151 temp[index] = d;
7152 }
7153
7154 for (unsigned i = 0; i < count; ++i) {
7155 uint64_t d = temp[i];
7156 unsigned index = buckets5[(d >> (5 << 3)) & 0xff]++;
7157 data[index] = d;
7158 }
7159}
7160
7161static inline void radix_sort_uint32_x2(uint32_t* data, uint32_t* data2, unsigned count, Arena temporary) {
7162 uint32_t* temp = allocate_array<uint32_t>(&temporary, count, 16);
7163 uint32_t* temp2 = allocate_array<uint32_t>(&temporary, count, 16);
7164
7165 unsigned buckets0[257] = {};
7166 unsigned buckets1[257] = {};
7167 unsigned buckets2[257] = {};
7168 unsigned buckets3[257] = {};
7169
7170 unsigned* histogram0 = buckets0+1;
7171 unsigned* histogram1 = buckets1+1;
7172 unsigned* histogram2 = buckets2+1;
7173 unsigned* histogram3 = buckets3+1;
7174
7175 for (unsigned i = 0; i < count; ++i) {
7176 uint32_t d = data[i];
7177
7178 ++histogram0[(d >> (0 << 3)) & 0xff];
7179 ++histogram1[(d >> (1 << 3)) & 0xff];
7180 ++histogram2[(d >> (2 << 3)) & 0xff];
7181 ++histogram3[(d >> (3 << 3)) & 0xff];
7182 }
7183
7184 for (unsigned i = 1; i < 256; ++i) {
7185 buckets0[i] += buckets0[i-1];
7186 buckets1[i] += buckets1[i-1];
7187 buckets2[i] += buckets2[i-1];
7188 buckets3[i] += buckets3[i-1];
7189 }
7190
7191 for (unsigned i = 0; i < count; ++i) {
7192 uint32_t d = data[i];
7193 uint32_t d2 = data2[i];
7194 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
7195 temp[index] = d;
7196 temp2[index] = d2;
7197 }
7198
7199 for (unsigned i = 0; i < count; ++i) {
7200 uint32_t d = temp[i];
7201 uint32_t d2 = temp2[i];
7202 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
7203 data[index] = d;
7204 data2[index] = d2;
7205 }
7206
7207 for (unsigned i = 0; i < count; ++i) {
7208 uint32_t d = data[i];
7209 uint32_t d2 = data2[i];
7210 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
7211 temp[index] = d;
7212 temp2[index] = d2;
7213 }
7214
7215 for (unsigned i = 0; i < count; ++i) {
7216 uint32_t d = temp[i];
7217 uint32_t d2 = temp2[i];
7218 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
7219 data[index] = d;
7220 data2[index] = d2;
7221 }
7222}
7223
7224static inline void radix_sort_uint32(uint32_t* data, unsigned count, Arena temporary) {
7225 uint32_t* temp = allocate_array<uint32_t>(&temporary, count, 16);
7226
7227 unsigned buckets0[257] = {};
7228 unsigned buckets1[257] = {};
7229 unsigned buckets2[257] = {};
7230 unsigned buckets3[257] = {};
7231
7232 unsigned* histogram0 = buckets0+1;
7233 unsigned* histogram1 = buckets1+1;
7234 unsigned* histogram2 = buckets2+1;
7235 unsigned* histogram3 = buckets3+1;
7236
7237 for (unsigned i = 0; i < count; ++i) {
7238 uint32_t d = data[i];
7239
7240 ++histogram0[(d >> (0 << 3)) & 0xff];
7241 ++histogram1[(d >> (1 << 3)) & 0xff];
7242 ++histogram2[(d >> (2 << 3)) & 0xff];
7243 ++histogram3[(d >> (3 << 3)) & 0xff];
7244 }
7245
7246 for (unsigned i = 1; i < 256; ++i) {
7247 buckets0[i] += buckets0[i-1];
7248 buckets1[i] += buckets1[i-1];
7249 buckets2[i] += buckets2[i-1];
7250 buckets3[i] += buckets3[i-1];
7251 }
7252
7253 for (unsigned i = 0; i < count; ++i) {
7254 uint32_t d = data[i];
7255 unsigned index = buckets0[(d >> (0 << 3)) & 0xff]++;
7256 temp[index] = d;
7257 }
7258
7259 for (unsigned i = 0; i < count; ++i) {
7260 uint32_t d = temp[i];
7261 unsigned index = buckets1[(d >> (1 << 3)) & 0xff]++;
7262 data[index] = d;
7263 }
7264
7265 for (unsigned i = 0; i < count; ++i) {
7266 uint32_t d = data[i];
7267 unsigned index = buckets2[(d >> (2 << 3)) & 0xff]++;
7268 temp[index] = d;
7269 }
7270
7271 for (unsigned i = 0; i < count; ++i) {
7272 uint32_t d = temp[i];
7273 unsigned index = buckets3[(d >> (3 << 3)) & 0xff]++;
7274 data[index] = d;
7275 }
7276}
7277
7278template<unsigned data_stride, unsigned index_stride, class T>
7279NUDGE_FORCEINLINE static void load4(const float* data, const T* indices,
7280 simdv_float& d0, simdv_float& d1, simdv_float& d2, simdv_float& d3) {
7281 static const unsigned stride_in_floats = data_stride/sizeof(float);
7282
7283#if NUDGE_SIMDV_WIDTH == 256
7284 unsigned i0 = indices[0*index_stride];
7285 unsigned i1 = indices[1*index_stride];
7286 unsigned i2 = indices[2*index_stride];
7287 unsigned i3 = indices[3*index_stride];
7288
7289 simd4_float t0 = simd_float::load4(data + i0*stride_in_floats);
7290 simd4_float t1 = simd_float::load4(data + i1*stride_in_floats);
7291 simd4_float t2 = simd_float::load4(data + i2*stride_in_floats);
7292 simd4_float t3 = simd_float::load4(data + i3*stride_in_floats);
7293
7294 unsigned i4 = indices[4*index_stride];
7295 unsigned i5 = indices[5*index_stride];
7296 unsigned i6 = indices[6*index_stride];
7297 unsigned i7 = indices[7*index_stride];
7298
7299 simd4_float t4 = simd_float::load4(data + i4*stride_in_floats);
7300 simd4_float t5 = simd_float::load4(data + i5*stride_in_floats);
7301 simd4_float t6 = simd_float::load4(data + i6*stride_in_floats);
7302 simd4_float t7 = simd_float::load4(data + i7*stride_in_floats);
7303
7304 d0 = simd::concat(t0, t4);
7305 d1 = simd::concat(t1, t5);
7306 d2 = simd::concat(t2, t6);
7307 d3 = simd::concat(t3, t7);
7308#else
7309 unsigned i0 = indices[0*index_stride];
7310 unsigned i1 = indices[1*index_stride];
7311 unsigned i2 = indices[2*index_stride];
7312 unsigned i3 = indices[3*index_stride];
7313
7314 d0 = simd_float::load4(data + i0*stride_in_floats);
7315 d1 = simd_float::load4(data + i1*stride_in_floats);
7316 d2 = simd_float::load4(data + i2*stride_in_floats);
7317 d3 = simd_float::load4(data + i3*stride_in_floats);
7318#endif
7319
7320 simd128::transpose32(d0, d1, d2, d3);
7321}
7322
7323template<unsigned data_stride, unsigned index_stride, class T>
7324NUDGE_FORCEINLINE static void load8(const float* data, const T* indices,
7325 simdv_float& d0, simdv_float& d1, simdv_float& d2, simdv_float& d3,
7326 simdv_float& d4, simdv_float& d5, simdv_float& d6, simdv_float& d7) {
7327 static const unsigned stride_in_floats = data_stride/sizeof(float);
7328
7329#if NUDGE_SIMDV_WIDTH == 256
7330 unsigned i0 = indices[0*index_stride];
7331 unsigned i1 = indices[1*index_stride];
7332 unsigned i2 = indices[2*index_stride];
7333 unsigned i3 = indices[3*index_stride];
7334
7335 simdv_float t0 = simd_float::load8(data + i0*stride_in_floats);
7336 simdv_float t1 = simd_float::load8(data + i1*stride_in_floats);
7337 simdv_float t2 = simd_float::load8(data + i2*stride_in_floats);
7338 simdv_float t3 = simd_float::load8(data + i3*stride_in_floats);
7339
7340 unsigned i4 = indices[4*index_stride];
7341 unsigned i5 = indices[5*index_stride];
7342 unsigned i6 = indices[6*index_stride];
7343 unsigned i7 = indices[7*index_stride];
7344
7345 simdv_float t4 = simd_float::load8(data + i4*stride_in_floats);
7346 simdv_float t5 = simd_float::load8(data + i5*stride_in_floats);
7347 simdv_float t6 = simd_float::load8(data + i6*stride_in_floats);
7348 simdv_float t7 = simd_float::load8(data + i7*stride_in_floats);
7349
7350 d0 = simd256::permute128<0,2>(t0, t4);
7351 d1 = simd256::permute128<0,2>(t1, t5);
7352 d2 = simd256::permute128<0,2>(t2, t6);
7353 d3 = simd256::permute128<0,2>(t3, t7);
7354
7355 d4 = simd256::permute128<1,3>(t0, t4);
7356 d5 = simd256::permute128<1,3>(t1, t5);
7357 d6 = simd256::permute128<1,3>(t2, t6);
7358 d7 = simd256::permute128<1,3>(t3, t7);
7359#else
7360 unsigned i0 = indices[0*index_stride];
7361 unsigned i1 = indices[1*index_stride];
7362 unsigned i2 = indices[2*index_stride];
7363 unsigned i3 = indices[3*index_stride];
7364
7365 d0 = simd_float::load4(data + i0*stride_in_floats);
7366 d1 = simd_float::load4(data + i1*stride_in_floats);
7367 d2 = simd_float::load4(data + i2*stride_in_floats);
7368 d3 = simd_float::load4(data + i3*stride_in_floats);
7369
7370 d4 = simd_float::load4(data + i0*stride_in_floats + 4);
7371 d5 = simd_float::load4(data + i1*stride_in_floats + 4);
7372 d6 = simd_float::load4(data + i2*stride_in_floats + 4);
7373 d7 = simd_float::load4(data + i3*stride_in_floats + 4);
7374#endif
7375
7376 simd128::transpose32(d0, d1, d2, d3);
7377 simd128::transpose32(d4, d5, d6, d7);
7378}
7379
// store8: scatter counterpart of the load8 helper defined just above.
// Writes the eight SIMD vectors d0..d7 into rows of a float array; the row selected by
// index i begins at data + i*(data_stride/sizeof(float)).
//
// Template parameters:
//   data_stride  - distance between consecutive rows of `data`, in bytes.
//   index_stride - distance between consecutive entries of `indices`, in elements of T.
//   T            - index element type; every index read is converted to unsigned.
// Parameters:
//   data    - base address of the destination rows.
//   indices - row indices; 8 entries are read when NUDGE_SIMDV_WIDTH == 256, otherwise only 4.
//   d0..d7  - vectors to write. They are taken by value, so the transposes below work on local copies.
//
// Stores are issued in the order written below; if two indices select the same row, the later store overwrites the earlier one.
7380template<unsigned data_stride, unsigned index_stride, class T>
7381NUDGE_FORCEINLINE static void store8(float* data, const T* indices,
7382 simdv_float d0, simdv_float d1, simdv_float d2, simdv_float d3,
7383 simdv_float d4, simdv_float d5, simdv_float d6, simdv_float d7) {
// Row stride converted from bytes to floats.
7384 static const unsigned stride_in_floats = data_stride/sizeof(float);
7385
7386#if NUDGE_SIMDV_WIDTH == 256
// Pair dk with d(k+4).
// NOTE(review): permute128<0,2> presumably gathers the low 128-bit halves of both operands and
// permute128<1,3> the high halves, so t0..t3 would cover lanes 0-3 and t4..t7 lanes 4-7 - confirm against simd256::permute128.
7387 simdv_float t0 = simd256::permute128<0,2>(d0, d4);
7388 simdv_float t1 = simd256::permute128<0,2>(d1, d5);
7389 simdv_float t2 = simd256::permute128<0,2>(d2, d6);
7390 simdv_float t3 = simd256::permute128<0,2>(d3, d7);
7391
7392 simdv_float t4 = simd256::permute128<1,3>(d0, d4);
7393 simdv_float t5 = simd256::permute128<1,3>(d1, d5);
7394 simdv_float t6 = simd256::permute128<1,3>(d2, d6);
7395 simdv_float t7 = simd256::permute128<1,3>(d3, d7);
7396
// transpose32 modifies its four arguments in place (the results are used right after the call).
// NOTE(review): presumably a 4x4 transpose of 32-bit elements inside each 128-bit half - TODO confirm.
7397 simd128::transpose32(t0, t1, t2, t3);
7398 simd128::transpose32(t4, t5, t6, t7);
7399
// First four row indices.
7400 unsigned i0 = indices[0*index_stride];
7401 unsigned i1 = indices[1*index_stride];
7402 unsigned i2 = indices[2*index_stride];
7403 unsigned i3 = indices[3*index_stride];
7404
// One store8 per row, starting at the beginning of the row.
7405 simd_float::store8(data + i0*stride_in_floats, t0);
7406 simd_float::store8(data + i1*stride_in_floats, t1);
7407 simd_float::store8(data + i2*stride_in_floats, t2);
7408 simd_float::store8(data + i3*stride_in_floats, t3);
7409
// Remaining four row indices; they are read only after the first four rows have been written.
7410 unsigned i4 = indices[4*index_stride];
7411 unsigned i5 = indices[5*index_stride];
7412 unsigned i6 = indices[6*index_stride];
7413 unsigned i7 = indices[7*index_stride];
7414
7415 simd_float::store8(data + i4*stride_in_floats, t4);
7416 simd_float::store8(data + i5*stride_in_floats, t5);
7417 simd_float::store8(data + i6*stride_in_floats, t6);
7418 simd_float::store8(data + i7*stride_in_floats, t7);
7419#else
// Narrow path: transpose the two groups of four vectors in place (on the by-value copies).
7420 simd128::transpose32(d0, d1, d2, d3);
7421 simd128::transpose32(d4, d5, d6, d7);
7422
// Only four row indices are read in this path.
7423 unsigned i0 = indices[0*index_stride];
7424 unsigned i1 = indices[1*index_stride];
7425 unsigned i2 = indices[2*index_stride];
7426 unsigned i3 = indices[3*index_stride];
7427
// d0..d3 go to the start of rows i0..i3 ...
7428 simd_float::store4(data + i0*stride_in_floats, d0);
7429 simd_float::store4(data + i1*stride_in_floats, d1);
7430 simd_float::store4(data + i2*stride_in_floats, d2);
7431 simd_float::store4(data + i3*stride_in_floats, d3);
7432
// ... and d4..d7 go to the same rows, 4 floats further in.
7433 simd_float::store4(data + i0*stride_in_floats + 4, d4);
7434 simd_float::store4(data + i1*stride_in_floats + 4, d5);
7435 simd_float::store4(data + i2*stride_in_floats + 4, d6);
7436 simd_float::store4(data + i3*stride_in_floats + 4, d7);
7437#endif
7438}
7439
// NUDGE_INTERNAL_CSBM expands to the logical operator that NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO places between
// its two group/mask tests, (a.collision_group & b.collision_mask) and (b.collision_group & a.collision_mask).
// The pair is skipped when the combined result is false. Define NUDGE_COLLISION_MASKS_CONSISTENT to select the || variant.
// NOTE(review): Bullet's default broadphase filter appears to combine the two tests with &&; please confirm the "like in Bullet" remark below.
7440#ifndef NUDGE_COLLISION_MASKS_CONSISTENT
7441# define NUDGE_INTERNAL_CSBM && /* inconsistent mode (default): no collision if A doesn't want to collide with B or B doesn't want to collide with A */
7442#else
7443# define NUDGE_INTERNAL_CSBM || /* consistent mode (like in Bullet): no collision only if A doesn't want to collide with B and B doesn't want to collide with A */
7444#endif
7445
// Default body-pair filter used by collide(): evaluates to non-zero when the pair (a,b) must be skipped.
// a and b are pointers to body filters (collide() passes BodyFilter*). Both arguments are evaluated more than once,
// so they must be free of side effects. Define NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO beforehand to replace this default.
7446#ifndef NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO
7447// Original code (it used to work when the macro was named NUDGE_COLLIDE_SKIP_BODIES_MACRO and took body indices):
7448//#define NUDGE_COLLIDE_SKIP_BODIES_MACRO(A,B) (!(A) || !(B)) // Body 0 is the static world and is ignored (original code).
7449// No-op variant (works):
7450//#define NUDGE_COLLIDE_SKIP_BODIES_MACRO(A,B) (0) /* no-op */
7451// The original code plus a second condition (combined with &&). It seems OK, but the new check may be redundant... or not?
7452//#define NUDGE_COLLIDE_SKIP_BODIES_MACRO(A,B) ((!(A) || !(B)) || (c->bodies.properties[(A)].mass_inverse<=0 && c->bodies.properties[(B)].mass_inverse<=0))
7453/* De Morgan's laws:
7454 not (A or B) = (not A) and (not B)
7455 not (A and B) = (not A) or (not B)
7456*/
7457// Q) Can we merge the first 2 lines (presumably of the macro below) by chaining De Morgan's laws?
7458// A) Not sure, but I don't think so. The condition states that at least one body must be dynamic and both must be active.
7459// Maybe we could just make the first line include the second line's flags (without removing any line) to maximise early exit when both bodies are disabled or removed (DONE).
// A pair is skipped when any of the following holds:
//   1) both bodies have a BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED bit set in flags (no dynamic body is involved);
//   2) at least one body has a BF_IS_DISABLED_OR_REMOVED bit set in flags;
//   3) the group/mask test fails: (a.collision_group & b.collision_mask) and (b.collision_group & a.collision_mask)
//      are combined with NUDGE_INTERNAL_CSBM and the result is negated.
7460#define NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a,b) \
7461 ( \
7462 ((a)->flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED && (b)->flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) /* if no body is dynamic we can skip. */ \
7463 || (((a)->flags&BF_IS_DISABLED_OR_REMOVED) || ((b)->flags&BF_IS_DISABLED_OR_REMOVED)) /* if one body is disabled or has been removed we can skip */ \
7464 || (!(((a)->collision_group&(b)->collision_mask) NUDGE_INTERNAL_CSBM ((b)->collision_group&(a)->collision_mask))) \
7465 )
7466#endif //NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO
7467
7468void collide(context_t* c,BodyConnections body_connections) {
7469 assert(c);
7470
7471 ActiveBodies* active_bodies = &c->active_bodies;
7472 ContactData* contacts = &c->contact_data;
7473 BodyData bodies = c->bodies;
7474 ColliderData colliders = c->colliders;
7475 const BodyProperties* properties = c->bodies.properties;
7476 Arena temporary = c->arena;
7477
7478 // Dbg test [OK]:
7479 //for (unsigned i=0;i<colliders.boxes.count;i++) {assert(colliders.boxes.transforms[i].body<bodies.count);}
7480 //for (unsigned i=0;i<colliders.spheres.count;i++) {assert(colliders.spheres.transforms[i].body<bodies.count);}
7481
7482 contacts->count = 0;
7483 contacts->sleeping_count = 0;
7484 active_bodies->count = 0;
7485
7486 const Transform* body_transforms = bodies.transforms;
7487
7488 unsigned count = colliders.spheres.count + colliders.boxes.count;
7489 unsigned aligned_count = (count + 7) & (~7);
7490
7491 assert(count <= (1 << 13)); // Too many colliders. 2^13 (=8192) is currently the maximum.
7492
7493 AABB* aos_bounds = allocate_array<AABB>(&temporary, aligned_count, 32);
7494
7495 unsigned box_bounds_offset = 0;
7496 unsigned sphere_bounds_offset = colliders.boxes.count;
7497
7498 Transform* transforms = allocate_array<Transform>(&temporary, count, 32);
7499 uint16_t* collider_tags = allocate_array<uint16_t>(&temporary, count, 32);
7500 uint16_t* collider_bodies = allocate_array<uint16_t>(&temporary, count, 32);
7501
7502 if (colliders.boxes.count) {
7503 for (unsigned i = 0; i < colliders.boxes.count; ++i) {
7504 Transform transform = colliders.boxes.transforms[i];
7505 transform = body_transforms[transform.body] * transform;
7506 transform.body |= (uint32_t)colliders.boxes.tags[i] << 16; // <--- transform.body loses its bodyId info here!
7507
7508 float3x3 m = matrix(make_rotation(transform.rotation));
7509
7510 m.c0 *= colliders.boxes.data[i].size[0];
7511 m.c1 *= colliders.boxes.data[i].size[1];
7512 m.c2 *= colliders.boxes.data[i].size[2];
7513
7514 float3 size = {
7515 fabsf(m.c0.x) + fabsf(m.c1.x) + fabsf(m.c2.x),
7516 fabsf(m.c0.y) + fabsf(m.c1.y) + fabsf(m.c2.y),
7517 fabsf(m.c0.z) + fabsf(m.c1.z) + fabsf(m.c2.z),
7518 };
7519
7520 float3 min = make_float3(transform.position) - size;
7521 float3 max = make_float3(transform.position) + size;
7522
7523 AABB aabb = {
7524 min, 0.0f,
7525 max, 0.0f,
7526 };
7527
7528 transforms[i + box_bounds_offset] = transform;
7529 aos_bounds[i + box_bounds_offset] = aabb;
7530 collider_tags[i + box_bounds_offset] = colliders.boxes.tags[i];
7531 collider_bodies[i + box_bounds_offset] = colliders.boxes.transforms[i].body;
7532 }
7533
7534 colliders.boxes.transforms = transforms + box_bounds_offset; // <---
7535 }
7536
7537 if (colliders.spheres.count) {
7538 for (unsigned i = 0; i < colliders.spheres.count; ++i) {
7539 Transform transform = colliders.spheres.transforms[i];
7540 transform = body_transforms[transform.body] * transform;
7541 transform.body |= (uint32_t)colliders.spheres.tags[i] << 16; // <--- transform.body loses its bodyId info here!
7542
7543 float radius = colliders.spheres.data[i].radius;
7544
7545 float3 min = make_float3(transform.position) - make_float3(radius);
7546 float3 max = make_float3(transform.position) + make_float3(radius);
7547
7548 AABB aabb = {
7549 min, 0.0f,
7550 max, 0.0f,
7551 };
7552
7553 transforms[i + sphere_bounds_offset] = transform;
7554 aos_bounds[i + sphere_bounds_offset] = aabb;
7555 collider_tags[i + sphere_bounds_offset] = colliders.spheres.tags[i];
7556 collider_bodies[i + sphere_bounds_offset] = colliders.spheres.transforms[i].body;
7557 }
7558
7559 colliders.spheres.transforms = transforms + sphere_bounds_offset; // <---
7560 }
7561
7562 for (unsigned i = count; i < aligned_count; ++i) {
7563 AABB zero = {};
7564 aos_bounds[i] = zero;
7565 }
7566
7567 // Morton order using the min corner should improve coherence: After some point, all BBs' min points will be outside a BB's max.
7568 simd4_float scene_min128 = simd_float::load4(&aos_bounds[0].min.x);
7569 simd4_float scene_max128 = scene_min128;
7570
7571 for (unsigned i = 1; i < count; ++i) {
7572 simd4_float p = simd_float::load4(&aos_bounds[i].min.x);
7573 scene_min128 = simd_float::min(scene_min128, p);
7574 scene_max128 = simd_float::max(scene_max128, p);
7575 }
7576
7577 simd4_float scene_scale128 = simd_float::make4((1<<16)-1) * simd_float::recip(scene_max128 - scene_min128);
7578
7579 scene_scale128 = simd_float::min(simd128::shuffle32<0,1,2,2>(scene_scale128), simd128::shuffle32<2,2,0,1>(scene_scale128));
7580 scene_scale128 = simd_float::min(scene_scale128, simd128::shuffle32<1,0,3,2>(scene_scale128));
7581 scene_min128 = scene_min128 * scene_scale128;
7582
7583#ifdef DEBUG
7584 if (simd_float::extract_first_float(scene_scale128) < 2.0f)
7585 log("Warning: World bounds are very large, which may decrease performance. Perhaps there's a body in free fall?\n");
7586#endif
7587
7588#if NUDGE_SIMDV_WIDTH == 256
7589 simdv_float scene_min = simd256::broadcast(scene_min128);
7590 simdv_float scene_scale = simd256::broadcast(scene_scale128);
7591 simdv_int32 index = simd_int32::make8(0 << 16, 1 << 16, 2 << 16, 3 << 16, 4 << 16, 5 << 16, 6 << 16, 7 << 16);
7592#else
7593 simdv_float scene_min = scene_min128;
7594 simdv_float scene_scale = scene_scale128;
7595 simdv_int32 index = simd_int32::make4(0 << 16, 1 << 16, 2 << 16, 3 << 16);
7596#endif
7597
7598 simdv_float scene_min_x = simd128::shuffle32<0,0,0,0>(scene_min);
7599 simdv_float scene_min_y = simd128::shuffle32<1,1,1,1>(scene_min);
7600 simdv_float scene_min_z = simd128::shuffle32<2,2,2,2>(scene_min);
7601
7602 uint64_t* morton_codes = allocate_array<uint64_t>(&temporary, aligned_count, 32);
7603
7604 for (unsigned i = 0; i < count; i += simdv_width32) {
7605#if NUDGE_SIMDV_WIDTH == 256
7606 simd4_float pos_xl = simd_float::load4(&aos_bounds[i+0].min.x);
7607 simd4_float pos_yl = simd_float::load4(&aos_bounds[i+1].min.x);
7608 simd4_float pos_zl = simd_float::load4(&aos_bounds[i+2].min.x);
7609 simd4_float pos_wl = simd_float::load4(&aos_bounds[i+3].min.x);
7610
7611 simdv_float pos_x = simd::concat(pos_xl, simd_float::load4(&aos_bounds[i+4].min.x));
7612 simdv_float pos_y = simd::concat(pos_yl, simd_float::load4(&aos_bounds[i+5].min.x));
7613 simdv_float pos_z = simd::concat(pos_zl, simd_float::load4(&aos_bounds[i+6].min.x));
7614 simdv_float pos_w = simd::concat(pos_wl, simd_float::load4(&aos_bounds[i+7].min.x));
7615#else
7616 simd4_float pos_x = simd_float::load4(&aos_bounds[i+0].min.x);
7617 simd4_float pos_y = simd_float::load4(&aos_bounds[i+1].min.x);
7618 simd4_float pos_z = simd_float::load4(&aos_bounds[i+2].min.x);
7619 simd4_float pos_w = simd_float::load4(&aos_bounds[i+3].min.x);
7620#endif
7621
7622 simd128::transpose32(pos_x, pos_y, pos_z, pos_w);
7623
7624 pos_x = simd_float::msub(pos_x, scene_scale, scene_min_x);
7625 pos_y = simd_float::msub(pos_y, scene_scale, scene_min_y);
7626 pos_z = simd_float::msub(pos_z, scene_scale, scene_min_z);
7627
7628 simdv_int32 lm, hm;
7629 morton(simd_float::toint(pos_x), simd_float::toint(pos_y), simd_float::toint(pos_z), lm, hm);
7630 hm = simd::bitwise_or(hm, index);
7631
7632 simdv_int32 mi0 = simd128::unpacklo32(lm, hm);
7633 simdv_int32 mi1 = simd128::unpackhi32(lm, hm);
7634
7635#if NUDGE_SIMDV_WIDTH == 256
7636 simd_int32::store8((int32_t*)(morton_codes + i) + 0, simd256::permute128<0,2>(mi0, mi1));
7637 simd_int32::store8((int32_t*)(morton_codes + i) + 8, simd256::permute128<1,3>(mi0, mi1));
7638#else
7639 simd_int32::store4((int32_t*)(morton_codes + i) + 0, mi0);
7640 simd_int32::store4((int32_t*)(morton_codes + i) + 4, mi1);
7641#endif
7642
7643 index = simd_int32::add(index, simd_int32::makev(simdv_width32 << 16));
7644 }
7645
7646 radix_sort_uint64_low48(morton_codes, count, temporary);
7647 uint16_t* sorted_indices = allocate_array<uint16_t>(&temporary, aligned_count, 32);
7648
7649 for (unsigned i = 0; i < count; ++i)
7650 sorted_indices[i] = (uint16_t)(morton_codes[i] >> 48);
7651
7652 for (unsigned i = count; i < aligned_count; ++i)
7653 sorted_indices[i] = 0;
7654
7655 unsigned bounds_count = aligned_count >> simdv_width32_log2;
7656 AABBV* bounds = allocate_array<AABBV>(&temporary, bounds_count, 32);
7657
7658 for (unsigned i = 0; i < count; i += simdv_width32) {
7659 simdv_float min_x, min_y, min_z, min_w;
7660 simdv_float max_x, max_y, max_z, max_w;
7661 load8<sizeof(aos_bounds[0]), 1>(&aos_bounds[0].min.x, sorted_indices + i,
7662 min_x, min_y, min_z, min_w,
7663 max_x, max_y, max_z, max_w);
7664
7665 simd_float::storev(bounds[i >> simdv_width32_log2].min_x, min_x);
7666 simd_float::storev(bounds[i >> simdv_width32_log2].max_x, max_x);
7667 simd_float::storev(bounds[i >> simdv_width32_log2].min_y, min_y);
7668 simd_float::storev(bounds[i >> simdv_width32_log2].max_y, max_y);
7669 simd_float::storev(bounds[i >> simdv_width32_log2].min_z, min_z);
7670 simd_float::storev(bounds[i >> simdv_width32_log2].max_z, max_z);
7671 }
7672
7673 for (unsigned i = count; i < aligned_count; ++i) {
7674 unsigned bounds_group = i >> simdv_width32_log2;
7675 unsigned bounds_lane = i & (simdv_width32-1);
7676
7677 bounds[bounds_group].min_x[bounds_lane] = NAN;
7678 bounds[bounds_group].max_x[bounds_lane] = NAN;
7679 bounds[bounds_group].min_y[bounds_lane] = NAN;
7680 bounds[bounds_group].max_y[bounds_lane] = NAN;
7681 bounds[bounds_group].min_z[bounds_lane] = NAN;
7682 bounds[bounds_group].max_z[bounds_lane] = NAN;
7683 }
7684
7685 // Pack each set of 8 consecutive AABBs into coarse AABBs.
7686 unsigned coarse_count = aligned_count >> 3;
7687 unsigned aligned_coarse_count = (coarse_count + (simdv_width32-1)) & (~(simdv_width32-1));
7688
7689 unsigned coarse_bounds_count = aligned_coarse_count >> simdv_width32_log2;
7690 AABBV* coarse_bounds = allocate_array<AABBV>(&temporary, coarse_bounds_count, 32);
7691
7692 for (unsigned i = 0; i < coarse_count; ++i) {
7693 unsigned start = i << (3 - simdv_width32_log2);
7694
7695 simd4_float coarse_min_x = simd_float::load4(bounds[start].min_x);
7696 simd4_float coarse_max_x = simd_float::load4(bounds[start].max_x);
7697 simd4_float coarse_min_y = simd_float::load4(bounds[start].min_y);
7698 simd4_float coarse_max_y = simd_float::load4(bounds[start].max_y);
7699 simd4_float coarse_min_z = simd_float::load4(bounds[start].min_z);
7700 simd4_float coarse_max_z = simd_float::load4(bounds[start].max_z);
7701
7702 // Note that the first operand is returned on NaN. The last padded bounds are NaN, so the earlier bounds should be in the first operand.
7703#if NUDGE_SIMDV_WIDTH == 256
7704 coarse_min_x = simd_float::min(coarse_min_x, simd_float::load4(bounds[start].min_x + 4));
7705 coarse_max_x = simd_float::max(coarse_max_x, simd_float::load4(bounds[start].max_x + 4));
7706 coarse_min_y = simd_float::min(coarse_min_y, simd_float::load4(bounds[start].min_y + 4));
7707 coarse_max_y = simd_float::max(coarse_max_y, simd_float::load4(bounds[start].max_y + 4));
7708 coarse_min_z = simd_float::min(coarse_min_z, simd_float::load4(bounds[start].min_z + 4));
7709 coarse_max_z = simd_float::max(coarse_max_z, simd_float::load4(bounds[start].max_z + 4));
7710#else
7711 coarse_min_x = simd_float::min(coarse_min_x, simd_float::load4(bounds[start+1].min_x));
7712 coarse_max_x = simd_float::max(coarse_max_x, simd_float::load4(bounds[start+1].max_x));
7713 coarse_min_y = simd_float::min(coarse_min_y, simd_float::load4(bounds[start+1].min_y));
7714 coarse_max_y = simd_float::max(coarse_max_y, simd_float::load4(bounds[start+1].max_y));
7715 coarse_min_z = simd_float::min(coarse_min_z, simd_float::load4(bounds[start+1].min_z));
7716 coarse_max_z = simd_float::max(coarse_max_z, simd_float::load4(bounds[start+1].max_z));
7717#endif
7718
7719 coarse_min_x = simd_float::min(coarse_min_x, simd128::shuffle32<2,3,0,1>(coarse_min_x));
7720 coarse_max_x = simd_float::max(coarse_max_x, simd128::shuffle32<2,3,0,1>(coarse_max_x));
7721 coarse_min_y = simd_float::min(coarse_min_y, simd128::shuffle32<2,3,0,1>(coarse_min_y));
7722 coarse_max_y = simd_float::max(coarse_max_y, simd128::shuffle32<2,3,0,1>(coarse_max_y));
7723 coarse_min_z = simd_float::min(coarse_min_z, simd128::shuffle32<2,3,0,1>(coarse_min_z));
7724 coarse_max_z = simd_float::max(coarse_max_z, simd128::shuffle32<2,3,0,1>(coarse_max_z));
7725
7726 coarse_min_x = simd_float::min(coarse_min_x, simd128::shuffle32<1,0,3,2>(coarse_min_x));
7727 coarse_max_x = simd_float::max(coarse_max_x, simd128::shuffle32<1,0,3,2>(coarse_max_x));
7728 coarse_min_y = simd_float::min(coarse_min_y, simd128::shuffle32<1,0,3,2>(coarse_min_y));
7729 coarse_max_y = simd_float::max(coarse_max_y, simd128::shuffle32<1,0,3,2>(coarse_max_y));
7730 coarse_min_z = simd_float::min(coarse_min_z, simd128::shuffle32<1,0,3,2>(coarse_min_z));
7731 coarse_max_z = simd_float::max(coarse_max_z, simd128::shuffle32<1,0,3,2>(coarse_max_z));
7732
7733 unsigned bounds_group = i >> simdv_width32_log2;
7734 unsigned bounds_lane = i & (simdv_width32-1);
7735
7736 coarse_bounds[bounds_group].min_x[bounds_lane] = simd_float::extract_first_float(coarse_min_x);
7737 coarse_bounds[bounds_group].max_x[bounds_lane] = simd_float::extract_first_float(coarse_max_x);
7738 coarse_bounds[bounds_group].min_y[bounds_lane] = simd_float::extract_first_float(coarse_min_y);
7739 coarse_bounds[bounds_group].max_y[bounds_lane] = simd_float::extract_first_float(coarse_max_y);
7740 coarse_bounds[bounds_group].min_z[bounds_lane] = simd_float::extract_first_float(coarse_min_z);
7741 coarse_bounds[bounds_group].max_z[bounds_lane] = simd_float::extract_first_float(coarse_max_z);
7742 }
7743
7744 for (unsigned i = coarse_count; i < aligned_coarse_count; ++i) {
7745 unsigned bounds_group = i >> simdv_width32_log2;
7746 unsigned bounds_lane = i & (simdv_width32-1);
7747
7748 coarse_bounds[bounds_group].min_x[bounds_lane] = NAN;
7749 coarse_bounds[bounds_group].max_x[bounds_lane] = NAN;
7750 coarse_bounds[bounds_group].min_y[bounds_lane] = NAN;
7751 coarse_bounds[bounds_group].max_y[bounds_lane] = NAN;
7752 coarse_bounds[bounds_group].min_z[bounds_lane] = NAN;
7753 coarse_bounds[bounds_group].max_z[bounds_lane] = NAN;
7754 }
7755
7756 // Test all coarse groups against each other and generate pairs with potential overlap.
7757 uint32_t* coarse_groups = reserve_array<uint32_t>(&temporary, coarse_count*coarse_count, 32);
7758 unsigned coarse_group_count = 0;
7759
7760 for (unsigned i = 0; i < coarse_count; ++i) {
7761 unsigned bounds_group = i >> simdv_width32_log2;
7762 unsigned bounds_lane = i & (simdv_width32-1);
7763
7764 simdv_float min_a_x = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_x + bounds_lane);
7765 simdv_float max_a_x = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_x + bounds_lane);
7766 simdv_float min_a_y = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_y + bounds_lane);
7767 simdv_float max_a_y = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_y + bounds_lane);
7768 simdv_float min_a_z = simd_float::broadcast_loadv(coarse_bounds[bounds_group].min_z + bounds_lane);
7769 simdv_float max_a_z = simd_float::broadcast_loadv(coarse_bounds[bounds_group].max_z + bounds_lane);
7770
7771 unsigned first = coarse_group_count;
7772
7773 // Maximum number of colliders is 2^13, i.e., 13 bit indices.
7774 // i needs 10 bits.
7775 // j needs 7 or 8 bits.
7776 // mask needs 4 or 8 bits.
7777 unsigned ij_bits = (bounds_group << 8) | (i << 16);
7778
7779 for (unsigned j = bounds_group; j < coarse_bounds_count; ++j) {
7780 simdv_float min_b_x = simd_float::loadv(coarse_bounds[j].min_x);
7781 simdv_float max_b_x = simd_float::loadv(coarse_bounds[j].max_x);
7782 simdv_float min_b_y = simd_float::loadv(coarse_bounds[j].min_y);
7783 simdv_float max_b_y = simd_float::loadv(coarse_bounds[j].max_y);
7784 simdv_float min_b_z = simd_float::loadv(coarse_bounds[j].min_z);
7785 simdv_float max_b_z = simd_float::loadv(coarse_bounds[j].max_z);
7786
7787 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7788 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7789 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7790
7791 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
7792
7793 coarse_groups[coarse_group_count] = mask | ij_bits;
7794 coarse_group_count += mask != 0;
7795
7796 ij_bits += 1 << 8;
7797 }
7798
7799 // Mask out collisions already handled.
7800 coarse_groups[first] &= ~((1 << bounds_lane) - 1);
7801 }
7802
7803 commit_array<uint32_t>(&temporary, coarse_group_count);
7804
7805 uint32_t* coarse_pairs = reserve_array<uint32_t>(&temporary, coarse_group_count*simdv_width32, 32);
7806 unsigned coarse_pair_count = 0;
7807
7808 for (unsigned i = 0; i < coarse_group_count; ++i) {
7809 unsigned group = coarse_groups[i];
7810 unsigned mask = group & 0xff;
7811
7812 unsigned batch = (group & 0xff00) >> (8 - simdv_width32_log2);
7813 unsigned other = group & 0xffff0000;
7814
7815 while (mask) {
7816 unsigned index = first_set_bit(mask);
7817 mask &= mask-1;
7818
7819 coarse_pairs[coarse_pair_count++] = other | (batch + index);
7820 }
7821 }
7822
7823 commit_array<uint32_t>(&temporary, coarse_pair_count);
7824
7825 // Test AABBs within the coarse pairs.
7826 uint32_t* groups = reserve_array<uint32_t>(&temporary, coarse_pair_count*16, 32);
7827 unsigned group_count = 0;
7828
7829#if NUDGE_SIMDV_WIDTH == 256
7830 for (unsigned n = 0; n < coarse_pair_count; ++n) {
7831 unsigned pair = coarse_pairs[n];
7832
7833 unsigned a = pair >> 16;
7834 unsigned b = pair & 0xffff;
7835
7836 unsigned lane_count = 8;
7837
7838 if (a == b)
7839 --lane_count;
7840
7841 if (lane_count + (a << 3) > count)
7842 lane_count = count - (a << 3);
7843
7844 // Maximum number of colliders is 2^13, i.e., 13 bit indices.
7845 // i needs 13 bits.
7846 // j needs 10 or 11 bits.
7847 // mask needs 4 or 8 bits.
7848 unsigned ij_bits = (b << 8) | (a << 22);
7849
7850 unsigned lower_lane_mask = a == b ? 0xfe00 : 0xffff;
7851
7852 simdv_float min_b_x = simd_float::loadv(bounds[b].min_x);
7853 simdv_float max_b_x = simd_float::loadv(bounds[b].max_x);
7854 simdv_float min_b_y = simd_float::loadv(bounds[b].min_y);
7855 simdv_float max_b_y = simd_float::loadv(bounds[b].max_y);
7856 simdv_float min_b_z = simd_float::loadv(bounds[b].min_z);
7857 simdv_float max_b_z = simd_float::loadv(bounds[b].max_z);
7858
7859 for (unsigned i = 0; i < lane_count; ++i, ij_bits += (1 << 19)) {
7860 simdv_float min_a_x = simd_float::broadcast_loadv(bounds[a].min_x + i);
7861 simdv_float max_a_x = simd_float::broadcast_loadv(bounds[a].max_x + i);
7862 simdv_float min_a_y = simd_float::broadcast_loadv(bounds[a].min_y + i);
7863 simdv_float max_a_y = simd_float::broadcast_loadv(bounds[a].max_y + i);
7864 simdv_float min_a_z = simd_float::broadcast_loadv(bounds[a].min_z + i);
7865 simdv_float max_a_z = simd_float::broadcast_loadv(bounds[a].max_z + i);
7866
7867 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7868 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7869 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7870
7871 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
7872
7873 // Mask out collisions already handled.
7874 mask &= lower_lane_mask >> 8;
7875 lower_lane_mask <<= 1;
7876
7877 groups[group_count] = mask | ij_bits;
7878 group_count += mask != 0;
7879 }
7880 }
7881#else
7882 // TODO: This version is currently much worse than the 256-bit version. We should fix it.
7883 for (unsigned n = 0; n < coarse_pair_count; ++n) {
7884 unsigned pair = coarse_pairs[n];
7885
7886 unsigned a = pair >> 16;
7887 unsigned b = pair & 0xffff;
7888
7889 unsigned a_start = a << 3;
7890 unsigned a_end = a_start + (1 << 3);
7891
7892 if (a_end > count)
7893 a_end = count;
7894
7895 unsigned b_start = b << (3 - simdv_width32_log2);
7896 unsigned b_end = b_start + (1 << (3 - simdv_width32_log2));
7897
7898 if (b_end > bounds_count)
7899 b_end = bounds_count;
7900
7901 for (unsigned i = a_start; i < a_end; ++i) {
7902 unsigned bounds_group = i >> simdv_width32_log2;
7903 unsigned bounds_lane = i & (simdv_width32-1);
7904
7905 simdv_float min_a_x = simd_float::broadcast_loadv(bounds[bounds_group].min_x + bounds_lane);
7906 simdv_float max_a_x = simd_float::broadcast_loadv(bounds[bounds_group].max_x + bounds_lane);
7907 simdv_float min_a_y = simd_float::broadcast_loadv(bounds[bounds_group].min_y + bounds_lane);
7908 simdv_float max_a_y = simd_float::broadcast_loadv(bounds[bounds_group].max_y + bounds_lane);
7909 simdv_float min_a_z = simd_float::broadcast_loadv(bounds[bounds_group].min_z + bounds_lane);
7910 simdv_float max_a_z = simd_float::broadcast_loadv(bounds[bounds_group].max_z + bounds_lane);
7911
7912 unsigned first = group_count;
7913
7914 unsigned start = (i+1) >> simdv_width32_log2;
7915
7916 if (start < b_start)
7917 start = b_start;
7918
7919 // Maximum number of colliders is 2^13, i.e., 13 bit indices.
7920 // i needs 13 bits.
7921 // j needs 10 or 11 bits.
7922 // mask needs 4 or 8 bits.
7923 unsigned ij_bits = (start << 8) | (i << 19);
7924
7925 for (unsigned j = start; j < b_end; ++j) {
7926 simdv_float min_b_x = simd_float::loadv(bounds[j].min_x);
7927 simdv_float max_b_x = simd_float::loadv(bounds[j].max_x);
7928 simdv_float min_b_y = simd_float::loadv(bounds[j].min_y);
7929 simdv_float max_b_y = simd_float::loadv(bounds[j].max_y);
7930 simdv_float min_b_z = simd_float::loadv(bounds[j].min_z);
7931 simdv_float max_b_z = simd_float::loadv(bounds[j].max_z);
7932
7933 simdv_float inside_x = simd::bitwise_and(simd_float::cmp_gt(max_b_x, min_a_x), simd_float::cmp_gt(max_a_x, min_b_x));
7934 simdv_float inside_y = simd::bitwise_and(simd_float::cmp_gt(max_b_y, min_a_y), simd_float::cmp_gt(max_a_y, min_b_y));
7935 simdv_float inside_z = simd::bitwise_and(simd_float::cmp_gt(max_b_z, min_a_z), simd_float::cmp_gt(max_a_z, min_b_z));
7936
7937 unsigned mask = simd::signmask32(simd::bitwise_and(simd::bitwise_and(inside_x, inside_y), inside_z));
7938
7939 groups[group_count] = mask | ij_bits;
7940 group_count += mask != 0;
7941
7942 ij_bits += 1 << 8;
7943 }
7944
7945 // Mask out collisions already handled.
7946 if (first < group_count && (groups[first] & 0x7ff00) == (bounds_group << 8))
7947 groups[first] &= ~((2 << bounds_lane) - 1);
7948 }
7949 }
7950#endif
7951
7952 commit_array<uint32_t>(&temporary, group_count);
7953
7954 uint32_t* pairs = reserve_array<uint32_t>(&temporary, group_count*simdv_width32, 32);
7955 unsigned pair_count = 0;
7956
7957 for (unsigned i = 0; i < group_count; ++i) {
7958 unsigned group = groups[i];
7959 unsigned mask = group & 0xff;
7960
7961 unsigned batch = (group & 0x7ff00) >> (8 - simdv_width32_log2);
7962 unsigned base = ((uint32_t)(group >> 19) << 16) | batch;
7963
7964 while (mask) {
7965 unsigned index = first_set_bit(mask);
7966 mask &= mask-1;
7967
7968 pairs[pair_count++] = base + index;
7969 }
7970 }
7971
7972 commit_array<uint32_t>(&temporary, pair_count);
7973
7974 for (unsigned i = 0; i < pair_count; ++i) {
7975 unsigned pair = pairs[i];
7976 pairs[i] = sorted_indices[pair & 0xffff] | ((uint32_t)sorted_indices[pair >> 16] << 16);
7977 }
7978
7979 radix_sort_uint32(pairs, pair_count, temporary);
7980
7981 // Discard islands of inactive objects at a coarse level, before detailed collisions.
7982 {
7983 NUDGE_ARENA_SCOPE(temporary);
7984
7985 // Find connected sets.
7986 uint16_t* heights = allocate_array<uint16_t>(&temporary, bodies.count, 16);
7987 uint16_t* parents = allocate_array<uint16_t>(&temporary, bodies.count, 16);
7988
7989 memset(heights, 0, sizeof(heights[0])*bodies.count);
7990 memset(parents, 0xff, sizeof(parents[0])*bodies.count);
7991
7992 for (unsigned i = 0; i < body_connections.count; ++i) {
7993 BodyPair pair = body_connections.data[i];
7994
7995 unsigned a = pair.a;
7996 unsigned b = pair.b;
7997 BodyFilter *a_filter=&c->bodies.filters[a], *b_filter=&c->bodies.filters[b];
7998
7999
8000 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter)) continue;
8001 // Body 0 is the static world and is ignored.
8002// if (!a || !b) continue;
8003
8004 // Determine the root of a and b.
8005 unsigned a_root = a;
8006 unsigned a_parent = parents[a];
8007
8008 for (unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8009 a_root = parent;
8010
8011 unsigned b_root = b;
8012 unsigned b_parent = parents[b];
8013
8014 for (unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8015 b_root = parent;
8016
8017 if (a_root == b_root)
8018 continue;
8019
8020 // Put a and b under the same root.
8021 unsigned a_height = heights[a_root];
8022 unsigned b_height = heights[b_root];
8023
8024 unsigned root;
8025
8026 if (a_height < b_height) {
8027 parents[a_root] = b_root;
8028 root = b_root;
8029 }
8030 else {
8031 parents[b_root] = a_root;
8032 root = a_root;
8033 }
8034
8035 if (a_height == b_height) // Height of subtree increased.
8036 heights[a_root] = a_height+1;
8037
8038 // Propagate the root to make subsequent iterations faster.
8039 if (a_root != a) {
8040 while (a_parent != a_root) {
8041 unsigned next = parents[a_parent];
8042 parents[a] = root;
8043
8044 a = a_parent;
8045 a_parent = next;
8046 }
8047 }
8048
8049 if (b_root != b) {
8050 while (b_parent != b_root) {
8051 unsigned next = parents[b_parent];
8052 parents[b] = root;
8053
8054 b = b_parent;
8055 b_parent = next;
8056 }
8057 }
8058 }
8059
8060 for (unsigned i = 0; i < pair_count; ++i) {
8061 unsigned pair = pairs[i];
8062
8063 unsigned a = collider_bodies[pair & 0xffff];
8064 unsigned b = collider_bodies[pair >> 16];
8065
8066 BodyFilter *a_filter=&c->bodies.filters[a], *b_filter=&c->bodies.filters[b];
8067
8068 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter)) continue;
8069 // Body 0 is the static world and is ignored.
8070// if (!a || !b) continue;
8071
8072 // Determine the root of a and b.
8073 unsigned a_root = a;
8074 unsigned a_parent = parents[a];
8075
8076 for (unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8077 a_root = parent;
8078
8079 unsigned b_root = b;
8080 unsigned b_parent = parents[b];
8081
8082 for (unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8083 b_root = parent;
8084
8085 if (a_root == b_root)
8086 continue;
8087
8088 // Put a and b under the same root.
8089 unsigned a_height = heights[a_root];
8090 unsigned b_height = heights[b_root];
8091
8092 unsigned root;
8093
8094 if (a_height < b_height) {
8095 parents[a_root] = b_root;
8096 root = b_root;
8097 }
8098 else {
8099 parents[b_root] = a_root;
8100 root = a_root;
8101 }
8102
8103 if (a_height == b_height) // Height of subtree increased.
8104 heights[a_root] = a_height+1;
8105
8106 // Propagate the root to make subsequent iterations faster.
8107 if (a_root != a) {
8108 while (a_parent != a_root) {
8109 unsigned next = parents[a_parent];
8110 parents[a] = root;
8111
8112 a = a_parent;
8113 a_parent = next;
8114 }
8115 }
8116
8117 if (b_root != b) {
8118 while (b_parent != b_root) {
8119 unsigned next = parents[b_parent];
8120 parents[b] = root;
8121
8122 b = b_parent;
8123 b_parent = next;
8124 }
8125 }
8126 }
8127
8128 // Identify a numbered set for each body.
8129 unsigned set_count = 0;
8130 uint16_t* sets = heights;
8131 memset(sets, 0xff, sizeof(sets[0])*bodies.count);
8132
8133 for (unsigned i = 0 /* was 1 */; i < bodies.count; ++i) {
8134 //if (bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) continue; // new
8135 if (bodies.filters[i].flags&BF_IS_DISABLED_OR_REMOVED) continue;
8136 unsigned root = parents[i];
8137
8138 for (unsigned parent = root; parent != 0xffff; parent = parents[root])
8139 root = parent;
8140
8141 if (root == 0xffff)
8142 root = i;
8143
8144 if (sets[root] == 0xffff)
8145 sets[root] = set_count++;
8146
8147 sets[i] = sets[root];
8148 }
8149
8150 sets[0] = 0;
8151
8152 // Determine active sets.
8153 uint8_t* active = allocate_array<uint8_t>(&temporary, set_count, 16);
8154 memset(active, 0, sizeof(active[0])*set_count);
8155
8156 for (unsigned i = 0 /* was 1 */; i < bodies.count; ++i) {
8157 if (bodies.idle_counters[i] != 0xff
8158 //&& !(c->bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) // new
8160 )
8161 active[sets[i]] = 1;
8162 }
8163
8164 // Remove inactive pairs.
8165 unsigned removed = 0;
8166
8167 for (unsigned i = 0; i < pair_count; ++i) {
8168 unsigned pair = pairs[i];
8169
8170 unsigned a = collider_bodies[pair & 0xffff];
8171 unsigned b = collider_bodies[pair >> 16];
8172
8173 if (a == b
8174 //|| ((c->bodies.filters[a].flags&c->bodies.filters[b].flags)&BF_IS_STATIC_OR_KINEMATIC) // new: test (tested: it does nothing!)
8175 ) {
8176 ++removed;
8177 continue;
8178 }
8179
8180 unsigned set = sets[a] | sets[b];
8181
8182 if (active[set]) {
8183 pairs[i-removed] = pair;
8184 }
8185 else {
8186 unsigned a = collider_tags[pair & 0xffff];
8187 unsigned b = collider_tags[pair >> 16];
8188
8189 contacts->sleeping_pairs[contacts->sleeping_count++] = a > b ? a | (b << 16): b | (a << 16);
8190 ++removed;
8191 }
8192 }
8193
8194 pair_count -= removed;
8195 }
8196
8197 uint32_t bucket_sizes[4] = {};
8198
8199 for (unsigned i = 0; i < pair_count; ++i) {
8200 unsigned pair = pairs[i];
8201
8202 unsigned a = pair & 0xffff;
8203 unsigned b = pair >> 16;
8204
8205 a = a >= colliders.boxes.count ? 1 : 0;
8206 b = b >= colliders.boxes.count ? 2 : 0;
8207
8208 unsigned ab = a | b;
8209
8210 ++bucket_sizes[ab];
8211 }
8212
8213 uint32_t bucket_offsets[4] = {
8214 0,
8215 ((bucket_sizes[0] + 7) & ~3),
8216 ((bucket_sizes[0] + 7) & ~3) + bucket_sizes[1],
8217 ((bucket_sizes[0] + 7) & ~3) + bucket_sizes[1] + bucket_sizes[2],
8218 };
8219
8220 uint32_t written_per_bucket[4] = { bucket_offsets[0], bucket_offsets[1], bucket_offsets[2], bucket_offsets[3] };
8221
8222 uint32_t* partitioned_pairs = allocate_array<uint32_t>(&temporary, pair_count + 7, 16); // Padding is required.
8223
8224 for (unsigned i = 0; i < pair_count; ++i) {
8225 unsigned pair = pairs[i];
8226
8227 unsigned a = pair & 0xffff;
8228 unsigned b = pair >> 16;
8229
8230 a = a >= colliders.boxes.count ? 1 : 0;
8231 b = b >= colliders.boxes.count ? 2 : 0;
8232
8233 unsigned ab = a | b;
8234
8235 partitioned_pairs[written_per_bucket[ab]++] = pair;
8236 }
8237
8238 for (unsigned i = 0; i < bucket_sizes[2]; ++i) {
8239 unsigned index = bucket_offsets[2] + i;
8240 unsigned pair = partitioned_pairs[index];
8241
8242 partitioned_pairs[index] = (pair >> 16) | (pair << 16);
8243 }
8244
8245 contacts->count += box_box_collide(partitioned_pairs, bucket_sizes[0], colliders.boxes.data, colliders.boxes.transforms, contacts->data + contacts->count, contacts->bodies + contacts->count, contacts->tags + contacts->count, properties, temporary);
8246
8247 // TODO: SIMD-optimize this loop.
8248 for (unsigned i = 0; i < bucket_sizes[1] + bucket_sizes[2]; ++i) {
8249 unsigned pair = partitioned_pairs[bucket_offsets[1] + i];
8250
8251 unsigned a = pair >> 16;
8252 unsigned b = pair & 0xffff;
8253
8254 b -= colliders.boxes.count;
8255
8256 BoxCollider box = colliders.boxes.data[a];
8257 SphereCollider sphere = colliders.spheres.data[b];
8258
8259 // make dbg asserts optional ---------------------------
8260 unsigned bodyA = c->colliders.boxes.transforms[a].body;
8261 unsigned bodyB = c->colliders.spheres.transforms[b].body;
8262 /*assert(bodyA<c->bodies.count);
8263 assert(bodyB<c->bodies.count);
8264 assert(c->bodies.infos[bodyA].num_boxes>0);
8265 assert(c->bodies.infos[bodyB].num_spheres>0);*/
8266 const float friction = NUDGE_FRICTION_MODEL(properties[bodyA].friction,properties[bodyB].friction);
8267 //if (friction!=0.5f) log("box_sphere_collide: %u (friction:%1.f); %u (friction:%1.f); contact_friction = %1.3f\n",bodyA,properties[bodyA].friction,bodyB,properties[bodyB].friction,friction);
8268 // ------------------------------------------------------
8269
8270 contacts->tags[contacts->count] = (uint64_t)((colliders.boxes.transforms[a].body >> 16) | (colliders.spheres.transforms[b].body & 0xffff0000)) << 32;
8271 contacts->count += box_sphere_collide(box, sphere, colliders.boxes.transforms[a], colliders.spheres.transforms[b], contacts->data + contacts->count, contacts->bodies + contacts->count, friction);
8272 }
8273
8274 // TODO: SIMD-optimize this loop.
8275 for (unsigned i = 0; i < bucket_sizes[3]; ++i) {
8276 unsigned pair = partitioned_pairs[bucket_offsets[3] + i];
8277
8278 unsigned a = pair >> 16;
8279 unsigned b = pair & 0xffff;
8280
8281 a -= colliders.boxes.count;
8282 b -= colliders.boxes.count;
8283
8284 SphereCollider sphere_a = colliders.spheres.data[a];
8285 SphereCollider sphere_b = colliders.spheres.data[b];
8286
8287 // make dbg asserts optional ----------------------------
8288 unsigned bodyA = c->colliders.spheres.transforms[a].body;
8289 unsigned bodyB = c->colliders.spheres.transforms[b].body;
8290 /*assert(bodyA<c->bodies.count);
8291 assert(bodyB<c->bodies.count);
8292 assert(c->bodies.infos[bodyA].num_spheres>0);
8293 assert(c->bodies.infos[bodyB].num_spheres>0);*/
8294 const float friction = NUDGE_FRICTION_MODEL(properties[bodyA].friction,properties[bodyB].friction);
8295 //if (friction!=0.5f) log("sphere_sphere_collide: %u (friction:%1.f); %u (friction:%1.f); contact_friction = %1.3f\n",bodyA,properties[bodyA].friction,bodyB,properties[bodyB].friction,friction);
8296 // ------------------------------------------------------
8297
8298 contacts->tags[contacts->count] = (uint64_t)((colliders.spheres.transforms[a].body >> 16) | (colliders.spheres.transforms[b].body & 0xffff0000)) << 32;
8299 contacts->count += sphere_sphere_collide(sphere_a, sphere_b, colliders.spheres.transforms[a], colliders.spheres.transforms[b], contacts->data + contacts->count, contacts->bodies + contacts->count, friction);
8300 }
8301
8302 // Discard islands of inactive objects at a fine level.
8303 {
8304 NUDGE_ARENA_SCOPE(temporary);
8305
8306 // Find connected sets.
8307 uint16_t* heights = allocate_array<uint16_t>(&temporary, bodies.count, 16);
8308 uint16_t* parents = allocate_array<uint16_t>(&temporary, bodies.count, 16);
8309
8310 memset(heights, 0, sizeof(heights[0])*bodies.count);
8311 memset(parents, 0xff, sizeof(parents[0])*bodies.count);
8312
8313 for (unsigned i = 0; i < body_connections.count; ++i) {
8314 BodyPair pair = body_connections.data[i];
8315
8316 unsigned a = pair.a;
8317 unsigned b = pair.b;
8318
8319 BodyFilter *a_filter=&c->bodies.filters[a], *b_filter=&c->bodies.filters[b];
8320
8321 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter)) continue;
8322 // Body 0 is the static world and is ignored.
8323// if (!a || !b) continue;
8324
8325 // Determine the root of a and b.
8326 unsigned a_root = a;
8327 unsigned a_parent = parents[a];
8328
8329 for (unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8330 a_root = parent;
8331
8332 unsigned b_root = b;
8333 unsigned b_parent = parents[b];
8334
8335 for (unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8336 b_root = parent;
8337
8338 if (a_root == b_root)
8339 continue;
8340
8341 // Put a and b under the same root.
8342 unsigned a_height = heights[a_root];
8343 unsigned b_height = heights[b_root];
8344
8345 unsigned root;
8346
8347 if (a_height < b_height) {
8348 parents[a_root] = b_root;
8349 root = b_root;
8350 }
8351 else {
8352 parents[b_root] = a_root;
8353 root = a_root;
8354 }
8355
8356 if (a_height == b_height) // Height of subtree increased.
8357 heights[a_root] = a_height+1;
8358
8359 // Propagate the root to make subsequent iterations faster.
8360 if (a_root != a) {
8361 while (a_parent != a_root) {
8362 unsigned next = parents[a_parent];
8363 parents[a] = root;
8364
8365 a = a_parent;
8366 a_parent = next;
8367 }
8368 }
8369
8370 if (b_root != b) {
8371 while (b_parent != b_root) {
8372 unsigned next = parents[b_parent];
8373 parents[b] = root;
8374
8375 b = b_parent;
8376 b_parent = next;
8377 }
8378 }
8379 }
8380
8381 for (unsigned i = 0; i < contacts->count; ) {
8382 unsigned a = contacts->bodies[i].a;
8383 unsigned b = contacts->bodies[i].b;
8384
8385 do {
8386 ++i;
8387 }
8388 while (i < contacts->count && contacts->bodies[i].a == a && contacts->bodies[i].b == b);
8389
8390 BodyFilter *a_filter=&c->bodies.filters[a], *b_filter=&c->bodies.filters[b];
8391
8392 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter)) continue;
8393 // Body 0 is the static world and is ignored.
8394 // if (!a || !b) continue;
8395
8396 // Determine the root of a and b.
8397 unsigned a_root = a;
8398 unsigned a_parent = parents[a];
8399
8400 for (unsigned parent = a_parent; parent != 0xffff; parent = parents[a_root])
8401 a_root = parent;
8402
8403 unsigned b_root = b;
8404 unsigned b_parent = parents[b];
8405
8406 for (unsigned parent = b_parent; parent != 0xffff; parent = parents[b_root])
8407 b_root = parent;
8408
8409 if (a_root == b_root)
8410 continue;
8411
8412 // Put a and b under the same root.
8413 unsigned a_height = heights[a_root];
8414 unsigned b_height = heights[b_root];
8415
8416 unsigned root;
8417
8418 if (a_height < b_height) {
8419 parents[a_root] = b_root;
8420 root = b_root;
8421 }
8422 else {
8423 parents[b_root] = a_root;
8424 root = a_root;
8425 }
8426
8427 if (a_height == b_height) // Height of subtree increased.
8428 heights[a_root] = a_height+1;
8429
8430 // Propagate the root to make subsequent iterations faster.
8431 if (a_root != a) {
8432 while (a_parent != a_root) {
8433 unsigned next = parents[a_parent];
8434 parents[a] = root;
8435
8436 a = a_parent;
8437 a_parent = next;
8438 }
8439 }
8440
8441 if (b_root != b) {
8442 while (b_parent != b_root) {
8443 unsigned next = parents[b_parent];
8444 parents[b] = root;
8445
8446 b = b_parent;
8447 b_parent = next;
8448 }
8449 }
8450 }
8451
8452 // Identify a numbered set for each body.
8453 unsigned set_count = 0;
8454 uint16_t* sets = heights;
8455 memset(sets, 0xff, sizeof(sets[0])*bodies.count);
8456
8457 for (unsigned i = 0 /* was 1 */; i < bodies.count; ++i) {
8458 //if (bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) continue; // new
8459 if (bodies.filters[i].flags&BF_IS_DISABLED_OR_REMOVED) continue;
8460 unsigned root = parents[i];
8461
8462 for (unsigned parent = root; parent != 0xffff; parent = parents[root])
8463 root = parent;
8464
8465 if (root == 0xffff)
8466 root = i;
8467
8468 if (sets[root] == 0xffff)
8469 sets[root] = set_count++;
8470
8471 sets[i] = sets[root];
8472 }
8473
8474 sets[0] = 0;
8475
8476 // Determine active sets.
8477 uint8_t* active = allocate_array<uint8_t>(&temporary, set_count, 16);
8478 memset(active, 0, sizeof(active[0])*set_count);
8479
8480 for (unsigned i = 0 /* was 1 */; i < bodies.count; ++i) {
8481 if (bodies.idle_counters[i] != 0xff
8482 //&& !(bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) // new
8483 && !(bodies.filters[i].flags&BF_IS_DISABLED_OR_REMOVED) // new
8484 )
8485 active[sets[i]] = 1;
8486 }
8487
8488 // Determine active bodies.
8489 for (unsigned i = 0 /* was 1 */; i < bodies.count; ++i) {
8490 //if (bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED) continue; // new
8491 if (bodies.filters[i].flags&BF_IS_DISABLED_OR_REMOVED) continue; // new
8492 unsigned set = sets[i];
8493
8494 if (active[set])
8495 active_bodies->indices[active_bodies->count++] = i;
8496 }
8497
8498 // Remove inactive contacts.
8499 unsigned removed = 0;
8500
8501 for (unsigned i = 0; i < contacts->count; ) {
8502 unsigned a = contacts->bodies[i].a;
8503 unsigned b = contacts->bodies[i].b;
8504 unsigned tag = contacts->tags[i] >> 32;
8505
8506 unsigned span = 0;
8507
8508 do {
8509 ++span;
8510 }
8511 while (i+span < contacts->count && (contacts->tags[i+span] >> 32) == tag);
8512
8513 unsigned set = sets[a] | sets[b];
8514
8515 if (active[set]) {
8516 for (unsigned j = 0; j < span; ++j) {
8517 contacts->tags[i+j-removed] = contacts->tags[i+j];
8518 contacts->data[i+j-removed] = contacts->data[i+j];
8519 contacts->bodies[i+j-removed] = contacts->bodies[i+j];
8520 }
8521 }
8522 else {
8523 contacts->sleeping_pairs[contacts->sleeping_count++] = tag;
8524 removed += span;
8525 }
8526
8527 i += span;
8528 }
8529
8530 contacts->count -= removed;
8531 }
8532
8533 radix_sort_uint32(contacts->sleeping_pairs, contacts->sleeping_count, temporary);
8534}
8535
8536struct ContactImpulseData {
8537 uint32_t* sorted_contacts;
8538
8539 CachedContactImpulse* culled_data;
8540 uint64_t* culled_tags;
8541 unsigned culled_count;
8542
8544};
8545
8546ContactImpulseData* read_cached_impulses(ContactCache contact_cache, ContactData contacts, Arena* memory) {
8547 ContactImpulseData* data = allocate_struct<ContactImpulseData>(memory, 64);
8548
8549 // Sort contacts based on tag so that they can be quickly matched against the contact cache.
8550 uint32_t* sorted_contacts = allocate_array<uint32_t>(memory, contacts.count, 16);
8551 data->sorted_contacts = sorted_contacts;
8552 {
8553 Arena temporary = *memory;
8554 uint32_t* contact_keys = allocate_array<uint32_t>(&temporary, contacts.count, 16);
8555
8556 for (unsigned i = 0; i < contacts.count; ++i) {
8557 sorted_contacts[i] = i;
8558 contact_keys[i] = (uint32_t)contacts.tags[i];
8559 }
8560
8561 radix_sort_uint32_x2(contact_keys, sorted_contacts, contacts.count, temporary);
8562
8563 for (unsigned i = 0; i < contacts.count; ++i) {
8564 unsigned index = sorted_contacts[i];
8565 contact_keys[i] = (uint32_t)(contacts.tags[index] >> 32);
8566 }
8567
8568 radix_sort_uint32_x2(contact_keys, sorted_contacts, contacts.count, temporary);
8569 }
8570
8571 // Gather warm start impulses and store away culled impulses for sleeping pairs.
8572 CachedContactImpulse* culled_data = allocate_array<CachedContactImpulse>(memory, contact_cache.count, 16);
8573 uint64_t* culled_tags = allocate_array<uint64_t>(memory, contact_cache.count, 16);
8574 unsigned culled_count = 0;
8575
8576 CachedContactImpulse* contact_impulses = allocate_array<CachedContactImpulse>(memory, contacts.count, 32);
8577 data->data = contact_impulses;
8578
8579 unsigned cached_contact_offset = 0;
8580 unsigned sleeping_pair_offset = 0;
8581
8582 for (unsigned i = 0; i < contacts.count; ++i) {
8583 unsigned index = sorted_contacts[i];
8584 uint64_t tag = contacts.tags[index];
8585
8586 CachedContactImpulse cached_impulse = {};
8587
8588 uint64_t cached_tag;
8589 while (cached_contact_offset < contact_cache.count && (cached_tag = contact_cache.tags[cached_contact_offset]) < tag) {
8590 unsigned cached_pair = cached_tag >> 32;
8591
8592 while (sleeping_pair_offset < contacts.sleeping_count && contacts.sleeping_pairs[sleeping_pair_offset] < cached_pair)
8593 ++sleeping_pair_offset;
8594
8595 if (sleeping_pair_offset < contacts.sleeping_count && contacts.sleeping_pairs[sleeping_pair_offset] == cached_pair) {
8596 culled_data[culled_count] = contact_cache.data[cached_contact_offset];
8597 culled_tags[culled_count] = contact_cache.tags[cached_contact_offset];
8598 ++culled_count;
8599 }
8600
8601 ++cached_contact_offset;
8602 }
8603
8604 if (cached_contact_offset < contact_cache.count && contact_cache.tags[cached_contact_offset] == tag)
8605 cached_impulse = contact_cache.data[cached_contact_offset];
8606
8607 contact_impulses[index] = cached_impulse;
8608 }
8609
8610 for (; cached_contact_offset < contact_cache.count && sleeping_pair_offset < contacts.sleeping_count; ) {
8611 unsigned a = contact_cache.tags[cached_contact_offset] >> 32;
8612 unsigned b = contacts.sleeping_pairs[sleeping_pair_offset];
8613
8614 if (a < b) {
8615 ++cached_contact_offset;
8616 }
8617 else if (a == b) {
8618 culled_data[culled_count] = contact_cache.data[cached_contact_offset];
8619 culled_tags[culled_count] = contact_cache.tags[cached_contact_offset];
8620 ++culled_count;
8621 ++cached_contact_offset;
8622 }
8623 else {
8624 ++sleeping_pair_offset;
8625 }
8626 }
8627
8628 data->culled_data = culled_data;
8629 data->culled_tags = culled_tags;
8630 data->culled_count = culled_count;
8631
8632 return data;
8633}
8634
8635void write_cached_impulses(ContactCache* contact_cache, ContactData contacts, ContactImpulseData* contact_impulses) {
8636 uint32_t* sorted_contacts = contact_impulses->sorted_contacts;
8637
8638 CachedContactImpulse* culled_data = contact_impulses->culled_data;
8639 uint64_t* culled_tags = contact_impulses->culled_tags;
8640 unsigned culled_count = contact_impulses->culled_count;
8641
8642 // Cache impulses.
8643 assert(contact_cache->capacity >= contacts.count + culled_count); // Out of space in contact cache.
8644 contact_cache->count = contacts.count + culled_count;
8645 {
8646 // Pick sort from contacts and culled impulses.
8647 unsigned i = 0, j = 0, k = 0;
8648
8649 while (i < contacts.count && j < culled_count) {
8650 unsigned index = sorted_contacts[i];
8651
8652 uint64_t a = contacts.tags[index];
8653 uint64_t b = culled_tags[j];
8654
8655 if (a < b) {
8656 contact_cache->tags[k] = contacts.tags[index];
8657 contact_cache->data[k] = contact_impulses->data[index];
8658 ++i;
8659 }
8660 else {
8661 contact_cache->tags[k] = culled_tags[j];
8662 contact_cache->data[k] = culled_data[j];
8663 ++j;
8664 }
8665
8666 ++k;
8667 }
8668
8669 for (; i < contacts.count; ++i) {
8670 unsigned index = sorted_contacts[i];
8671
8672 contact_cache->tags[k] = contacts.tags[index];
8673 contact_cache->data[k] = contact_impulses->data[index];
8674 ++k;
8675 }
8676
8677 for (; j < culled_count; ++j) {
8678 contact_cache->tags[k] = culled_tags[j];
8679 contact_cache->data[k] = culled_data[j];
8680 ++k;
8681 }
8682 }
8683}
8684
8685struct ContactConstraintData {
8686 unsigned contact_count;
8687 InertiaTransform* momentum_to_velocity;
8688 uint32_t* constraint_to_contact;
8689
8690 ContactConstraintV* constraints;
8691 ContactConstraintStateV* constraint_states;
8692 unsigned constraint_batches;
8693};
8694
8695ContactConstraintData* setup_contact_constraints(context_t* c, ContactImpulseData* contact_impulses, Arena* memory) {
8696 // TODO: We should investigate better evaluation order for contacts.
8697 //ActiveBodies& active_bodies = c->active_bodies;
8698 const float allowed_penetration = c->simulation_params.penetration_allowed_amount;
8699 const float bias_factor = c->simulation_params.penetration_bias_factor;
8700
8701 ContactData& contacts = c->contact_data;
8702 BodyData& bodies = c->bodies;
8703
8704 uint32_t* contact_order = contact_impulses->sorted_contacts;
8705
8706 ContactConstraintData* data = allocate_struct<ContactConstraintData>(memory, 64);
8707 data->contact_count = contacts.count;
8708
8709 InertiaTransform* momentum_to_velocity = allocate_array<InertiaTransform>(memory, bodies.count, 32);
8710 data->momentum_to_velocity = momentum_to_velocity;
8711
8712 // TODO: Consider SIMD-optimizing this loop.
8713 // TODO: Don't compute anything for inactive bodies. // original nudge comment
8714 for (unsigned i = 0; i < bodies.count; ++i) { // original nudge code
8715 //for (unsigned j = 0,i=0; j < active_bodies.count; ++j) {i=active_bodies.indices[j]; // naive attempt 1 [FAILED]
8716 //if (bodies.filters[i].idle_counter!=0xff || bodies.filters[i].flags&BF_IS_STATIC_OR_KINEMATIC) { // attempt 2 [FAILED]
8717 if (bodies.filters[i].flags&BF_IS_DYNAMIC) { // attempt 3 [WORKS?!?]
8718 //if (bodies.filters[i].idle_counter!=0xff) {// attempt 4 [Good, but artifacts when objects wake up from sleeping: they seem to start sinking again... we probably must reset their momentum somewhere (?)]
8719 Rotation rotation = make_rotation(bodies.transforms[i].rotation);
8720 float3 inertia_inverse = make_float3(bodies.properties[i].inertia_inverse);
8721
8722 float3x3 m = matrix(rotation);
8723
8724 InertiaTransform transform = {};
8725
8726 transform.xx = inertia_inverse.x*m.c0.x*m.c0.x + inertia_inverse.y*m.c1.x*m.c1.x + inertia_inverse.z*m.c2.x*m.c2.x;
8727 transform.yy = inertia_inverse.x*m.c0.y*m.c0.y + inertia_inverse.y*m.c1.y*m.c1.y + inertia_inverse.z*m.c2.y*m.c2.y;
8728 transform.zz = inertia_inverse.x*m.c0.z*m.c0.z + inertia_inverse.y*m.c1.z*m.c1.z + inertia_inverse.z*m.c2.z*m.c2.z;
8729 transform.xy = inertia_inverse.x*m.c0.x*m.c0.y + inertia_inverse.y*m.c1.x*m.c1.y + inertia_inverse.z*m.c2.x*m.c2.y;
8730 transform.xz = inertia_inverse.x*m.c0.x*m.c0.z + inertia_inverse.y*m.c1.x*m.c1.z + inertia_inverse.z*m.c2.x*m.c2.z;
8731 transform.yz = inertia_inverse.x*m.c0.y*m.c0.z + inertia_inverse.y*m.c1.y*m.c1.z + inertia_inverse.z*m.c2.y*m.c2.z;
8732
8733 momentum_to_velocity[i] = transform;
8734 bodies.momentum[i].unused0 = bodies.properties[i].mass_inverse;
8735 }
8736 else {memset(&momentum_to_velocity[i],0,sizeof(InertiaTransform));bodies.momentum[i].unused0 = 0;} // attempt 3 [WORKS?!?]
8737 //} // attempt 2
8738 }
8739
8740 CachedContactImpulse* impulses = contact_impulses->data;
8741
8742 uint32_t* constraint_to_contact = allocate_array<uint32_t>(memory, contacts.count*simdv_width32, 32);
8743 data->constraint_to_contact = constraint_to_contact;
8744
8745 // Schedule contacts so there are no conflicts within a SIMD width.
8746 ContactSlotV* contact_slots = reserve_array<ContactSlotV>(memory, contacts.count, 32);
8747 unsigned contact_slot_count = 0;
8748 {
8749 Arena temporary = *memory;
8750 commit_array<ContactSlotV>(&temporary, contacts.count);
8751# ifndef NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT
8752# define NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT (16)
8753# endif
8754 static const unsigned bucket_count = NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT; // (16)
8755
8756 ContactPairV* vacant_pair_buckets[bucket_count];
8757 ContactSlotV* vacant_slot_buckets[bucket_count];
8758 unsigned bucket_vacancy_count[bucket_count] = {};
8759
8760 simdv_int32 invalid_index = simd_int32::makev(~0u);
8761
8762 assert(temporary.size>bucket_count*((2*contacts.count+1)+31)*sizeof(ContactPairV));
8763 /*if (contacts.count>600) {
8764 log("[%llu] contacts.count:%u temporary.size:%lu min_size=%lu\n",
8765 c->simulation_params.num_frames,contacts.count,temporary.size,c->simulation_params.min_remaining_arena_size);
8766 flush();
8767 }*/
8768# ifdef ORIGINAL_CODE
8769 for (unsigned i = 0; i < bucket_count; ++i) {
8770 vacant_pair_buckets[i] = allocate_array<ContactPairV>(&temporary, contacts.count+1, 32);
8771 vacant_slot_buckets[i] = allocate_array<ContactSlotV>(&temporary, contacts.count, 32);
8772
8773 // Add padding with invalid data so we don't have to range check.
8774 simd_int32::storev((int32_t*)vacant_pair_buckets[i]->ab, invalid_index);
8775 }
8776# else
8777 assert(sizeof(ContactPairV)==sizeof(ContactSlotV));
8778 const unsigned stride = (2*contacts.count+1);
8779 ContactPairV* unified_alloc = allocate_array<ContactPairV>(&temporary, bucket_count*stride, 32);
8780 for (unsigned i = 0; i < bucket_count; ++i) {
8781 vacant_pair_buckets[i] = unified_alloc;
8782 vacant_slot_buckets[i] = (ContactSlotV*) &unified_alloc[contacts.count+1];
8783 unified_alloc+=stride;
8784 // Add padding with invalid data so we don't have to range check.
8785 simd_int32::storev((int32_t*)vacant_pair_buckets[i]->ab, invalid_index);
8786 }
8787# endif
8788
8789 for (unsigned i = 0; i < contacts.count; ++i) {
8790 unsigned index = contact_order[i];
8791 BodyPair active_bodies = contacts.bodies[index];
8792
8793 unsigned bucket = i % bucket_count;
8794 ContactPairV* vacant_pairs = vacant_pair_buckets[bucket];
8795 ContactSlotV* vacant_slots = vacant_slot_buckets[bucket];
8796 unsigned vacancy_count = bucket_vacancy_count[bucket];
8797
8798 BodyFilter *a_filter=&c->bodies.filters[active_bodies.a], *b_filter=&c->bodies.filters[active_bodies.b];
8799 // new: [Optimizable by unwrapping]
8800 if (NUDGE_COLLIDE_SKIP_BODYFILTERS_MACRO(a_filter,b_filter))
8801 continue; // but I'm not sure if this early exit is harmless...
8802
8803 // Ignore dependencies on body 0. // original note: originally body 0 was reserved for static background (and I don't know if now this relation can be completely broken...)
8804 //unsigned ca = active_bodies.a ? active_bodies.a : active_bodies.b;
8805 //unsigned cb = active_bodies.b ? active_bodies.b : active_bodies.a;
8806
8807 // Attempt 1 to replace the 2 lines above
8808 unsigned ca = a_filter->flags&BF_IS_DYNAMIC ? active_bodies.a : active_bodies.b; // or !=0?
8809 unsigned cb = b_filter->flags&BF_IS_DYNAMIC ? active_bodies.b : active_bodies.a;
8810
8811 // Attempt number 2
8812 //unsigned ca = active_bodies.a;
8813 //unsigned cb = active_bodies.b;
8814
8815 //if (b_filter->flags&BF_IS_STATIC_OR_KINEMATIC) {ca = active_bodies.b;cb = active_bodies.a;} // not sure if with this the first body of the contact data pair is always static/kinematic (if present)
8816
8817 //assert(ca!=cb); // asserts (probably in original code too)!
8818 //assert(!(a_filter->flags&BF_IS_STATIC_OR_KINEMATIC && b_filter->flags&BF_IS_STATIC_OR_KINEMATIC));
8819
8820#ifdef __AVX2__
8821 __m256i a = _mm256_set1_epi16(ca);
8822 __m256i b = _mm256_set1_epi16(cb);
8823
8824 __m256i scheduled_a_b;
8825
8826 unsigned j = 0;
8827
8828 for (;; ++j) {
8829 scheduled_a_b = _mm256_load_si256((const __m256i*)vacant_pairs[j].ab);
8830
8831 __m256i conflict = _mm256_packs_epi16(_mm256_cmpeq_epi16(a, scheduled_a_b), _mm256_cmpeq_epi16(b, scheduled_a_b));
8832
8833 if (!_mm256_movemask_epi8(conflict))
8834 break;
8835 }
8836
8837 unsigned lane = first_set_bit((unsigned)_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(scheduled_a_b, invalid_index))));
8838#else
8839 __m128i a = _mm_set1_epi16(ca);
8840 __m128i b = _mm_set1_epi16(cb);
8841
8842 __m128i scheduled_a_b;
8843
8844 unsigned j = 0;
8845
8846 for (;; ++j) {
8847 scheduled_a_b = _mm_load_si128((const __m128i*)vacant_pairs[j].ab);
8848
8849 __m128i conflict = _mm_packs_epi16(_mm_cmpeq_epi16(a, scheduled_a_b), _mm_cmpeq_epi16(b, scheduled_a_b));
8850
8851 if (!_mm_movemask_epi8(conflict))
8852 break;
8853 }
8854
8855 unsigned lane = first_set_bit((unsigned)_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(scheduled_a_b, invalid_index))));
8856#endif
8857
8858 ContactSlotV* slot = vacant_slots + j;
8859 ContactPairV* pair = vacant_pairs + j;
8860
8861 slot->indices[lane] = index;
8862
8863#ifdef __AVX2__
8864 _mm_store_ss((float*)pair->ab + lane, _mm_castsi128_ps(_mm_unpacklo_epi16(simd::extract_low(a), simd::extract_low(b))));
8865#else
8866 _mm_store_ss((float*)pair->ab + lane, _mm_castsi128_ps(_mm_unpacklo_epi16(a, b)));
8867#endif
8868
8869 if (j == vacancy_count) {
8870 ++vacancy_count;
8871 }
8872 else if (lane == simdv_width32-1) {
8873 simdv_int32 indices = simd_int32::loadv((const int32_t*)slot->indices);
8874
8875 --vacancy_count;
8876
8877 ContactPairV* last_pair = vacant_pairs + vacancy_count;
8878 ContactSlotV* last_slot = vacant_slots + vacancy_count;
8879
8880 simd_int32::storev((int32_t*)contact_slots[contact_slot_count++].indices, indices);
8881
8882 *pair = *last_pair;
8883 *slot = *last_slot;
8884 }
8885 else {
8886 continue;
8887 }
8888
8889 // Store count and maintain padding.
8890 bucket_vacancy_count[bucket] = vacancy_count;
8891 simd_int32::storev((int32_t*)vacant_pairs[vacancy_count].ab, invalid_index);
8892 }
8893
8894 for (unsigned i = 0; i < bucket_count; ++i) {
8895 ContactPairV* vacant_pairs = vacant_pair_buckets[i];
8896 ContactSlotV* vacant_slots = vacant_slot_buckets[i];
8897 unsigned vacancy_count = bucket_vacancy_count[i];
8898
8899 // Replace any unset indices with the first one, which is always valid.
8900 // This is safe because the slots will just overwrite each other.
8901 for (unsigned i = 0; i < vacancy_count; ++i) {
8902 simdv_int32 ab = simd_int32::loadv((int32_t*)vacant_pairs[i].ab);
8903 simdv_int32 indices = simd_int32::loadv((const int32_t*)vacant_slots[i].indices);
8904
8905 simdv_int32 mask = simd_int32::cmp_eq(ab, invalid_index);
8906 simdv_int32 first_index = simd128::shuffle32<0, 0, 0, 0>(indices);
8907
8908#if NUDGE_SIMDV_WIDTH == 256
8909 first_index = simd256::shuffle128<0,0>(first_index);
8910#endif
8911
8912 indices = simd::blendv32(indices, first_index, mask);
8913
8914 simd_int32::storev((int32_t*)contact_slots[contact_slot_count++].indices, indices);
8915 }
8916 }
8917 }
8918 commit_array<ContactSlotV>(memory, contact_slot_count);
8919
8920 ContactConstraintV* constraints = allocate_array<ContactConstraintV>(memory, contact_slot_count, 32);
8921 ContactConstraintStateV* constraint_states = allocate_array<ContactConstraintStateV>(memory, contact_slot_count, 32);
8922
8923 data->constraints = constraints;
8924 data->constraint_states = constraint_states;
8925
8926 memset(constraint_states, 0, sizeof(ContactConstraintStateV)*contact_slot_count);
8927
8928 for (unsigned i = 0; i < contact_slot_count; ++i) {
8929 ContactSlotV slot = contact_slots[i];
8930
8931 for (unsigned j = 0; j < simdv_width32; ++j)
8932 constraint_to_contact[i*simdv_width32 + j] = slot.indices[j];
8933
8934 simdv_float position_x, position_y, position_z, penetration;
8935 simdv_float normal_x, normal_y, normal_z, friction;
8936 load8<sizeof(contacts.data[0]), 1>((const float*)contacts.data, slot.indices,
8937 position_x, position_y, position_z, penetration,
8938 normal_x, normal_y, normal_z, friction);
8939
8940
8941 NUDGE_SIMDV_ALIGNED uint16_t ab_array[simdv_width32*2];
8942
8943 for (unsigned j = 0; j < simdv_width32; ++j) {
8944 BodyPair pair = contacts.bodies[slot.indices[j]];
8945 ab_array[j*2 + 0] = pair.a;
8946 ab_array[j*2 + 1] = pair.b;
8947 }
8948
8949 unsigned a0 = ab_array[0]; unsigned a1 = ab_array[2]; unsigned a2 = ab_array[4]; unsigned a3 = ab_array[6];
8950 unsigned b0 = ab_array[1]; unsigned b1 = ab_array[3]; unsigned b2 = ab_array[5]; unsigned b3 = ab_array[7];
8951
8952#if NUDGE_SIMDV_WIDTH == 256
8953 unsigned a4 = ab_array[8]; unsigned a5 = ab_array[10]; unsigned a6 = ab_array[12]; unsigned a7 = ab_array[14];
8954 unsigned b4 = ab_array[9]; unsigned b5 = ab_array[11]; unsigned b6 = ab_array[13]; unsigned b7 = ab_array[15];
8955
8956 simdv_float a_mass_inverse = simd_float::make8(bodies.momentum[a0].unused0, bodies.momentum[a1].unused0, bodies.momentum[a2].unused0, bodies.momentum[a3].unused0,
8957 bodies.momentum[a4].unused0, bodies.momentum[a5].unused0, bodies.momentum[a6].unused0, bodies.momentum[a7].unused0);
8958 simdv_float b_mass_inverse = simd_float::make8(bodies.momentum[b0].unused0, bodies.momentum[b1].unused0, bodies.momentum[b2].unused0, bodies.momentum[b3].unused0,
8959 bodies.momentum[b4].unused0, bodies.momentum[b5].unused0, bodies.momentum[b6].unused0, bodies.momentum[b7].unused0);
8960#else
8961 simdv_float a_mass_inverse = simd_float::make4(bodies.momentum[a0].unused0, bodies.momentum[a1].unused0, bodies.momentum[a2].unused0, bodies.momentum[a3].unused0);
8962 simdv_float b_mass_inverse = simd_float::make4(bodies.momentum[b0].unused0, bodies.momentum[b1].unused0, bodies.momentum[b2].unused0, bodies.momentum[b3].unused0);
8963#endif
8964
8965 simdv_float a_position_x, a_position_y, a_position_z, a_position_w;
8966 simdv_float b_position_x, b_position_y, b_position_z, b_position_w;
8967 load4<sizeof(bodies.transforms[0]), 2>(bodies.transforms[0].position, ab_array,
8968 a_position_x, a_position_y, a_position_z, a_position_w);
8969 load4<sizeof(bodies.transforms[0]), 2>(bodies.transforms[0].position, ab_array + 1,
8970 b_position_x, b_position_y, b_position_z, b_position_w);
8971
8972 simdv_float pa_x = position_x - a_position_x;
8973 simdv_float pa_y = position_y - a_position_y;
8974 simdv_float pa_z = position_z - a_position_z;
8975
8976 simdv_float pb_x = position_x - b_position_x;
8977 simdv_float pb_y = position_y - b_position_y;
8978 simdv_float pb_z = position_z - b_position_z;
8979
8980 simdv_float a_momentum_to_velocity_xx, a_momentum_to_velocity_yy, a_momentum_to_velocity_zz, a_momentum_to_velocity_u0;
8981 simdv_float a_momentum_to_velocity_xy, a_momentum_to_velocity_xz, a_momentum_to_velocity_yz, a_momentum_to_velocity_u1;
8982 load8<sizeof(momentum_to_velocity[0]), 2>((const float*)momentum_to_velocity, ab_array,
8983 a_momentum_to_velocity_xx, a_momentum_to_velocity_yy, a_momentum_to_velocity_zz, a_momentum_to_velocity_u0,
8984 a_momentum_to_velocity_xy, a_momentum_to_velocity_xz, a_momentum_to_velocity_yz, a_momentum_to_velocity_u1);
8985
8986 simdv_float na_xt, na_yt, na_zt;
8987 simd_soa::cross(pa_x, pa_y, pa_z, normal_x, normal_y, normal_z, na_xt, na_yt, na_zt);
8988
8989 simdv_float na_x = a_momentum_to_velocity_xx*na_xt + a_momentum_to_velocity_xy*na_yt + a_momentum_to_velocity_xz*na_zt;
8990 simdv_float na_y = a_momentum_to_velocity_xy*na_xt + a_momentum_to_velocity_yy*na_yt + a_momentum_to_velocity_yz*na_zt;
8991 simdv_float na_z = a_momentum_to_velocity_xz*na_xt + a_momentum_to_velocity_yz*na_yt + a_momentum_to_velocity_zz*na_zt;
8992
8993 simdv_float b_momentum_to_velocity_xx, b_momentum_to_velocity_yy, b_momentum_to_velocity_zz, b_momentum_to_velocity_u0;
8994 simdv_float b_momentum_to_velocity_xy, b_momentum_to_velocity_xz, b_momentum_to_velocity_yz, b_momentum_to_velocity_u1;
8995 load8<sizeof(momentum_to_velocity[0]), 2>((const float*)momentum_to_velocity, ab_array + 1,
8996 b_momentum_to_velocity_xx, b_momentum_to_velocity_yy, b_momentum_to_velocity_zz, b_momentum_to_velocity_u0,
8997 b_momentum_to_velocity_xy, b_momentum_to_velocity_xz, b_momentum_to_velocity_yz, b_momentum_to_velocity_u1);
8998
8999 simdv_float nb_xt, nb_yt, nb_zt;
9000 simd_soa::cross(pb_x, pb_y, pb_z, normal_x, normal_y, normal_z, nb_xt, nb_yt, nb_zt);
9001
9002 simdv_float nb_x = b_momentum_to_velocity_xx*nb_xt + b_momentum_to_velocity_xy*nb_yt + b_momentum_to_velocity_xz*nb_zt;
9003 simdv_float nb_y = b_momentum_to_velocity_xy*nb_xt + b_momentum_to_velocity_yy*nb_yt + b_momentum_to_velocity_yz*nb_zt;
9004 simdv_float nb_z = b_momentum_to_velocity_xz*nb_xt + b_momentum_to_velocity_yz*nb_yt + b_momentum_to_velocity_zz*nb_zt;
9005
9006 simd_soa::cross(na_x, na_y, na_z, pa_x, pa_y, pa_z, na_xt, na_yt, na_zt);
9007 simd_soa::cross(nb_x, nb_y, nb_z, pb_x, pb_y, pb_z, nb_xt, nb_yt, nb_zt);
9008
9009 simdv_float normal_impulse_to_rotational_velocity_x = na_xt + nb_xt;
9010 simdv_float normal_impulse_to_rotational_velocity_y = na_yt + nb_yt;
9011 simdv_float normal_impulse_to_rotational_velocity_z = na_zt + nb_zt;
9012
9013 simdv_float r_dot_n = normal_impulse_to_rotational_velocity_x*normal_x + normal_impulse_to_rotational_velocity_y*normal_y + normal_impulse_to_rotational_velocity_z*normal_z;
9014
9015 simdv_float mass_inverse = a_mass_inverse + b_mass_inverse;
9016 simdv_float normal_velocity_to_normal_impulse = mass_inverse + r_dot_n;
9017
9018 simdv_float nonzero = simd_float::cmp_neq(normal_velocity_to_normal_impulse, simd_float::zerov());
9019 normal_velocity_to_normal_impulse = simd::bitwise_and(simd_float::makev(-1.0f) / normal_velocity_to_normal_impulse, nonzero);
9020
9021 simdv_float bias = simd_float::makev(-bias_factor) * simd_float::max(penetration - simd_float::makev(allowed_penetration), simd_float::zerov()) * normal_velocity_to_normal_impulse;
9022
9023 // Compute a tangent from the normal. Care is taken to compute a smoothly varying basis to improve stability.
9024 simdv_float s = simd_float::abs(normal_x);
9025
9026 simdv_float u_x = normal_z*s;
9027 simdv_float u_y = u_x - normal_z;
9028 simdv_float u_z = simd_float::madd(normal_x - normal_y, s, normal_y);
9029
9030 u_x = simd::bitwise_xor(u_x, simd_float::makev(-0.0f));
9031 simd_soa::normalize(u_x, u_y, u_z);
9032
9033 // Compute the rest of the basis.
9034 simdv_float v_x, v_y, v_z;
9035 simd_soa::cross(u_x, u_y, u_z, normal_x, normal_y, normal_z, v_x, v_y, v_z);
9036
9037 simdv_float ua_x, ua_y, ua_z, va_x, va_y, va_z;
9038 simd_soa::cross(pa_x, pa_y, pa_z, u_x, u_y, u_z, ua_x, ua_y, ua_z);
9039 simd_soa::cross(pa_x, pa_y, pa_z, v_x, v_y, v_z, va_x, va_y, va_z);
9040
9041 simdv_float ub_x, ub_y, ub_z, vb_x, vb_y, vb_z;
9042 simd_soa::cross(pb_x, pb_y, pb_z, u_x, u_y, u_z, ub_x, ub_y, ub_z);
9043 simd_soa::cross(pb_x, pb_y, pb_z, v_x, v_y, v_z, vb_x, vb_y, vb_z);
9044
9045 simdv_float a_duu = a_momentum_to_velocity_xx*ua_x*ua_x + a_momentum_to_velocity_yy*ua_y*ua_y + a_momentum_to_velocity_zz*ua_z*ua_z;
9046 simdv_float a_dvv = a_momentum_to_velocity_xx*va_x*va_x + a_momentum_to_velocity_yy*va_y*va_y + a_momentum_to_velocity_zz*va_z*va_z;
9047 simdv_float a_duv = a_momentum_to_velocity_xx*ua_x*va_x + a_momentum_to_velocity_yy*ua_y*va_y + a_momentum_to_velocity_zz*ua_z*va_z;
9048
9049 simdv_float a_suu = a_momentum_to_velocity_xy*ua_x*ua_y + a_momentum_to_velocity_xz*ua_x*ua_z + a_momentum_to_velocity_yz*ua_y*ua_z;
9050 simdv_float a_svv = a_momentum_to_velocity_xy*va_x*va_y + a_momentum_to_velocity_xz*va_x*va_z + a_momentum_to_velocity_yz*va_y*va_z;
9051 simdv_float a_suv = a_momentum_to_velocity_xy*(ua_x*va_y + ua_y*va_x) + a_momentum_to_velocity_xz*(ua_x*va_z + ua_z*va_x) + a_momentum_to_velocity_yz*(ua_y*va_z + ua_z*va_y);
9052
9053 simdv_float b_duu = b_momentum_to_velocity_xx*ub_x*ub_x + b_momentum_to_velocity_yy*ub_y*ub_y + b_momentum_to_velocity_zz*ub_z*ub_z;
9054 simdv_float b_dvv = b_momentum_to_velocity_xx*vb_x*vb_x + b_momentum_to_velocity_yy*vb_y*vb_y + b_momentum_to_velocity_zz*vb_z*vb_z;
9055 simdv_float b_duv = b_momentum_to_velocity_xx*ub_x*vb_x + b_momentum_to_velocity_yy*ub_y*vb_y + b_momentum_to_velocity_zz*ub_z*vb_z;
9056
9057 simdv_float b_suu = b_momentum_to_velocity_xy*ub_x*ub_y + b_momentum_to_velocity_xz*ub_x*ub_z + b_momentum_to_velocity_yz*ub_y*ub_z;
9058 simdv_float b_svv = b_momentum_to_velocity_xy*vb_x*vb_y + b_momentum_to_velocity_xz*vb_x*vb_z + b_momentum_to_velocity_yz*vb_y*vb_z;
9059 simdv_float b_suv = b_momentum_to_velocity_xy*(ub_x*vb_y + ub_y*vb_x) + b_momentum_to_velocity_xz*(ub_x*vb_z + ub_z*vb_x) + b_momentum_to_velocity_yz*(ub_y*vb_z + ub_z*vb_y);
9060
9061 simdv_float friction_x = mass_inverse + a_duu + a_suu + a_suu + b_duu + b_suu + b_suu;
9062 simdv_float friction_y = mass_inverse + a_dvv + a_svv + a_svv + b_dvv + b_svv + b_svv;
9063 simdv_float friction_z = a_duv + a_duv + a_suv + a_suv + b_duv + b_duv + b_suv + b_suv;
9064
9065 simdv_float ua_xt = a_momentum_to_velocity_xx*ua_x + a_momentum_to_velocity_xy*ua_y + a_momentum_to_velocity_xz*ua_z;
9066 simdv_float ua_yt = a_momentum_to_velocity_xy*ua_x + a_momentum_to_velocity_yy*ua_y + a_momentum_to_velocity_yz*ua_z;
9067 simdv_float ua_zt = a_momentum_to_velocity_xz*ua_x + a_momentum_to_velocity_yz*ua_y + a_momentum_to_velocity_zz*ua_z;
9068
9069 simdv_float va_xt = a_momentum_to_velocity_xx*va_x + a_momentum_to_velocity_xy*va_y + a_momentum_to_velocity_xz*va_z;
9070 simdv_float va_yt = a_momentum_to_velocity_xy*va_x + a_momentum_to_velocity_yy*va_y + a_momentum_to_velocity_yz*va_z;
9071 simdv_float va_zt = a_momentum_to_velocity_xz*va_x + a_momentum_to_velocity_yz*va_y + a_momentum_to_velocity_zz*va_z;
9072
9073 simdv_float ub_xt = b_momentum_to_velocity_xx*ub_x + b_momentum_to_velocity_xy*ub_y + b_momentum_to_velocity_xz*ub_z;
9074 simdv_float ub_yt = b_momentum_to_velocity_xy*ub_x + b_momentum_to_velocity_yy*ub_y + b_momentum_to_velocity_yz*ub_z;
9075 simdv_float ub_zt = b_momentum_to_velocity_xz*ub_x + b_momentum_to_velocity_yz*ub_y + b_momentum_to_velocity_zz*ub_z;
9076
9077 simdv_float vb_xt = b_momentum_to_velocity_xx*vb_x + b_momentum_to_velocity_xy*vb_y + b_momentum_to_velocity_xz*vb_z;
9078 simdv_float vb_yt = b_momentum_to_velocity_xy*vb_x + b_momentum_to_velocity_yy*vb_y + b_momentum_to_velocity_yz*vb_z;
9079 simdv_float vb_zt = b_momentum_to_velocity_xz*vb_x + b_momentum_to_velocity_yz*vb_y + b_momentum_to_velocity_zz*vb_z;
9080
9081 constraints[i].a[0] = a0; constraints[i].a[1] = a1; constraints[i].a[2] = a2; constraints[i].a[3] = a3;
9082 constraints[i].b[0] = b0; constraints[i].b[1] = b1; constraints[i].b[2] = b2; constraints[i].b[3] = b3;
9083
9084#if NUDGE_SIMDV_WIDTH == 256
9085 constraints[i].a[4] = a4; constraints[i].a[5] = a5; constraints[i].a[6] = a6; constraints[i].a[7] = a7;
9086 constraints[i].b[4] = b4; constraints[i].b[5] = b5; constraints[i].b[6] = b6; constraints[i].b[7] = b7;
9087#endif
9088
9089 simd_float::storev(constraints[i].n_x, normal_x);
9090 simd_float::storev(constraints[i].n_y, normal_y);
9091 simd_float::storev(constraints[i].n_z, normal_z);
9092
9093 simd_float::storev(constraints[i].pa_x, pa_x);
9094 simd_float::storev(constraints[i].pa_y, pa_y);
9095 simd_float::storev(constraints[i].pa_z, pa_z);
9096
9097 simd_float::storev(constraints[i].pb_x, pb_x);
9098 simd_float::storev(constraints[i].pb_y, pb_y);
9099 simd_float::storev(constraints[i].pb_z, pb_z);
9100
9101 simd_float::storev(constraints[i].normal_velocity_to_normal_impulse, normal_velocity_to_normal_impulse);
9102
9103 simd_float::storev(constraints[i].bias, bias);
9104 simd_float::storev(constraints[i].friction, friction);
9105
9106 simd_float::storev(constraints[i].u_x, u_x);
9107 simd_float::storev(constraints[i].u_y, u_y);
9108 simd_float::storev(constraints[i].u_z, u_z);
9109
9110 simd_float::storev(constraints[i].v_x, v_x);
9111 simd_float::storev(constraints[i].v_y, v_y);
9112 simd_float::storev(constraints[i].v_z, v_z);
9113
9114 simd_float::storev(constraints[i].friction_coefficient_x, friction_x);
9115 simd_float::storev(constraints[i].friction_coefficient_y, friction_y);
9116 simd_float::storev(constraints[i].friction_coefficient_z, friction_z);
9117
9118 simd_float::storev(constraints[i].ua_x, -ua_xt);
9119 simd_float::storev(constraints[i].ua_y, -ua_yt);
9120 simd_float::storev(constraints[i].ua_z, -ua_zt);
9121
9122 simd_float::storev(constraints[i].va_x, -va_xt);
9123 simd_float::storev(constraints[i].va_y, -va_yt);
9124 simd_float::storev(constraints[i].va_z, -va_zt);
9125
9126 simd_float::storev(constraints[i].na_x, -na_x);
9127 simd_float::storev(constraints[i].na_y, -na_y);
9128 simd_float::storev(constraints[i].na_z, -na_z);
9129
9130 simd_float::storev(constraints[i].ub_x, ub_xt);
9131 simd_float::storev(constraints[i].ub_y, ub_yt);
9132 simd_float::storev(constraints[i].ub_z, ub_zt);
9133
9134 simd_float::storev(constraints[i].vb_x, vb_xt);
9135 simd_float::storev(constraints[i].vb_y, vb_yt);
9136 simd_float::storev(constraints[i].vb_z, vb_zt);
9137
9138 simd_float::storev(constraints[i].nb_x, nb_x);
9139 simd_float::storev(constraints[i].nb_y, nb_y);
9140 simd_float::storev(constraints[i].nb_z, nb_z);
9141
9142 simdv_float cached_impulse_x, cached_impulse_y, cached_impulse_z, unused0;
9143 load4<sizeof(impulses[0]), 1>((const float*)impulses, slot.indices,
9144 cached_impulse_x, cached_impulse_y, cached_impulse_z, unused0);
9145
9146 simdv_float a_velocity_x, a_velocity_y, a_velocity_z;
9147 simdv_float a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w;
9148 load8<sizeof(bodies.momentum[0]), 1>((const float*)bodies.momentum, constraints[i].a,
9149 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9150 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9151
9152 simdv_float b_velocity_x, b_velocity_y, b_velocity_z;
9153 simdv_float b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w;
9154 load8<sizeof(bodies.momentum[0]), 1>((const float*)bodies.momentum, constraints[i].b,
9155 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9156 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9157
9158 simdv_float normal_impulse = simd_float::max(normal_x*cached_impulse_x + normal_y*cached_impulse_y + normal_z*cached_impulse_z, simd_float::zerov());
9159 simdv_float max_friction_impulse = normal_impulse * friction;
9160
9161 simdv_float friction_impulse_x = u_x*cached_impulse_x + u_y*cached_impulse_y + u_z*cached_impulse_z;
9162 simdv_float friction_impulse_y = v_x*cached_impulse_x + v_y*cached_impulse_y + v_z*cached_impulse_z;
9163
9164 simdv_float friction_clamp_scale = friction_impulse_x*friction_impulse_x + friction_impulse_y*friction_impulse_y;
9165
9166 friction_clamp_scale = simd_float::rsqrt(friction_clamp_scale);
9167 friction_clamp_scale = friction_clamp_scale * max_friction_impulse;
9168 friction_clamp_scale = simd_float::min(simd_float::makev(1.0f), friction_clamp_scale); // Note: First operand is returned on NaN.
9169
9170 friction_impulse_x = friction_impulse_x * friction_clamp_scale;
9171 friction_impulse_y = friction_impulse_y * friction_clamp_scale;
9172
9173 simdv_float linear_impulse_x = friction_impulse_x*u_x + friction_impulse_y*v_x + normal_x * normal_impulse;
9174 simdv_float linear_impulse_y = friction_impulse_x*u_y + friction_impulse_y*v_y + normal_y * normal_impulse;
9175 simdv_float linear_impulse_z = friction_impulse_x*u_z + friction_impulse_y*v_z + normal_z * normal_impulse;
9176
9177 simdv_float a_angular_impulse_x = friction_impulse_x*simd_float::loadv(constraints[i].ua_x) + friction_impulse_y*simd_float::loadv(constraints[i].va_x) + normal_impulse*simd_float::loadv(constraints[i].na_x);
9178 simdv_float a_angular_impulse_y = friction_impulse_x*simd_float::loadv(constraints[i].ua_y) + friction_impulse_y*simd_float::loadv(constraints[i].va_y) + normal_impulse*simd_float::loadv(constraints[i].na_y);
9179 simdv_float a_angular_impulse_z = friction_impulse_x*simd_float::loadv(constraints[i].ua_z) + friction_impulse_y*simd_float::loadv(constraints[i].va_z) + normal_impulse*simd_float::loadv(constraints[i].na_z);
9180
9181 simdv_float b_angular_impulse_x = friction_impulse_x*simd_float::loadv(constraints[i].ub_x) + friction_impulse_y*simd_float::loadv(constraints[i].vb_x) + normal_impulse*simd_float::loadv(constraints[i].nb_x);
9182 simdv_float b_angular_impulse_y = friction_impulse_x*simd_float::loadv(constraints[i].ub_y) + friction_impulse_y*simd_float::loadv(constraints[i].vb_y) + normal_impulse*simd_float::loadv(constraints[i].nb_y);
9183 simdv_float b_angular_impulse_z = friction_impulse_x*simd_float::loadv(constraints[i].ub_z) + friction_impulse_y*simd_float::loadv(constraints[i].vb_z) + normal_impulse*simd_float::loadv(constraints[i].nb_z);
9184
9185 a_velocity_x -= linear_impulse_x * a_mass_inverse;
9186 a_velocity_y -= linear_impulse_y * a_mass_inverse;
9187 a_velocity_z -= linear_impulse_z * a_mass_inverse;
9188
9189 a_angular_velocity_x += a_angular_impulse_x;
9190 a_angular_velocity_y += a_angular_impulse_y;
9191 a_angular_velocity_z += a_angular_impulse_z;
9192
9193 b_velocity_x += linear_impulse_x * b_mass_inverse;
9194 b_velocity_y += linear_impulse_y * b_mass_inverse;
9195 b_velocity_z += linear_impulse_z * b_mass_inverse;
9196
9197 b_angular_velocity_x += b_angular_impulse_x;
9198 b_angular_velocity_y += b_angular_impulse_y;
9199 b_angular_velocity_z += b_angular_impulse_z;
9200
9201 simd_float::storev(constraint_states[i].applied_normal_impulse, normal_impulse);
9202 simd_float::storev(constraint_states[i].applied_friction_impulse_x, friction_impulse_x);
9203 simd_float::storev(constraint_states[i].applied_friction_impulse_y, friction_impulse_y);
9204
9205 store8<sizeof(bodies.momentum[0]), 1>((float*)bodies.momentum, constraints[i].a,
9206 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9207 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9208
9209 store8<sizeof(bodies.momentum[0]), 1>((float*)bodies.momentum, constraints[i].b,
9210 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9211 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9212 }
9213
9214 data->constraint_batches = contact_slot_count;
9215
9216 return data;
9217}
9218
9219uintptr_t get_required_arena_size_for_setup_contact_constraints(context_t* c) {
9220return
9221 sizeof(ContactConstraintData)+63+
9222 sizeof(InertiaTransform)*c->bodies.count+31+
9223 sizeof(uint32_t)*c->contact_data.count*simdv_width32+31+
9224 sizeof(ContactSlotV)*c->contact_data.count+
9225 NUDGE_SETUP_CONTACT_CONSTRAINTS_BUCKET_COUNT*((2*c->contact_data.count+1)+31)*sizeof(ContactPairV);
9226}
9227
// Runs one solver pass over every contact constraint batch, updating body momentum in place.
//
// For each batch i in [0, data->constraint_batches):
//  - gathers eight values per body for bodies a and b through constraint.a / constraint.b:
//    velocity xyz, a 4th value used here as the inverse mass, angular velocity xyz, and a w
//    component that is overwritten with zero before being stored back;
//  - forms the relative velocity of b with respect to a at the contact point and projects it on
//    the stored normal (n_*) and the two stored tangents (u_*, v_*);
//  - updates the accumulated normal impulse (clamped to be non-negative) and the accumulated
//    friction impulses (rescaled against normal_impulse * friction), writing both into
//    data->constraint_states[i];
//  - applies only the change of each impulse to the linear and angular velocities and scatters
//    the result back with store8.
//
// Parameters:
//  data   - constraint batches and their per-batch states; constraint_states is read and written.
//  bodies - passed by value, but bodies.momentum is written through by store8.
//
// NOTE(review): the formulas quoted in the comments below assume simd_float::madd(a, b, c)
// evaluates a*b + c; its definition is outside this section - confirm.
9228void apply_impulses(ContactConstraintData* data, BodyData bodies) {
9229 ContactConstraintV* constraints = data->constraints;
9230 ContactConstraintStateV* constraint_states = data->constraint_states;
9231
9232 unsigned constraint_batches = data->constraint_batches;
9233
9234 for (unsigned i = 0; i < constraint_batches; ++i) {
9235 const ContactConstraintV& constraint = constraints[i];
9236
 // Gather the momentum of body a for every lane of this batch.
9237 simdv_float a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse;
9238 simdv_float a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w;
9239 load8<sizeof(bodies.momentum[0]), 1>((const float*)bodies.momentum, constraint.a,
9240 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9241 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9242
9243 simdv_float pa_z = simd_float::loadv(constraint.pa_z);
9244 simdv_float pa_x = simd_float::loadv(constraint.pa_x);
9245 simdv_float pa_y = simd_float::loadv(constraint.pa_y);
9246
 // a's linear velocity plus the positive terms of (a_angular_velocity x pa).
9247 simdv_float v_xa = simd_float::madd(a_angular_velocity_y, pa_z, a_velocity_x);
9248 simdv_float v_ya = simd_float::madd(a_angular_velocity_z, pa_x, a_velocity_y);
9249 simdv_float v_za = simd_float::madd(a_angular_velocity_x, pa_y, a_velocity_z);
9250
 // Gather the momentum of body b for every lane of this batch.
9251 simdv_float b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse;
9252 simdv_float b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w;
9253 load8<sizeof(bodies.momentum[0]), 1>((const float*)bodies.momentum, constraint.b,
9254 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9255 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9256
9257 simdv_float pb_z = simd_float::loadv(constraint.pb_z);
9258 simdv_float pb_x = simd_float::loadv(constraint.pb_x);
9259 simdv_float pb_y = simd_float::loadv(constraint.pb_y);
9260
 // b's linear velocity plus the positive terms of (b_angular_velocity x pb).
9261 simdv_float v_xb = simd_float::madd(b_angular_velocity_y, pb_z, b_velocity_x);
9262 simdv_float v_yb = simd_float::madd(b_angular_velocity_z, pb_x, b_velocity_y);
9263 simdv_float v_zb = simd_float::madd(b_angular_velocity_x, pb_y, b_velocity_z);
9264
 // The negative cross-product terms of b are added to the *a* accumulators (and a's to the b
 // accumulators a few lines below), so that the single subtraction v_?b - v_?a yields
 // (b_velocity + b_angular_velocity x pb) - (a_velocity + a_angular_velocity x pa).
9265 v_xa = simd_float::madd(b_angular_velocity_z, pb_y, v_xa);
9266 v_ya = simd_float::madd(b_angular_velocity_x, pb_z, v_ya);
9267 v_za = simd_float::madd(b_angular_velocity_y, pb_x, v_za);
9268
9269 simdv_float n_x = simd_float::loadv(constraint.n_x);
9270 simdv_float fu_x = simd_float::loadv(constraint.u_x);
9271 simdv_float fv_x = simd_float::loadv(constraint.v_x);
9272
9273 v_xb = simd_float::madd(a_angular_velocity_z, pa_y, v_xb);
9274 v_yb = simd_float::madd(a_angular_velocity_x, pa_z, v_yb);
9275 v_zb = simd_float::madd(a_angular_velocity_y, pa_x, v_zb);
9276
9277 simdv_float n_y = simd_float::loadv(constraint.n_y);
9278 simdv_float fu_y = simd_float::loadv(constraint.u_y);
9279 simdv_float fv_y = simd_float::loadv(constraint.v_y);
9280
 // Relative velocity of b with respect to a at the contact point.
9281 simdv_float v_x = v_xb - v_xa;
9282 simdv_float v_y = v_yb - v_ya;
9283 simdv_float v_z = v_zb - v_za;
9284
 // Start three dot products with the relative velocity: t_z against the normal, t_x against
 // tangent u and t_y against tangent v. The y and z terms are accumulated further down.
9285 simdv_float t_z = n_x * v_x;
9286 simdv_float t_x = v_x * fu_x;
9287 simdv_float t_y = v_x * fv_x;
9288
9289 simdv_float n_z = simd_float::loadv(constraint.n_z);
9290 simdv_float fu_z = simd_float::loadv(constraint.u_z);
9291 simdv_float fv_z = simd_float::loadv(constraint.v_z);
9292
9293 simdv_float normal_bias = simd_float::loadv(constraint.bias);
9294 simdv_float old_normal_impulse = simd_float::loadv(constraint_states[i].applied_normal_impulse);
9295 simdv_float normal_factor = simd_float::loadv(constraint.normal_velocity_to_normal_impulse);
9296
9297 t_z = simd_float::madd(n_y, v_y, t_z);
9298 t_x = simd_float::madd(v_y, fu_y, t_x);
9299 t_y = simd_float::madd(v_y, fv_y, t_y);
9300
 // The stored bias and the previously accumulated normal impulse are combined into one addend.
9301 normal_bias = normal_bias + old_normal_impulse;
9302
9303 t_z = simd_float::madd(n_z, v_z, t_z);
9304 t_x = simd_float::madd(v_z, fu_z, t_x);
9305 t_y = simd_float::madd(v_z, fv_z, t_y);
9306
 // Candidate accumulated normal impulse: normal_factor * dot(n, v) + bias + old impulse.
9307 simdv_float normal_impulse = simd_float::madd(normal_factor, t_z, normal_bias);
9308
 // Quadratic terms of the tangential velocity; tl2 is its squared length.
9309 simdv_float t_xx = t_x*t_x;
9310 simdv_float t_yy = t_y*t_y;
9311 simdv_float t_xy = t_x*t_y;
9312 simdv_float tl2 = t_xx + t_yy;
9313
 // The accumulated normal impulse is never allowed to become negative.
9314 normal_impulse = simd_float::max(normal_impulse, simd_float::zerov());
9315
 // Tangential velocity scaled by its squared length; it is later multiplied by the reciprocal
 // of a quadratic form in (t_x, t_y) (friction_factor).
9316 t_x *= tl2;
9317 t_y *= tl2;
9318
9319 simd_float::storev(constraint_states[i].applied_normal_impulse, normal_impulse);
9320
 // The friction limit is taken from the clamped accumulated normal impulse. After the next
 // line normal_impulse holds only the change to apply during this pass.
9321 simdv_float max_friction_impulse = normal_impulse * simd_float::loadv(constraint.friction);
9322 normal_impulse = normal_impulse - old_normal_impulse;
9323
 // friction_factor = t_xx*friction_x + t_yy*friction_y + t_xy*friction_z, interleaved with the
 // normal part of the linear impulse (n * normal impulse delta).
9324 simdv_float friction_x = simd_float::loadv(constraint.friction_coefficient_x);
9325 simdv_float friction_factor = t_xx * friction_x;
9326 simdv_float linear_impulse_x = n_x * normal_impulse;
9327
9328 simdv_float friction_y = simd_float::loadv(constraint.friction_coefficient_y);
9329 friction_factor = simd_float::madd(t_yy, friction_y, friction_factor);
9330 simdv_float linear_impulse_y = n_y * normal_impulse;
9331
9332 simdv_float friction_z = simd_float::loadv(constraint.friction_coefficient_z);
9333 friction_factor = simd_float::madd(t_xy, friction_z, friction_factor);
9334 simdv_float linear_impulse_z = n_z * normal_impulse;
9335
9336 friction_factor = simd_float::recip(friction_factor);
9337
 // Apply the normal impulse delta to a's angular velocity through the stored na_* vector.
9338 simdv_float na_x = simd_float::loadv(constraint.na_x);
9339 simdv_float na_y = simd_float::loadv(constraint.na_y);
9340 simdv_float na_z = simd_float::loadv(constraint.na_z);
9341
9342 a_angular_velocity_x = simd_float::madd(na_x, normal_impulse, a_angular_velocity_x);
9343 a_angular_velocity_y = simd_float::madd(na_y, normal_impulse, a_angular_velocity_y);
9344 a_angular_velocity_z = simd_float::madd(na_z, normal_impulse, a_angular_velocity_z);
9345
9346 simdv_float old_friction_impulse_x = simd_float::loadv(constraint_states[i].applied_friction_impulse_x);
9347 simdv_float old_friction_impulse_y = simd_float::loadv(constraint_states[i].applied_friction_impulse_y);
9348
 // Caps the reciprocal at 1e+6.
9349 friction_factor = simd_float::min(simd_float::makev(1e+6f), friction_factor); // Note: First operand is returned on NaN.
9350
9351 simdv_float friction_impulse_x = t_x*friction_factor;
9352 simdv_float friction_impulse_y = t_y*friction_factor;
9353
9354 friction_impulse_x = old_friction_impulse_x - friction_impulse_x; // Note: Friction impulse has the wrong sign until this point. This is really an addition.
9355 friction_impulse_y = old_friction_impulse_y - friction_impulse_y;
9356
 // Squared length of the candidate accumulated friction impulse.
9357 simdv_float friction_clamp_scale = friction_impulse_x*friction_impulse_x + friction_impulse_y*friction_impulse_y;
9358
9359 simdv_float nb_x = simd_float::loadv(constraint.nb_x);
9360 simdv_float nb_y = simd_float::loadv(constraint.nb_y);
9361 simdv_float nb_z = simd_float::loadv(constraint.nb_z);
9362
 // NOTE(review): rsqrt is presumably a (possibly approximate) reciprocal square root - confirm.
9363 friction_clamp_scale = simd_float::rsqrt(friction_clamp_scale);
9364
 // Apply the normal impulse delta to b's angular velocity through the stored nb_* vector.
9365 b_angular_velocity_x = simd_float::madd(nb_x, normal_impulse, b_angular_velocity_x);
9366 b_angular_velocity_y = simd_float::madd(nb_y, normal_impulse, b_angular_velocity_y);
9367 b_angular_velocity_z = simd_float::madd(nb_z, normal_impulse, b_angular_velocity_z);
9368
 // Scale factor = min(1, max_friction_impulse * rsqrt(squared length)): the accumulated
 // friction impulse is only ever shrunk, never enlarged.
9369 friction_clamp_scale = friction_clamp_scale * max_friction_impulse;
9370 friction_clamp_scale = simd_float::min(simd_float::makev(1.0f), friction_clamp_scale); // Note: First operand is returned on NaN.
9371
9372 friction_impulse_x = friction_impulse_x * friction_clamp_scale;
9373 friction_impulse_y = friction_impulse_y * friction_clamp_scale;
9374
9375 simd_float::storev(constraint_states[i].applied_friction_impulse_x, friction_impulse_x);
9376 simd_float::storev(constraint_states[i].applied_friction_impulse_y, friction_impulse_y);
9377
 // From here on friction_impulse_* hold only the change relative to the previous pass.
9378 friction_impulse_x -= old_friction_impulse_x;
9379 friction_impulse_y -= old_friction_impulse_y;
9380
 // Add the friction deltas along both tangents to the linear impulse.
9381 linear_impulse_x = simd_float::madd(fu_x, friction_impulse_x, linear_impulse_x);
9382 linear_impulse_y = simd_float::madd(fu_y, friction_impulse_x, linear_impulse_y);
9383 linear_impulse_z = simd_float::madd(fu_z, friction_impulse_x, linear_impulse_z);
9384
9385 linear_impulse_x = simd_float::madd(fv_x, friction_impulse_y, linear_impulse_x);
9386 linear_impulse_y = simd_float::madd(fv_y, friction_impulse_y, linear_impulse_y);
9387 linear_impulse_z = simd_float::madd(fv_z, friction_impulse_y, linear_impulse_z);
9388
 // XOR with -0.0f flips the sign bit, so the madd below subtracts the impulse from body a.
9389 simdv_float a_mass_inverse_neg = simd::bitwise_xor(a_mass_inverse, simd_float::makev(-0.0f));
9390
9391 a_velocity_x = simd_float::madd(linear_impulse_x, a_mass_inverse_neg, a_velocity_x);
9392 a_velocity_y = simd_float::madd(linear_impulse_y, a_mass_inverse_neg, a_velocity_y);
9393 a_velocity_z = simd_float::madd(linear_impulse_z, a_mass_inverse_neg, a_velocity_z);
9394
 // Friction deltas applied to a's angular velocity through the stored ua_* / va_* vectors.
9395 simdv_float ua_x = simd_float::loadv(constraint.ua_x);
9396 simdv_float ua_y = simd_float::loadv(constraint.ua_y);
9397 simdv_float ua_z = simd_float::loadv(constraint.ua_z);
9398
9399 a_angular_velocity_x = simd_float::madd(ua_x, friction_impulse_x, a_angular_velocity_x);
9400 a_angular_velocity_y = simd_float::madd(ua_y, friction_impulse_x, a_angular_velocity_y);
9401 a_angular_velocity_z = simd_float::madd(ua_z, friction_impulse_x, a_angular_velocity_z);
9402
9403 simdv_float va_x = simd_float::loadv(constraint.va_x);
9404 simdv_float va_y = simd_float::loadv(constraint.va_y);
9405 simdv_float va_z = simd_float::loadv(constraint.va_z);
9406
9407 a_angular_velocity_x = simd_float::madd(va_x, friction_impulse_y, a_angular_velocity_x);
9408 a_angular_velocity_y = simd_float::madd(va_y, friction_impulse_y, a_angular_velocity_y);
9409 a_angular_velocity_z = simd_float::madd(va_z, friction_impulse_y, a_angular_velocity_z);
9410
9411 a_angular_velocity_w = simd_float::zerov(); // Reduces register pressure.
9412
 // Scatter body a's updated momentum back; the inverse mass is written back unchanged.
9413 store8<sizeof(bodies.momentum[0]), 1>((float*)bodies.momentum, constraint.a,
9414 a_velocity_x, a_velocity_y, a_velocity_z, a_mass_inverse,
9415 a_angular_velocity_x, a_angular_velocity_y, a_angular_velocity_z, a_angular_velocity_w);
9416
 // Body b receives the same linear impulse with the opposite sign (added rather than subtracted).
9417 b_velocity_x = simd_float::madd(linear_impulse_x, b_mass_inverse, b_velocity_x);
9418 b_velocity_y = simd_float::madd(linear_impulse_y, b_mass_inverse, b_velocity_y);
9419 b_velocity_z = simd_float::madd(linear_impulse_z, b_mass_inverse, b_velocity_z);
9420
 // Friction deltas applied to b's angular velocity through the stored ub_* / vb_* vectors.
9421 simdv_float ub_x = simd_float::loadv(constraint.ub_x);
9422 simdv_float ub_y = simd_float::loadv(constraint.ub_y);
9423 simdv_float ub_z = simd_float::loadv(constraint.ub_z);
9424
9425 b_angular_velocity_x = simd_float::madd(ub_x, friction_impulse_x, b_angular_velocity_x);
9426 b_angular_velocity_y = simd_float::madd(ub_y, friction_impulse_x, b_angular_velocity_y);
9427 b_angular_velocity_z = simd_float::madd(ub_z, friction_impulse_x, b_angular_velocity_z);
9428
9429 simdv_float vb_x = simd_float::loadv(constraint.vb_x);
9430 simdv_float vb_y = simd_float::loadv(constraint.vb_y);
9431 simdv_float vb_z = simd_float::loadv(constraint.vb_z);
9432
9433 b_angular_velocity_x = simd_float::madd(vb_x, friction_impulse_y, b_angular_velocity_x);
9434 b_angular_velocity_y = simd_float::madd(vb_y, friction_impulse_y, b_angular_velocity_y);
9435 b_angular_velocity_z = simd_float::madd(vb_z, friction_impulse_y, b_angular_velocity_z);
9436
9437 b_angular_velocity_w = simd_float::zerov(); // Reduces register pressure.
9438
 // Scatter body b's updated momentum back; the inverse mass is written back unchanged.
9439 store8<sizeof(bodies.momentum[0]), 1>((float*)bodies.momentum, constraint.b,
9440 b_velocity_x, b_velocity_y, b_velocity_z, b_mass_inverse,
9441 b_angular_velocity_x, b_angular_velocity_y, b_angular_velocity_z, b_angular_velocity_w);
9442 }
9443}
9444
9445void update_cached_impulses(ContactConstraintData* data, ContactImpulseData* contact_impulses) {
9446 uint32_t* constraint_to_contact = data->constraint_to_contact;
9447
9448 ContactConstraintV* constraints = data->constraints;
9449 ContactConstraintStateV* constraint_states = data->constraint_states;
9450 unsigned constraint_count = data->constraint_batches * simdv_width32;
9451
9452 for (unsigned i = 0; i < constraint_count; ++i) {
9453 unsigned contact = constraint_to_contact[i];
9454
9455 unsigned b = i >> simdv_width32_log2;
9456 unsigned l = i & (simdv_width32-1);
9457
9458 float* impulse = contact_impulses->data[contact].impulse;
9459
9460 impulse[0] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_x[l] +
9461 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_x[l] +
9462 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_x[l]);
9463
9464 impulse[1] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_y[l] +
9465 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_y[l] +
9466 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_y[l]);
9467
9468 impulse[2] = (constraint_states[b].applied_normal_impulse[l] * constraints[b].n_z[l] +
9469 constraint_states[b].applied_friction_impulse_x[l] * constraints[b].u_z[l] +
9470 constraint_states[b].applied_friction_impulse_y[l] * constraints[b].v_z[l]);
9471 }
9472}
9473
9474void advance(context_t* c,float time_step) {
9475 ActiveBodies* active_bodies = &c->active_bodies;
9476 BodyData* bodies = &c->bodies;
9477 float half_time_step = 0.5f * time_step;
9478 const float sleeping_threshold_linear_velocity_squared = c->simulation_params.sleeping_threshold_linear_velocity_squared;
9479 const float sleeping_threshold_angular_velocity_squared = c->simulation_params.sleeping_threshold_angular_velocity_squared;
9480
9481
9482 // TODO: Consider SIMD-optimizing this loop.
9483 for (unsigned n = 0; n < active_bodies->count; ++n) {
9484 unsigned i = active_bodies->indices[n];
9485
9486 BodyFilter* filter = &bodies->filters[i];
9487 if (filter->flags&BF_IS_DYNAMIC
9488 //&& !(filter->flags&BF_IS_SENSOR) // Nope!
9489 ) {
9490 float3 velocity = make_float3(bodies->momentum[i].velocity);
9491 float3 angular_velocity = make_float3(bodies->momentum[i].angular_velocity);
9492 uint8_t* idle_counter = &c->bodies.idle_counters[i];
9493 if (length2(velocity) < sleeping_threshold_linear_velocity_squared && length2(angular_velocity) < sleeping_threshold_angular_velocity_squared) {
9494 if (*idle_counter < 0xff) {
9495 ++(*idle_counter);
9496 //*idle_counter=0xff; // nope
9497 /*// New stuff I'm testing (no way!)
9498 if (*idle_counter==0xff) {
9499 memset(&bodies->momentum[i].velocity,0,sizeof(float3));
9500 memset(&bodies->momentum[i].angular_velocity,0,sizeof(float3));
9501 velocity.x=velocity.y=velocity.z=0.f;
9502 angular_velocity.x=angular_velocity.y=angular_velocity.z=0.f;
9503 }*/
9504 //if (*idle_counter==0xff) continue; // attempt to try? NOPE: does nothing
9505 }
9506 //else continue; // attempt to try? NOPE: with this alone bodies keep waking up
9507 }
9508 else {
9509 *idle_counter = 0;
9510 }
9511 //if (filter->flags&BF_IS_SENSOR) continue; // test to remove
9512
9513 Rotation dr = { angular_velocity, 0.f }; // last value was missing (warning: missing initializer for member ‘{anonymous}::Rotation::s’ [-Wmissing-field-initializers])
9514
9515 dr = dr * make_rotation(bodies->transforms[i].rotation);
9516 dr.v *= half_time_step;
9517 dr.s *= half_time_step;
9518
9519 // 3 new lines (original code did not use these 3 substitutions)
9520 Transform* bodyTransform = &bodies->transforms[i];
9521 float* bodyPosition3 = bodyTransform->position;
9522 float* bodyRotation4 = bodyTransform->rotation;
9523
9524 bodyPosition3[0] += velocity.x * time_step;
9525 bodyPosition3[1] += velocity.y * time_step;
9526 bodyPosition3[2] += velocity.z * time_step;
9527
9528 bodyRotation4[0] += dr.v.x;
9529 bodyRotation4[1] += dr.v.y;
9530 bodyRotation4[2] += dr.v.z;
9531 bodyRotation4[3] += dr.s;
9532
9533 Rotation rotation = normalize(make_rotation(bodyRotation4));
9534
9535 bodyRotation4[0] = rotation.v.x;
9536 bodyRotation4[1] = rotation.v.y;
9537 bodyRotation4[2] = rotation.v.z;
9538 bodyRotation4[3] = rotation.s;
9539 }
9540 }
9541}
9542
9543} // namespace nudge
9544
9545#endif //NUDGE_IMPLEMENTATION_GUARD
9546#endif //NUDGE_IMPLEMENTATION
9547
int can_add_box(context_t *c)
Definition nudge.h:978
void body_recalculate_bounding_box(context_t *c, uint32_t body)
Recalculates the bounding box of the body (BodyInfo::aabb_center and BodyInfo::aabb_extents)
uint32_t colliders_get_num_remaining_spheres(context_t *c)
Returns the number of sphere colliders that can still be added to the physics world.
int can_add_compound(context_t *c, unsigned num_boxes, unsigned num_spheres)
Definition nudge.h:986
int can_add_clone(context_t *c, unsigned body_to_clone)
Definition nudge.h:990
void remove_body(context_t *c, unsigned body)
Removes a body from the simulation.
unsigned get_next_add_body_index(context_t *c)
Allows peeking at the body index that is going to be returned by the next add_xxx(...) call.
Definition nudge.h:999
uint32_t colliders_get_num_remaining_boxes(context_t *c)
Returns the number of box colliders that can still be added to the physics world.
unsigned add_sphere(context_t *c, float mass, float radius, const Transform *T=NULL, const float comOffset[3]=NULL)
Adds a new body to the simulation with a single sphere collider.
void body_scale(nudge::context_t *c, unsigned body, float scale_factor, float mass_scale_factor=0.f)
[Experimental] Uniformly scales the specified body incrementally
unsigned add_clone(context_t *c, unsigned body_to_clone, float mass, const Transform *T=NULL, float scale_factor=1.f, const float newComOffsetInPreScaledUnits[3]=NULL)
[Experimental] Adds a new body to the simulation cloning an existing body
unsigned add_compound(context_t *c, float mass, float inertia[3], unsigned num_boxes, const float *hsizeTriplets, const Transform *boxOffsetTransforms, unsigned num_spheres, const float *radii, const Transform *sphereOffsetTransforms, const Transform *T=NULL, const float comOffset[3]=NULL, float *centerMeshAndRetrieveOldCenter3Out=NULL)
Adds a new body to the simulation with a compound collider made up of num_boxes box colliders and num...
void body_change_motion_state(nudge::context_t *c, unsigned body, nudge::FlagMask new_motion_state, float mass_fallback=1.f)
[Experimental] Changes the body motion state (i.e. the BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC group of ...
unsigned add_box(context_t *c, float mass, float hsizex, float hsizey, float hsizez, const Transform *T=NULL, const float comOffset[3]=NULL)
Adds a new body to the simulation with a single box collider.
int can_add_sphere(context_t *c)
Definition nudge.h:982
void contact_data_find_colliders(const context_t *c, unsigned contact_data_index, int16_t *box_collider_index_for_body_a, int16_t *sphere_collider_index_for_body_a, int16_t *box_collider_index_for_body_b, int16_t *sphere_collider_index_for_body_b, int use_relative_values_for_output_indices=0)
Find out which collider belonging to body_a and body_b is involved in the collision determined by a C...
void init_context(context_t *c)
Mandatory function to be called at program startup.
void show_info()
Displays basic info at program startup; very important call to detect the SIMD configuration of the p...
void load_context(FILE *f, context_t *c)
Loads a saved nudge context.
void save_context(FILE *f, const context_t *c)
Saves the nudge context.
void restart_context(context_t *c)
Optional function that restarts a valid context, preserving the simulation settings and the allocated...
void init_context_with(context_t *c, unsigned MAX_NUM_BOXES, unsigned MAX_NUM_SPHERES)
Mandatory function to be called at program startup.
void destroy_context(context_t *c)
Mandatory function to be called at program exit.
unsigned add_compound_hollow_cylinder(context_t *c, float mass, float min_radius, float max_radius, float hheight, const Transform *T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=8, const float comOffset[3]=NULL)
Adds a new body to the simulation with a compound collider that represents the hollow lateral surface...
unsigned add_compound_prism(context_t *c, float mass, float radius, float hheight, unsigned num_lateral_faces=0, const Transform *T=NULL, AxisEnum axis=AXIS_Y, const float comOffset[3]=NULL)
Adds a new body to the simulation with a compound collider that represents a prism of 4 or more later...
unsigned add_compound_capsule(context_t *c, float mass, float radius, float hheight, const Transform *T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=1, unsigned num_spheres=3, const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f)
Adds a new body to the simulation with a compound collider that represents a capsule.
unsigned add_compound_cylinder(context_t *c, float mass, float radius, float hheight, const Transform *T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=0, unsigned num_spheres=0, const float comOffset[3]=NULL, float box_lateral_side_shrinking=-1.f)
Adds a new body to the simulation with a compound collider that represents a cylinder.
unsigned add_compound_staircase(context_t *c, float mass, float hdepth, float hheight, float hlength, unsigned num_steps=15, const Transform *T=NULL, int orientation_in_0_3=0, const float comOffset[3]=NULL)
Adds a new body to the simulation with a compound collider that represents a staircase; please note t...
unsigned add_compound_torus(context_t *c, float mass, float radius, float inner_radius, const Transform *T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=8, const float comOffset[3]=NULL)
Adds a new body to the simulation with a compound collider that represents a torus.
unsigned add_compound_cone(context_t *c, float mass, float radius, float hheight, const Transform *T=NULL, AxisEnum axis=AXIS_Y, unsigned num_boxes=0, unsigned num_spheres=0, const float comOffset[3]=NULL)
Adds a new body to the simulation with a compound collider that represents an approximated cone.
void body_set_collision_group_and_mask(context_t *c, uint32_t body, CollisionMask single_collision_group_body_belongs_to, CollisionMask collision_group_mask_body_can_collide_with=COLLISION_GROUP_ALL)
Sets the body collision group (a single value of COLLISION_GROUP_) and mask (a combination of COLLISI...
Definition nudge.h:1242
FlagMask * body_get_flags(context_t *c, uint32_t body)
Shortcut that returns a pointer to the body flags (a combination of BF_ enums)
Definition nudge.h:1263
CollisionMask * body_get_collision_mask(context_t *c, uint32_t body)
Gets the body collision mask (a combination of COLLISION_GROUP_ values)
Definition nudge.h:1258
CollisionMask * body_get_collision_group(context_t *c, uint32_t body)
Gets the body collision group (a single value of COLLISION_GROUP_)
Definition nudge.h:1252
void calculate_capsule_inertia_inverse(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_cylinder_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_box_inertia_inverse(float result[3], float mass, float hsizex, float hsizey, float hsizez, const float comOffset[3]=NULL)
void calculate_torus_inertia_inverse(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_hollow_cylinder_inertia(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_sphere_inertia_inverse(float result[3], float mass, float radius, const float comOffset[3]=NULL, bool hollow=false)
void calculate_cone_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_sphere_inertia(float result[3], float mass, float radius, const float comOffset[3]=NULL, bool hollow=false)
void calculate_cylinder_inertia(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_box_inertia(float result[3], float mass, float hsizex, float hsizey, float hsizez, const float comOffset[3]=NULL)
void calculate_capsule_inertia(float result[3], float mass, float radius, float halfCylinderHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_torus_inertia(float result[3], float mass, float majorRadius, float minorRadius, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_cone_inertia_inverse(float result[3], float mass, float radius, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void calculate_hollow_cylinder_inertia_inverse(float result[3], float mass, float R, float r, float halfHeight, AxisEnum upAxis=AXIS_Y, const float comOffset[3]=NULL)
void kinematic_data_reserve_animations(KinematicData *kd, size_t new_size)
Reserves additional space for KinematicData animations.
void kinematic_data_reserve_key_frames(KinematicData *kd, size_t new_size)
Reserves additional space for KinematicData key frames.
int flush(void)
Flushes the log. It defaults to fflush(NUDGE_LOG_FILE_PTR)
int log(const char *format,...)
Logging function used by the library. It defaults to printf.
float * calculate_graphic_transform_for_body(context_t *c, unsigned body, float *pModelMatrix16Out)
Function that can be used to calculate the smoothed 16-float column-major model matrix of a single bo...
void simulation_step(context_t *c)
Mandatory function that must be called once per frame.
unsigned pre_simulation_step(context_t *c, double elapsedSecondsFromLastCall)
Mandatory function that must be called once per frame.
void calculate_graphic_transforms(context_t *c, float *pModelMatricesOut, unsigned modelMatrixStrideInFloatUnits, int loopActiveBodiesOnly=0)
Function that can be used to calculate the smoothed 16-float column-major model matrices of all the b...
float * nm_QuatFromMat4(float *__restrict result4, const float *__restrict m16)
Turns the 3x3 submatrix of a 16-floats column-major matrix (without scaling applied) into a quaternio...
float * nm_QuatFromMat3(float *__restrict result4, const float *__restrict m9)
Turns the 3x3 9-floats column-major rotation matrix (without scaling applied) into a quaternion.
void TransformAssignToBody(context_t *c, unsigned body, Transform newT, float deltaTime, int16_t aux_body=-1)
Assigns a new Transform to a body, and sets its linear and angular velocities based on the difference...
Transform TransformSlerp(Transform T0, Transform T1, float time)
Applies (spherical) lerp between T0 and T1.
void nm_QuatGetAngularVelocity(float *__restrict angVel3, const float *newQuat4, const float *oldQuat4, float halfTimeStep)
Given an old and a new quaternion and a small time step, it calculates the angular velocities.
float * nm_QuatMulVec3(float *__restrict vOut3, const float *__restrict q4, const float *__restrict vIn3)
Transforms a 3-floats vector by a 4-floats quaternion.
float * nm_Mat4SetRotationFromQuat(float *__restrict result16, const float *__restrict q4)
Replaces the 3x3 submatrix of a 16-floats column-major matrix with the 3x3 matrix representing the gi...
float * nm_QuatRotate(float *__restrict qInOut4, float angle, float axisX, float axisY, float axisZ)
Rotates an input 4-floats unit quaternion by an angle in radians around a specified axis.
float * TransformToMat4(float *matrix16Out, const Transform *T)
Converts a nudge::Transform to a column-major 16-floats matrix.
float * nm_QuatGetAxis(float *__restrict vOut3, const float *__restrict q4, float axisX, float axisY, float axisZ)
Transforms a particular axis from the input quaternion space to world space.
void TransformAdvanceBodyFromVelocities(context_t *c, unsigned body, float deltaTime)
Advances the body's transform based on its linear and angular velocities.
float nm_Vec3Normalized(float *__restrict v3Out, const float *__restrict v3)
Gets a normalized copy of an input 3-floats vector.
float * nm_QuatGetAxisZ(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1537
Transform * Mat4WithoutScalingToTransform(Transform *Tout, const float *matrix16WithoutScaling)
Converts a column-major 16-floats matrix without any scaling applied to a nudge::Transform.
float * nm_QuatGetAxisY(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1536
float * nm_QuatFromAngleAxis(float *__restrict qOut4, float rfAngle, float rkAxisX, float rkAxisY, float rkAxisZ)
Generates a 4-floats quaternion based on an orientation around an axis.
float * nm_Mat4Mul(float *result16, const float *ml16, const float *mr16)
Multiplies two column-major 16-floats 4x4 matrices.
float * nm_QuatMul(float *qOut4, const float *a4, const float *b4)
Multiplies two 4-floats quaternions.
float * nm_QuatSlerp(float *__restrict result4, const float *__restrict a4, const float *__restrict b4, float slerpTime_In_0_1, int normalizeResult4AfterLerp)
Performs a spherical lerp between two quaternions (in 4-floats format)
Transform TransformMul(Transform T0, Transform T1)
Multiplies two transforms.
float nm_Vec3Normalize(float *__restrict v3)
Normalizes a 3-floats vector in place.
float * nm_Vec3Cross(float *__restrict vOut3, const float *__restrict a3, const float *__restrict b3)
Computes the cross product between two 3-floats vectors.
float * nm_Mat3FromQuat(float *__restrict result9, const float *__restrict q4)
Converts the given quaternion to a 3x3 9-floats column-major rotation matrix.
void nm_QuatToAngleAxis(const float *__restrict q4, float *__restrict rfAngleOut1, float *__restrict rkAxisOut3)
Calculates the angle-axis representation of the given 4-float quaternion.
void nm_QuatNormalize(float *__restrict q4)
Normalizes a 4-floats quaternion in place.
float nm_Vec3Dot(const float *__restrict a3, const float *__restrict b3)
Computes the dot product between two 3-floats vectors.
void nm_QuatAdvance(float *__restrict qOut4, const float *__restrict q4, const float *__restrict angVel3, float halfTimeStep)
Advances a quaternion given an angular velocity and a (small) time step.
float * nm_QuatGetAxisX(float *__restrict axisOut3, const float *__restrict q4)
Definition nudge.h:1535
Definition nudge.h:212
NUDGE_FLAG_MASK_TYPE FlagMask
The unsigned type used for the BF_ flags; it defaults to uint16_t (i.e. 16 flags available) if C++11 ...
Definition nudge.h:386
AxisEnum
The AxisEnum enum.
Definition nudge.h:688
@ AXIS_Z
Definition nudge.h:688
@ AXIS_X
Definition nudge.h:688
@ AXIS_Y
Definition nudge.h:688
float * body_get_position(context_t *c, uint32_t body)
Gets the pointer to the body position (3-floats vector)
Definition nudge.h:1062
NUDGE_COLLISION_MASK_TYPE CollisionMask
The unsigned type used for the COLLISION_GROUP_ flags; it defaults to uint8_t (i.e....
Definition nudge.h:359
GlobalDataMaskEnum
The GlobalDataMaskEnum enum.
Definition nudge.h:633
@ GF_DONT_RESET_AUX_BODIES
Definition nudge.h:635
@ GF_USE_GLOBAL_GRAVITY
Definition nudge.h:634
float * body_get_angular_velocity(context_t *c, uint32_t body)
Gets the pointer to the body angular velocity (3-floats)
Definition nudge.h:1053
CollisionMaskEnum
The CollisionMaskEnum enum.
Definition nudge.h:369
@ COLLISION_GROUP_A
Definition nudge.h:371
@ COLLISION_GROUP_DEFAULT
Definition nudge.h:370
@ COLLISION_GROUP_C
Definition nudge.h:373
@ COLLISION_GROUP_ALL
Definition nudge.h:378
@ COLLISION_GROUP_F
Definition nudge.h:376
@ COLLISION_GROUP_B
Definition nudge.h:372
@ COLLISION_GROUP_E
Definition nudge.h:375
@ COLLISION_GROUP_D
Definition nudge.h:374
@ COLLISION_GROUP_G
Definition nudge.h:377
BodyFlagEnum
The BodyFlagEnum enum.
Definition nudge.h:398
@ BF_NEVER_SLEEPING
Definition nudge.h:406
@ BF_IS_DYNAMIC
Definition nudge.h:405
@ BF_IS_KINEMATIC_OR_DYNAMIC
Definition nudge.h:418
@ BF_HAS_COM_OFFSET
Definition nudge.h:399
@ BF_HAS_DIFFERENT_GRAVITY_MODE
Definition nudge.h:407
@ BF_IS_PLATFORM
Definition nudge.h:411
@ BF_IS_STATIC_OR_DYNAMIC
Definition nudge.h:417
@ BF_IS_STATIC
Definition nudge.h:403
@ BF_IS_FRUSTUM_CULLED
Definition nudge.h:413
@ BF_IS_DISABLED_OR_REMOVED_OR_FRUSTUM_CULLED
Definition nudge.h:414
@ BF_HAS_DIFFERENT_AUX_BODIES_RESET_MODE
Definition nudge.h:408
@ BF_IS_STATIC_OR_KINEMATIC_OR_DISABLED_OR_REMOVED
Definition nudge.h:420
@ BF_IS_DISABLED_OR_REMOVED
Definition nudge.h:402
@ BF_IS_KINEMATIC
Definition nudge.h:404
@ BF_IS_STATIC_OR_KINEMATIC_OR_DYNAMIC
Definition nudge.h:419
@ BF_IS_SENSOR
Definition nudge.h:412
@ BF_IS_REMOVED
Definition nudge.h:401
@ BF_IS_DISABLED
Definition nudge.h:400
@ BF_IS_CHARACTER
Definition nudge.h:410
@ BF_IS_STATIC_OR_KINEMATIC
Definition nudge.h:416
float * body_get_orientation(context_t *c, uint32_t body)
Gets the pointer to the body orientation (4-floats quaternion in {x,y,z,w} format)
Definition nudge.h:1069
float * body_get_velocity(context_t *c, uint32_t body)
Gets the pointer to the body linear velocity (3-floats)
Definition nudge.h:1045
#define NUDGE_DEFAULT_SIMULATION_TIMESTEP
Definition nudge.h:1609
#define NUDGE_DEFAULT_SLEEPING_THRESHOLD_ANGULAR_VELOCITY_SQUARED
Definition nudge.h:1627
#define NUDGE_DEFAULT_MAX_NUM_SIMULATION_SUBSTEPS
Definition nudge.h:1612
#define NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES
Definition nudge.h:465
#define NUDGE_DEFAULT_DAMPING_LINEAR
Definition nudge.h:1618
#define NUDGE_DEFAULT_NUM_SIMULATION_ITERATIONS
Definition nudge.h:1615
#define NUDGE_COLLISION_MASK_TYPE
Definition nudge.h:205
#define NUDGE_FLAG_MASK_TYPE
Definition nudge.h:208
#define NUDGE_DEFAULT_DAMPING_ANGULAR
Definition nudge.h:1621
#define NUDGE_DEFAULT_PENETRATION_ALLOWED_AMOUNT
Definition nudge.h:1630
#define NUDGE_DEFAULT_PENETRATION_BIAS_FACTOR
Definition nudge.h:1633
#define NUDGE_INVALID_BODY_ID
Definition nudge.h:552
#define NUDGE_DEFAULT_SLEEPING_THRESHOLD_LINEAR_VELOCITY_SQUARED
Definition nudge.h:1624
The ActiveBodies struct.
Definition nudge.h:539
uint32_t capacity
Definition nudge.h:541
uint32_t count
Definition nudge.h:542
uint16_t * indices
Definition nudge.h:540
The Arena struct used internally.
Definition nudge.h:218
uintptr_t size
Definition nudge.h:220
void * data
Definition nudge.h:219
[unused] The BodyConnections struct is actually just sketched in nudge (it was intended to add custom...
Definition nudge.h:514
uint32_t count
Definition nudge.h:516
BodyPair * data
Definition nudge.h:515
The main struct contained in context_t: it exposes every per-body data in the simulation,...
Definition nudge.h:500
BodyMomentum * momentum
Definition nudge.h:503
BodyLayout * layouts
Definition nudge.h:505
uint8_t * idle_counters
Definition nudge.h:507
BodyInfo * infos
Definition nudge.h:506
BodyFilter * filters
Definition nudge.h:504
uint32_t count
Definition nudge.h:508
BodyProperties * properties
Definition nudge.h:502
Transform * transforms
Definition nudge.h:501
The BodyFilter struct.
Definition nudge.h:428
CollisionMask collision_mask
Definition nudge.h:431
FlagMask flags
Definition nudge.h:429
CollisionMask collision_group
Definition nudge.h:430
The BodyInfo struct contains some read-only graphic properties of the body (e.g. axis aligned boundin...
Definition nudge.h:453
union nudge::BodyInfo::@16::@18 sk_user
int16_t aux_bodies[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:469
uint32_t u32[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES/2]
Definition nudge.h:483
int8_t i8[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES *2]
Definition nudge.h:487
float com_offset[3]
Definition nudge.h:462
float aabb_center[3]
Definition nudge.h:460
float aabb_enlarged_radius
Definition nudge.h:463
float aabb_half_extents[3]
Definition nudge.h:461
uint8_t u8[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES *2]
Definition nudge.h:488
uint16_t u16[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:486
UserData32Bit user
Definition nudge.h:455
int16_t i16[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES]
Definition nudge.h:485
int32_t i32[NUDGE_BODYINFO_STRUCT_NUM_AUX_BODIES/2]
Definition nudge.h:482
Per-body struct that contains the indices of the body colliders inside ColliderData::boxes and Collid...
Definition nudge.h:438
uint16_t num_boxes
Definition nudge.h:439
uint16_t num_spheres
Definition nudge.h:442
int16_t first_sphere_index
Definition nudge.h:443
int16_t first_box_index
Definition nudge.h:440
The BodyMomentum struct.
Definition nudge.h:285
float unused0
Definition nudge.h:287
float unused1
Definition nudge.h:289
float angular_velocity[3]
Definition nudge.h:288
float velocity[3]
Definition nudge.h:286
The BodyPair struct.
Definition nudge.h:317
uint16_t b
Definition nudge.h:319
uint16_t a
Definition nudge.h:318
The BodyProperties struct.
Definition nudge.h:276
float gravity[3]
Definition nudge.h:279
float friction
Definition nudge.h:280
float inertia_inverse[3]
Definition nudge.h:277
float mass_inverse
Definition nudge.h:278
The BoxCollider struct.
Definition nudge.h:301
float unused
Definition nudge.h:303
float size[3]
Definition nudge.h:302
The CachedContactImpulse struct.
Definition nudge.h:522
float unused
Definition nudge.h:524
float impulse[3]
Definition nudge.h:523
This struct is used to access all the colliders in the physics world.
Definition nudge.h:338
Transform * transforms
Definition nudge.h:342
struct nudge::ColliderData::@14 boxes
uint32_t count
Definition nudge.h:343
SphereCollider * data
Definition nudge.h:348
BoxCollider * data
Definition nudge.h:341
struct nudge::ColliderData::@15 spheres
uint16_t * tags
Definition nudge.h:340
The ContactCache struct contains the CachedContactImpulse and persists across frames.
Definition nudge.h:529
uint64_t * tags
Definition nudge.h:530
uint32_t capacity
Definition nudge.h:532
uint32_t count
Definition nudge.h:533
CachedContactImpulse * data
Definition nudge.h:531
The ContactData class.
Definition nudge.h:324
uint32_t capacity
Definition nudge.h:328
uint32_t sleeping_count
Definition nudge.h:332
uint32_t count
Definition nudge.h:329
BodyPair * bodies
Definition nudge.h:326
uint64_t * tags
Definition nudge.h:327
Contact * data
Definition nudge.h:325
uint32_t * sleeping_pairs
Definition nudge.h:331
The Contact struct.
Definition nudge.h:308
float normal[3]
Definition nudge.h:311
float friction
Definition nudge.h:312
float penetration
Definition nudge.h:310
float position[3]
Definition nudge.h:309
The GlobalData struct groups global fields that could not fit in the SimulationParams struct.
Definition nudge.h:643
float gravity[3]
Definition nudge.h:644
uint32_t flags
Definition nudge.h:645
uint32_t removed_bodies_count
Definition nudge.h:648
const uint32_t removed_bodies_capacity
Definition nudge.h:650
FlagMask exclude_smoothing_graphic_transform_flags
Definition nudge.h:646
uint32_t * removed_bodies
Definition nudge.h:647
uint32_t finalized_removed_bodies_count
Definition nudge.h:649
The Animation class. Each animation owns a (kinematic) body index and a range of key frames.
Definition nudge.h:585
float play_time
Definition nudge.h:586
float speed
Definition nudge.h:588
LoopMode
Definition nudge.h:596
@ LM_LOOP_NORMAL
Definition nudge.h:598
@ LM_NO_LOOP
Definition nudge.h:597
@ LM_LOOP_PING_PONG
Definition nudge.h:599
float offset_time
Definition nudge.h:587
uint32_t body
Definition nudge.h:593
bool use_baseT
Definition nudge.h:595
Transform baseT
Definition nudge.h:590
uint32_t key_frame_start
Definition nudge.h:591
float total_time
Definition nudge.h:589
enum nudge::KinematicData::Animation::LoopMode loop_mode
uint32_t key_frame_count
Definition nudge.h:592
bool playing
Definition nudge.h:594
The KinematicData is composed of two arrays: an array of global key frames and an array of animations...
Definition nudge.h:565
uint32_t key_frame_capacity
Definition nudge.h:576
uint32_t animations_capacity
Definition nudge.h:602
Transform * key_frame_transforms
Definition nudge.h:567
struct nudge::KinematicData::Animation * animations
uint32_t animations_count
Definition nudge.h:603
TimeMode
TimeMode enum is an optional experimental flag.
Definition nudge.h:571
@ TM_NORMAL
Definition nudge.h:572
@ TM_DECELERATE
Definition nudge.h:574
@ TM_ACCELERATE
Definition nudge.h:573
uint32_t key_frame_count
Definition nudge.h:577
enum nudge::KinematicData::TimeMode * key_frame_modes
The SimulationParams struct.
Definition nudge.h:609
float linear_damping
Definition nudge.h:616
float sleeping_threshold_linear_velocity_squared
Definition nudge.h:614
unsigned numsubsteps_overflow_in_last_frame
Definition nudge.h:628
unsigned num_iterations_per_substep
Definition nudge.h:613
float penetration_allowed_amount
Definition nudge.h:618
float sleeping_threshold_angular_velocity_squared
Definition nudge.h:615
unsigned num_substeps_in_last_frame
Definition nudge.h:627
unsigned long long num_frames
Definition nudge.h:623
unsigned long long num_total_substeps
Definition nudge.h:624
double time_step
Definition nudge.h:611
unsigned numsubsteps_overflow_warning_mode
Definition nudge.h:620
float penetration_bias_factor
Definition nudge.h:619
float angular_damping
Definition nudge.h:617
double remaining_time_in_seconds
Definition nudge.h:625
float time_step_minus_remaining_time
Definition nudge.h:626
unsigned max_num_substeps
Definition nudge.h:612
The SphereCollider struct.
Definition nudge.h:295
float radius
Definition nudge.h:296
The Transform struct.
Definition nudge.h:249
struct nudge::Transform::@4::@9 quaternion
float r[4]
Definition nudge.h:264
float qy
Definition nudge.h:269
float y
Definition nudge.h:253
float position[3]
Definition nudge.h:251
float rw
Definition nudge.h:268
float p[3]
Definition nudge.h:252
float qz
Definition nudge.h:269
float rx
Definition nudge.h:268
struct nudge::Transform::@0::@6 vector
float x
Definition nudge.h:253
uint32_t body
Definition nudge.h:259
float rz
Definition nudge.h:268
float rotation[4]
Definition nudge.h:263
float pz
Definition nudge.h:255
float time
Definition nudge.h:260
float w
Definition nudge.h:266
float qx
Definition nudge.h:269
float z
Definition nudge.h:253
float q[4]
Definition nudge.h:265
float ry
Definition nudge.h:268
float qw
Definition nudge.h:269
float py
Definition nudge.h:255
float px
Definition nudge.h:255
Main struct of the library.
Definition nudge.h:657
ColliderData colliders
Definition nudge.h:662
BodyData bodies
Definition nudge.h:661
ContactData contact_data
Definition nudge.h:663
Arena arena
Definition nudge.h:659
const unsigned MAX_NUM_SPHERES
Definition nudge.h:674
ActiveBodies active_bodies
Definition nudge.h:666
GlobalData global_data
Definition nudge.h:670
SimulationParams simulation_params
Definition nudge.h:671
ContactCache contact_cache
Definition nudge.h:665
const unsigned MAX_NUM_BODIES
Definition nudge.h:675
UserData64Bit user
Definition nudge.h:681
const unsigned MAX_NUM_BOXES
Definition nudge.h:673
KinematicData kinematic_data
Definition nudge.h:669
Storage struct for user data (by default used inside BodyInfo): a per-body 32-bit user space in 7 dif...
Definition nudge.h:240
uint32_t u32
Definition nudge.h:240
int32_t i32
Definition nudge.h:240
uint8_t u8[4]
Definition nudge.h:240
int8_t i8[4]
Definition nudge.h:240
uint16_t u16[2]
Definition nudge.h:240
float f32
Definition nudge.h:240
int16_t i16[2]
Definition nudge.h:240
Storage struct for user data (by default used inside context_t): a per-context 64-bit user space in 1...
Definition nudge.h:226
uint32_t u32[2]
Definition nudge.h:234
int64_t i64
Definition nudge.h:230
int16_t i16[4]
Definition nudge.h:235
float f32[2]
Definition nudge.h:234
double f64
Definition nudge.h:230
uint8_t u8[8]
Definition nudge.h:235
int8_t i8[8]
Definition nudge.h:235
int32_t i32[2]
Definition nudge.h:234
uint16_t u16[4]
Definition nudge.h:235
uint64_t u64
Definition nudge.h:230