// SPDX-FileCopyrightText: 2023 Jorrit Rouwe // SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #include #include //#define JPH_LARGE_ISLAND_SPLITTER_DEBUG JPH_NAMESPACE_BEGIN LargeIslandSplitter::EStatus LargeIslandSplitter::Splits::FetchNextBatch(uint32 &outConstraintsBegin, uint32 &outConstraintsEnd, uint32 &outContactsBegin, uint32 &outContactsEnd, bool &outFirstIteration) { { // First check if we can get a new batch (doing a read to avoid hammering an atomic with an atomic subtract) // Note this also avoids overflowing the status counter if we're done but there's still one thread processing items uint64 status = mStatus.load(memory_order_acquire); // Check for special value that indicates that the splits are still being built // (note we do not check for this condition again below as we reset all splits before kicking off jobs that fetch batches of work) if (status == StatusItemMask) return EStatus::WaitingForBatch; // Next check if all items have been processed. Note that we do this after checking if the job can be started // as mNumIterations is not initialized until the split is started. if (sGetIteration(status) >= mNumIterations) return EStatus::AllBatchesDone; uint item = sGetItem(status); uint split_index = sGetSplit(status); if (split_index == cNonParallelSplitIdx) { // Non parallel split needs to be taken as a single batch, only the thread that takes element 0 will do it if (item != 0) return EStatus::WaitingForBatch; } else { // Parallel split is split into batches JPH_ASSERT(split_index < mNumSplits); const Split &split = mSplits[split_index]; if (item >= split.GetNumItems()) return EStatus::WaitingForBatch; } } // Then try to actually get the batch uint64 status = mStatus.fetch_add(cBatchSize, memory_order_acquire); int iteration = sGetIteration(status); if (iteration >= mNumIterations) return EStatus::AllBatchesDone; uint split_index = sGetSplit(status); JPH_ASSERT(split_index < mNumSplits || split_index == cNonParallelSplitIdx); const Split &split = mSplits[split_index]; uint item_begin = sGetItem(status); if (split_index == cNonParallelSplitIdx) { if (item_begin == 0) { // Non-parallel split always goes as a single batch outConstraintsBegin = split.mConstraintBufferBegin; outConstraintsEnd = split.mConstraintBufferEnd; outContactsBegin = split.mContactBufferBegin; outContactsEnd = split.mContactBufferEnd; outFirstIteration = iteration == 0; return EStatus::BatchRetrieved; } else { // Otherwise we're done with this split return EStatus::WaitingForBatch; } } // Parallel split is split into batches uint num_constraints = split.GetNumConstraints(); uint num_contacts = split.GetNumContacts(); uint num_items = num_constraints + num_contacts; if (item_begin >= num_items) return EStatus::WaitingForBatch; uint item_end = min(item_begin + cBatchSize, num_items); if (item_end >= num_constraints) { if (item_begin < num_constraints) { // Partially from constraints and partially from contacts outConstraintsBegin = split.mConstraintBufferBegin + item_begin; outConstraintsEnd = split.mConstraintBufferEnd; } else { // Only contacts outConstraintsBegin = 0; outConstraintsEnd = 0; } outContactsBegin = split.mContactBufferBegin + (max(item_begin, num_constraints) - num_constraints); outContactsEnd = split.mContactBufferBegin + (item_end - num_constraints); } else { // Only constraints outConstraintsBegin = split.mConstraintBufferBegin + item_begin; outConstraintsEnd = split.mConstraintBufferBegin + item_end; outContactsBegin = 0; outContactsEnd = 0; } outFirstIteration = iteration == 0; return EStatus::BatchRetrieved; } void LargeIslandSplitter::Splits::MarkBatchProcessed(uint inNumProcessed, bool &outLastIteration, bool &outFinalBatch) { // We fetched this batch, nobody should change the split and or iteration until we mark the last batch as processed so we can safely get the current status uint64 status = mStatus.load(memory_order_relaxed); uint split_index = sGetSplit(status); JPH_ASSERT(split_index < mNumSplits || split_index == cNonParallelSplitIdx); const Split &split = mSplits[split_index]; uint num_items_in_split = split.GetNumItems(); // Determine if this is the last iteration before possibly incrementing it int iteration = sGetIteration(status); outLastIteration = iteration == mNumIterations - 1; // Add the number of items we processed to the total number of items processed // Note: This needs to happen after we read the status as other threads may update the status after we mark items as processed JPH_ASSERT(inNumProcessed > 0); // Logic will break if we mark a block of 0 items as processed uint total_items_processed = mItemsProcessed.fetch_add(inNumProcessed, memory_order_acq_rel) + inNumProcessed; // Check if we're at the end of the split if (total_items_processed >= num_items_in_split) { JPH_ASSERT(total_items_processed == num_items_in_split); // Should not overflow, that means we're retiring more items than we should process // Set items processed back to 0 for the next split/iteration mItemsProcessed.store(0, memory_order_release); // Determine next split do { if (split_index == cNonParallelSplitIdx) { // At start of next iteration split_index = 0; ++iteration; } else { // At start of next split ++split_index; } // If we're beyond the end of splits, go to the non-parallel split if (split_index >= mNumSplits) split_index = cNonParallelSplitIdx; } while (iteration < mNumIterations && mSplits[split_index].GetNumItems() == 0); // We don't support processing empty splits, skip to the next split in this case mStatus.store((uint64(iteration) << StatusIterationShift) | (uint64(split_index) << StatusSplitShift), memory_order_release); } // Track if this is the final batch outFinalBatch = iteration >= mNumIterations; } LargeIslandSplitter::~LargeIslandSplitter() { JPH_ASSERT(mSplitMasks == nullptr); JPH_ASSERT(mContactAndConstraintsSplitIdx == nullptr); JPH_ASSERT(mContactAndConstraintIndices == nullptr); JPH_ASSERT(mSplitIslands == nullptr); } void LargeIslandSplitter::Prepare(const IslandBuilder &inIslandBuilder, uint32 inNumActiveBodies, TempAllocator *inTempAllocator) { JPH_PROFILE_FUNCTION(); // Count the total number of constraints and contacts that we will be putting in splits mContactAndConstraintsSize = 0; for (uint32 island = 0; island < inIslandBuilder.GetNumIslands(); ++island) { // Get the contacts in this island uint32 *contacts_start, *contacts_end; inIslandBuilder.GetContactsInIsland(island, contacts_start, contacts_end); uint num_contacts_in_island = uint(contacts_end - contacts_start); // Get the constraints in this island uint32 *constraints_start, *constraints_end; inIslandBuilder.GetConstraintsInIsland(island, constraints_start, constraints_end); uint num_constraints_in_island = uint(constraints_end - constraints_start); uint island_size = num_contacts_in_island + num_constraints_in_island; if (island_size >= cLargeIslandTreshold) { mNumSplitIslands++; mContactAndConstraintsSize += island_size; } else break; // If this island doesn't have enough constraints, the next islands won't either since they're sorted from big to small } if (mContactAndConstraintsSize > 0) { mNumActiveBodies = inNumActiveBodies; // Allocate split mask buffer mSplitMasks = (SplitMask *)inTempAllocator->Allocate(mNumActiveBodies * sizeof(SplitMask)); // Allocate contact and constraint buffer uint contact_and_constraint_indices_size = mContactAndConstraintsSize * sizeof(uint32); mContactAndConstraintsSplitIdx = (uint32 *)inTempAllocator->Allocate(contact_and_constraint_indices_size); mContactAndConstraintIndices = (uint32 *)inTempAllocator->Allocate(contact_and_constraint_indices_size); // Allocate island split buffer mSplitIslands = (Splits *)inTempAllocator->Allocate(mNumSplitIslands * sizeof(Splits)); // Prevent any of the splits from being picked up as work for (uint i = 0; i < mNumSplitIslands; ++i) mSplitIslands[i].ResetStatus(); } } uint LargeIslandSplitter::AssignSplit(const Body *inBody1, const Body *inBody2) { uint32 idx1 = inBody1->GetIndexInActiveBodiesInternal(); uint32 idx2 = inBody2->GetIndexInActiveBodiesInternal(); // Test if either index is negative if (idx1 == Body::cInactiveIndex || !inBody1->IsDynamic()) { // Body 1 is not active or a kinematic body, so we only need to set 1 body JPH_ASSERT(idx2 < mNumActiveBodies); SplitMask &mask = mSplitMasks[idx2]; uint split = min(CountTrailingZeros(~uint32(mask)), cNonParallelSplitIdx); mask |= SplitMask(1U << split); return split; } else if (idx2 == Body::cInactiveIndex || !inBody2->IsDynamic()) { // Body 2 is not active or a kinematic body, so we only need to set 1 body JPH_ASSERT(idx1 < mNumActiveBodies); SplitMask &mask = mSplitMasks[idx1]; uint split = min(CountTrailingZeros(~uint32(mask)), cNonParallelSplitIdx); mask |= SplitMask(1U << split); return split; } else { // If both bodies are active, we need to set 2 bodies JPH_ASSERT(idx1 < mNumActiveBodies); JPH_ASSERT(idx2 < mNumActiveBodies); SplitMask &mask1 = mSplitMasks[idx1]; SplitMask &mask2 = mSplitMasks[idx2]; uint split = min(CountTrailingZeros((~uint32(mask1)) & (~uint32(mask2))), cNonParallelSplitIdx); SplitMask mask = SplitMask(1U << split); mask1 |= mask; mask2 |= mask; return split; } } uint LargeIslandSplitter::AssignToNonParallelSplit(const Body *inBody) { uint32 idx = inBody->GetIndexInActiveBodiesInternal(); if (idx != Body::cInactiveIndex) { JPH_ASSERT(idx < mNumActiveBodies); mSplitMasks[idx] |= 1U << cNonParallelSplitIdx; } return cNonParallelSplitIdx; } bool LargeIslandSplitter::SplitIsland(uint32 inIslandIndex, const IslandBuilder &inIslandBuilder, const BodyManager &inBodyManager, const ContactConstraintManager &inContactManager, Constraint **inActiveConstraints, CalculateSolverSteps &ioStepsCalculator) { JPH_PROFILE_FUNCTION(); // Get the contacts in this island uint32 *contacts_start, *contacts_end; inIslandBuilder.GetContactsInIsland(inIslandIndex, contacts_start, contacts_end); uint num_contacts_in_island = uint(contacts_end - contacts_start); // Get the constraints in this island uint32 *constraints_start, *constraints_end; inIslandBuilder.GetConstraintsInIsland(inIslandIndex, constraints_start, constraints_end); uint num_constraints_in_island = uint(constraints_end - constraints_start); // Check if it exceeds the threshold uint island_size = num_contacts_in_island + num_constraints_in_island; if (island_size < cLargeIslandTreshold) return false; // Get bodies in this island BodyID *bodies_start, *bodies_end; inIslandBuilder.GetBodiesInIsland(inIslandIndex, bodies_start, bodies_end); // Reset the split mask for all bodies in this island Body const * const *bodies = inBodyManager.GetBodies().data(); for (const BodyID *b = bodies_start; b < bodies_end; ++b) mSplitMasks[bodies[b->GetIndex()]->GetIndexInActiveBodiesInternal()] = 0; // Count the number of contacts and constraints per split uint num_contacts_in_split[cNumSplits] = { }; uint num_constraints_in_split[cNumSplits] = { }; // Get space to store split indices uint offset = mContactAndConstraintsNextFree.fetch_add(island_size, memory_order_relaxed); uint32 *contact_split_idx = mContactAndConstraintsSplitIdx + offset; uint32 *constraint_split_idx = contact_split_idx + num_contacts_in_island; // Assign the contacts to a split uint32 *cur_contact_split_idx = contact_split_idx; for (const uint32 *c = contacts_start; c < contacts_end; ++c) { const Body *body1, *body2; inContactManager.GetAffectedBodies(*c, body1, body2); uint split = AssignSplit(body1, body2); num_contacts_in_split[split]++; *cur_contact_split_idx++ = split; if (body1->IsDynamic()) ioStepsCalculator(body1->GetMotionPropertiesUnchecked()); if (body2->IsDynamic()) ioStepsCalculator(body2->GetMotionPropertiesUnchecked()); } // Assign the constraints to a split uint32 *cur_constraint_split_idx = constraint_split_idx; for (const uint32 *c = constraints_start; c < constraints_end; ++c) { const Constraint *constraint = inActiveConstraints[*c]; uint split = constraint->BuildIslandSplits(*this); num_constraints_in_split[split]++; *cur_constraint_split_idx++ = split; ioStepsCalculator(constraint); } ioStepsCalculator.Finalize(); // Start with 0 splits uint split_remap_table[cNumSplits]; uint new_split_idx = mNextSplitIsland.fetch_add(1, memory_order_relaxed); JPH_ASSERT(new_split_idx < mNumSplitIslands); Splits &splits = mSplitIslands[new_split_idx]; splits.mIslandIndex = inIslandIndex; splits.mNumSplits = 0; splits.mNumIterations = ioStepsCalculator.GetNumVelocitySteps() + 1; // Iteration 0 is used for warm starting splits.mNumVelocitySteps = ioStepsCalculator.GetNumVelocitySteps(); splits.mNumPositionSteps = ioStepsCalculator.GetNumPositionSteps(); splits.mItemsProcessed.store(0, memory_order_release); // Allocate space to store the sorted constraint and contact indices per split uint32 *constraint_buffer_cur[cNumSplits], *contact_buffer_cur[cNumSplits]; for (uint s = 0; s < cNumSplits; ++s) { // If this split doesn't contain enough constraints and contacts, we will combine it with the non parallel split if (num_constraints_in_split[s] + num_contacts_in_split[s] < cSplitCombineTreshold && s < cNonParallelSplitIdx) // The non-parallel split cannot merge into itself { // Remap it split_remap_table[s] = cNonParallelSplitIdx; // Add the counts to the non parallel split num_contacts_in_split[cNonParallelSplitIdx] += num_contacts_in_split[s]; num_constraints_in_split[cNonParallelSplitIdx] += num_constraints_in_split[s]; } else { // This split is valid, map it to the next empty slot uint target_split; if (s < cNonParallelSplitIdx) target_split = splits.mNumSplits++; else target_split = cNonParallelSplitIdx; Split &split = splits.mSplits[target_split]; split_remap_table[s] = target_split; // Allocate space for contacts split.mContactBufferBegin = offset; split.mContactBufferEnd = split.mContactBufferBegin + num_contacts_in_split[s]; // Allocate space for constraints split.mConstraintBufferBegin = split.mContactBufferEnd; split.mConstraintBufferEnd = split.mConstraintBufferBegin + num_constraints_in_split[s]; // Store start for each split contact_buffer_cur[target_split] = mContactAndConstraintIndices + split.mContactBufferBegin; constraint_buffer_cur[target_split] = mContactAndConstraintIndices + split.mConstraintBufferBegin; // Update offset offset = split.mConstraintBufferEnd; } } // Split the contacts for (uint c = 0; c < num_contacts_in_island; ++c) { uint split = split_remap_table[contact_split_idx[c]]; *contact_buffer_cur[split]++ = contacts_start[c]; } // Split the constraints for (uint c = 0; c < num_constraints_in_island; ++c) { uint split = split_remap_table[constraint_split_idx[c]]; *constraint_buffer_cur[split]++ = constraints_start[c]; } #ifdef JPH_LARGE_ISLAND_SPLITTER_DEBUG // Trace the size of all splits uint sum = 0; String stats; for (uint s = 0; s < cNumSplits; ++s) { // If we've processed all splits, jump to the non-parallel split if (s >= splits.GetNumSplits()) s = cNonParallelSplitIdx; const Split &split = splits.mSplits[s]; stats += StringFormat("g:%d:%d:%d, ", s, split.GetNumContacts(), split.GetNumConstraints()); sum += split.GetNumItems(); } stats += StringFormat("sum: %d", sum); Trace(stats.c_str()); #endif // JPH_LARGE_ISLAND_SPLITTER_DEBUG #ifdef JPH_ENABLE_ASSERTS for (uint s = 0; s < cNumSplits; ++s) { // If there are no more splits, process the non-parallel split if (s >= splits.mNumSplits) s = cNonParallelSplitIdx; // Check that we wrote all elements Split &split = splits.mSplits[s]; JPH_ASSERT(contact_buffer_cur[s] == mContactAndConstraintIndices + split.mContactBufferEnd); JPH_ASSERT(constraint_buffer_cur[s] == mContactAndConstraintIndices + split.mConstraintBufferEnd); } #ifdef JPH_DEBUG // Validate that the splits are indeed not touching the same body for (uint s = 0; s < splits.mNumSplits; ++s) { Array body_used(mNumActiveBodies, false); // Validate contacts uint32 split_contacts_begin, split_contacts_end; splits.GetContactsInSplit(s, split_contacts_begin, split_contacts_end); for (uint32 *c = mContactAndConstraintIndices + split_contacts_begin; c < mContactAndConstraintIndices + split_contacts_end; ++c) { const Body *body1, *body2; inContactManager.GetAffectedBodies(*c, body1, body2); uint32 idx1 = body1->GetIndexInActiveBodiesInternal(); if (idx1 != Body::cInactiveIndex && body1->IsDynamic()) { JPH_ASSERT(!body_used[idx1]); body_used[idx1] = true; } uint32 idx2 = body2->GetIndexInActiveBodiesInternal(); if (idx2 != Body::cInactiveIndex && body2->IsDynamic()) { JPH_ASSERT(!body_used[idx2]); body_used[idx2] = true; } } } #endif // JPH_DEBUG #endif // JPH_ENABLE_ASSERTS // Allow other threads to pick up this split island now splits.StartFirstBatch(); return true; } LargeIslandSplitter::EStatus LargeIslandSplitter::FetchNextBatch(uint &outSplitIslandIndex, uint32 *&outConstraintsBegin, uint32 *&outConstraintsEnd, uint32 *&outContactsBegin, uint32 *&outContactsEnd, bool &outFirstIteration) { // We can't be done when all islands haven't been submitted yet uint num_splits_created = mNextSplitIsland.load(memory_order_acquire); bool all_done = num_splits_created == mNumSplitIslands; // Loop over all split islands to find work uint32 constraints_begin, constraints_end, contacts_begin, contacts_end; for (Splits *s = mSplitIslands; s < mSplitIslands + num_splits_created; ++s) switch (s->FetchNextBatch(constraints_begin, constraints_end, contacts_begin, contacts_end, outFirstIteration)) { case EStatus::AllBatchesDone: break; case EStatus::WaitingForBatch: all_done = false; break; case EStatus::BatchRetrieved: outSplitIslandIndex = uint(s - mSplitIslands); outConstraintsBegin = mContactAndConstraintIndices + constraints_begin; outConstraintsEnd = mContactAndConstraintIndices + constraints_end; outContactsBegin = mContactAndConstraintIndices + contacts_begin; outContactsEnd = mContactAndConstraintIndices + contacts_end; return EStatus::BatchRetrieved; } return all_done? EStatus::AllBatchesDone : EStatus::WaitingForBatch; } void LargeIslandSplitter::MarkBatchProcessed(uint inSplitIslandIndex, const uint32 *inConstraintsBegin, const uint32 *inConstraintsEnd, const uint32 *inContactsBegin, const uint32 *inContactsEnd, bool &outLastIteration, bool &outFinalBatch) { uint num_items_processed = uint(inConstraintsEnd - inConstraintsBegin) + uint(inContactsEnd - inContactsBegin); JPH_ASSERT(inSplitIslandIndex < mNextSplitIsland.load(memory_order_relaxed)); Splits &splits = mSplitIslands[inSplitIslandIndex]; splits.MarkBatchProcessed(num_items_processed, outLastIteration, outFinalBatch); } void LargeIslandSplitter::PrepareForSolvePositions() { for (Splits *s = mSplitIslands, *s_end = mSplitIslands + mNumSplitIslands; s < s_end; ++s) { // Set the number of iterations to the number of position steps s->mNumIterations = s->mNumPositionSteps; // We can start again from the first batch s->StartFirstBatch(); } } void LargeIslandSplitter::Reset(TempAllocator *inTempAllocator) { JPH_PROFILE_FUNCTION(); // Everything should have been used JPH_ASSERT(mContactAndConstraintsNextFree.load(memory_order_relaxed) == mContactAndConstraintsSize); JPH_ASSERT(mNextSplitIsland.load(memory_order_relaxed) == mNumSplitIslands); // Free split islands if (mNumSplitIslands > 0) { inTempAllocator->Free(mSplitIslands, mNumSplitIslands * sizeof(Splits)); mSplitIslands = nullptr; mNumSplitIslands = 0; mNextSplitIsland.store(0, memory_order_relaxed); } // Free contact and constraint buffers if (mContactAndConstraintsSize > 0) { inTempAllocator->Free(mContactAndConstraintIndices, mContactAndConstraintsSize * sizeof(uint32)); mContactAndConstraintIndices = nullptr; inTempAllocator->Free(mContactAndConstraintsSplitIdx, mContactAndConstraintsSize * sizeof(uint32)); mContactAndConstraintsSplitIdx = nullptr; mContactAndConstraintsSize = 0; mContactAndConstraintsNextFree.store(0, memory_order_relaxed); } // Free split masks if (mSplitMasks != nullptr) { inTempAllocator->Free(mSplitMasks, mNumActiveBodies * sizeof(SplitMask)); mSplitMasks = nullptr; mNumActiveBodies = 0; } } JPH_NAMESPACE_END