diff --git a/src/application.cpp b/src/application.cpp
index 1d633a5..03003e3 100644
--- a/src/application.cpp
+++ b/src/application.cpp
@@ -47,6 +47,7 @@ static void StepControl() {
             elements::TextButton(CLAY_STRING("Step"), style::actionButton, &StepSimulationButton);
         }
     }
+    elements::Toggle(CLAY_STRING("Threaded"), style::actionButton, simulation::threadedDesired);
     elements::Toggle(CLAY_STRING("Show Changes"), style::actionButton, simulation::drawDebugInfo);
     elements::Toggle(CLAY_STRING("Lock Speed"), style::actionButton, lockFramerate);
 }
diff --git a/src/simulation.cpp b/src/simulation.cpp
index c068106..6c43b7b 100644
--- a/src/simulation.cpp
+++ b/src/simulation.cpp
@@ -28,6 +28,9 @@ struct ThreadWorkload {
 };
 
 bool drawDebugInfo{ true }; // draw next step's changes over current state
+bool threadedDesired{ true }; // multithread generation steps
+static bool actuallyThreaded{ false }; // notes whether or not currently ongoing tasks are multithreaded
+size_t threadsPerTask{ std::thread::hardware_concurrency() > 0 ? std::thread::hardware_concurrency() : 1u }; // hardware_concurrency() may return 0; never split work into zero segments
 
 static std::set<Cell> living{}; // The set of currently alive cells
 
@@ -87,6 +90,9 @@ static size_t CountNeighbors(Cell const &cell) {
     return count;
 }
 
+// Block the thread until all tasks are done
+// after this function, all code in this file is synchronous, until another thread is started
+// always waits under tasksMutex; when no tasks are outstanding the loop is skipped and this returns immediately
 static void BlockUntilTasksDone() {
     std::unique_lock lock{ tasksMutex };
     while (tasks > 0) {
@@ -100,11 +106,19 @@ static void TaskBegin() {
 }
 
 static void TaskComplete() {
-    std::scoped_lock lock{ tasksMutex };
-    if (--tasks == 0) {
-        SDL_Log("Generation Complete %lfs", (double)(SDL_GetTicksNS() - generationStartTime) * 0.000000001);
+    if (actuallyThreaded) {
+        std::scoped_lock lock{ tasksMutex };
+        if (--tasks == 0) {
+            SDL_Log("Generation Complete %lfs", (double)(SDL_GetTicksNS() - generationStartTime) * 0.000000001);
+        }
+        tasksChanged.notify_all();
+    } else if (tasks == 3) {
+        // NOTE(review): synchronous completion is detected by the counter reaching exactly 3; this
+        // change drops the explicit `tasks = 3` assignment, so confirm the counter is still raised
+        // once per task on this path, otherwise this message is never logged
+        SDL_Log("Generation complete %lfs", (double)(SDL_GetTicksNS() - generationStartTime) * 0.000000001);
+        tasks = 0;
     }
-    tasksChanged.notify_all();
 }
 
 static void FindOverpopulated(std::shared_ptr<ThreadWorkload> wl) {
@@ -162,23 +176,12 @@ static void FindBorn(std::shared_ptr<ThreadWorkload> wl) {
     TaskComplete();
 }
 
-static void PopulateChanges() {
-    static bool first_run{ true };
-#if SIM_MULTITHREADING
-    BlockUntilTasksDone();
-    // for some reason three tasks per thread is faster than one task per thread, i don't get it either
-    size_t const split{ std::thread::hardware_concurrency() };
-    if (first_run) {
-        first_run = false;
-        born.resize(split);
-        underpopulated.resize(split);
-        overpopulated.resize(split);
-    }
-
+// Split the living set into threadsPerTask segments and schedule work for each segment via threading::tasks
+static inline void TickGenerationThreaded() {
     SDL_Log("Multithreading ON");
     generationStartTime = SDL_GetTicksNS();
-    size_t const seg_length{ living.size() / split };
-    for (size_t i{ 0 }; i < split; ++i) {
+    size_t const seg_length{ living.size() / threadsPerTask };
+    for (size_t i{ 0 }; i < threadsPerTask; ++i) {
         if (overpopulated[i] == nullptr) overpopulated[i] = std::make_shared<ThreadWorkload>();
         {
             std::scoped_lock lock{ overpopulated[i]->mtx };
@@ -198,27 +201,49 @@ static void PopulateChanges() {
             threading::tasks.ScheduleTask(std::bind(FindBorn, born[i]));
         }
     }
-#else
-    if (first_run) {
-        first_run = false;
-        overpopulated.emplace_back(std::make_shared<ThreadWorkload>());
-        overpopulated[0]->seg_idx = 0;
-        overpopulated[0]->seg_len = living.size();
-        underpopulated.emplace_back(std::make_shared<ThreadWorkload>());
-        underpopulated[0]->seg_idx = 0;
-        underpopulated[0]->seg_len = living.size();
-        born.emplace_back(std::make_shared<ThreadWorkload>());
-        born[0]->seg_idx = 0;
-        born[0]->seg_len = living.size();
-    }
+}
+
+// Run the three analysis passes over the whole living set on the calling thread
+static inline void TickGenerationSynchronous() {
     SDL_Log("Multithreading OFF");
-    { std::scoped_lock lock{ tasksMutex };
-    tasks = 3; }
     generationStartTime = SDL_GetTicksNS();
+    // resize(1) on an empty vector yields a null pointer, so allocate each workload before it is dereferenced
+    if (overpopulated[0] == nullptr) overpopulated[0] = std::make_shared<ThreadWorkload>();
+    if (underpopulated[0] == nullptr) underpopulated[0] = std::make_shared<ThreadWorkload>();
+    if (born[0] == nullptr) born[0] = std::make_shared<ThreadWorkload>();
+    born[0]->seg_idx = underpopulated[0]->seg_idx = overpopulated[0]->seg_idx = 0;
+    born[0]->seg_len = underpopulated[0]->seg_len = overpopulated[0]->seg_len = living.size();
+
     FindOverpopulated(overpopulated[0]);
     FindUnderpopulated(underpopulated[0]);
     FindBorn(born[0]);
-#endif
+}
+
+// Wait for outstanding tasks, then start the next generation in the mode selected by threadedDesired,
+// resizing the workload vectors whenever the mode changes (or on the very first run)
+static void PopulateChanges() {
+    static bool first_run{ true };
+    BlockUntilTasksDone();
+    // for some reason three tasks per thread is faster than one task per thread, i don't get it either
+    if (threadedDesired) {
+        if (first_run || !actuallyThreaded) {
+            actuallyThreaded = true;
+            first_run = false;
+            born.resize(threadsPerTask);
+            underpopulated.resize(threadsPerTask);
+            overpopulated.resize(threadsPerTask);
+        }
+        TickGenerationThreaded();
+    } else {
+        if (first_run || actuallyThreaded) {
+            actuallyThreaded = false;
+            first_run = false;
+            born.resize(1);
+            underpopulated.resize(1);
+            overpopulated.resize(1);
+        }
+        TickGenerationSynchronous();
+    }
 }
 
 void InitializeRandom(size_t livingChance, int64_t fillArea) {
diff --git a/src/simulation.h b/src/simulation.h
index df68d78..0dbbe50 100644
--- a/src/simulation.h
+++ b/src/simulation.h
@@ -34,6 +34,8 @@ public:
     CellRange(Cell begin, Cell end) : beginCell{ begin }, endCell{ end } {}
 };
 extern bool drawDebugInfo;
+extern bool threadedDesired;
+extern size_t threadsPerTask;
 
 void InitializeRandom(size_t livingChance, int64_t fillArea);
 void Step();