From 01739471e11114ef196bf7da7ccb852fbf922d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr> Date: Thu, 20 Jul 2023 18:20:48 +0200 Subject: [PATCH] fix: propagate bug fix easy_bf_fast, see batsched issue #9 --- src/scheds/easy_bf_fast.cpp | 51 ++++++++++++++++++++----------------- src/scheds/easy_bf_fast.hpp | 4 ++- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/scheds/easy_bf_fast.cpp b/src/scheds/easy_bf_fast.cpp index 1a9112a..4bc094e 100644 --- a/src/scheds/easy_bf_fast.cpp +++ b/src/scheds/easy_bf_fast.cpp @@ -52,8 +52,11 @@ void EasyBackfillingFast::make_decisions(double date, // - only handles one priority job (the first of the queue) // - only handles time as floating-point (-> precision errors). - // Hacks: - // - uses priority job's completion time to store its expected starting time + // Warning: you might obtain different outputs than with easy_bf. This is + // due to the fact that this version only keeps track of the priority job + // expected start time and the number of machines available then, while + // easy_bf keeps track of a full 2D schedule of the future. easy_bf_fast + // will sometimes be a little more greedy in backfilling. bool job_ended = false; @@ -134,8 +137,7 @@ void EasyBackfillingFast::make_decisions(double date, { // The job becomes priority! _priority_job = pending_job; - _priority_job->completion_time - = compute_priority_job_expected_earliest_starting_time(); + update_priority_job_expected_earliest_start_time(); _pending_jobs.erase(job_it); // Stop first queue traversal. @@ -150,17 +152,16 @@ void EasyBackfillingFast::make_decisions(double date, // Update priority job expected starting time (might have changed if a recently ended job // completed before its walltime) if (_priority_job != nullptr) - _priority_job->completion_time = compute_priority_job_expected_earliest_starting_time(); + update_priority_job_expected_earliest_start_time(); for (auto job_it = _pending_jobs.begin(); job_it != _pending_jobs.end();) { const Job *pending_job = *job_it; - // Can the job be executed now ? - if (pending_job->nb_requested_resources - <= _nb_available_machines - && date + pending_job->walltime - <= _priority_job->completion_time) + // Can the job be executed now (without hindering priority job)? + if (pending_job->nb_requested_resources <= _nb_available_machines && + (date + pending_job->walltime <= _priority_job_expected_start_time || + pending_job->nb_requested_resources <= _remaining_resources_at_priority_job_start)) { // Yes, it can be backfilled! alloc.machines = _available_machines.left( @@ -182,6 +183,8 @@ void EasyBackfillingFast::make_decisions(double date, -= pending_job->nb_requested_resources; _current_allocations[pending_job->id] = alloc; job_it = _pending_jobs.erase(job_it); + if(date + pending_job->walltime > _priority_job_expected_start_time) + _remaining_resources_at_priority_job_start -= pending_job->nb_requested_resources; // Directly get out of the backfilling loop if all // machines are busy. @@ -222,13 +225,13 @@ void EasyBackfillingFast::make_decisions(double date, { // LOG_F(INFO, "There are enough available resources (%d) to execute // job %s", _nb_available_machines, new_job->id.c_str()); - // Can it be executed now (without hindering priority job?) - if (_priority_job == nullptr - || date + new_job->walltime <= _priority_job->completion_time) + // Can it be executed now (without hindering priority job)? + if (_priority_job == nullptr || + date + new_job->walltime <= _priority_job_expected_start_time || + new_job->nb_requested_resources <= _remaining_resources_at_priority_job_start) { - // LOG_F(INFO, "Job %s can be started right away!", - // new_job->id.c_str()); - // Yes, the job can be executed right away! + //LOG_F(INFO, "Job %s can be started right away!", new_job->id.c_str()); + // Yes, the job can be executed right away! Allocation alloc; alloc.machines @@ -246,6 +249,8 @@ void EasyBackfillingFast::make_decisions(double date, _available_machines -= alloc.machines; _nb_available_machines -= new_job->nb_requested_resources; _current_allocations[new_job_id] = alloc; + if(_priority_job != nullptr && date + new_job->walltime > _priority_job_expected_start_time) + _remaining_resources_at_priority_job_start -= new_job->nb_requested_resources; } else { @@ -253,7 +258,7 @@ void EasyBackfillingFast::make_decisions(double date, /*LOG_F(INFO, "Not enough time to execute job %s (walltime=%g, priority job expected starting time=%g)", new_job->id.c_str(), (double)new_job->walltime, - _priority_job->completion_time);*/ + _priority_job_expected_start_time);*/ _pending_jobs.push_back(new_job); } } @@ -264,8 +269,7 @@ void EasyBackfillingFast::make_decisions(double date, { // The job becomes priority. _priority_job = new_job; - _priority_job->completion_time - = compute_priority_job_expected_earliest_starting_time(); + update_priority_job_expected_earliest_start_time(); } else { @@ -280,8 +284,7 @@ void EasyBackfillingFast::make_decisions(double date, DynScheduler::make_decisions(date, update_info, compare_info); } -double -EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time() +void EasyBackfillingFast::update_priority_job_expected_earliest_start_time() { int nb_available = _nb_available_machines; int required = _priority_job->nb_requested_resources; @@ -292,12 +295,14 @@ EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time() if (nb_available >= required) { - return it->date; + _priority_job_expected_start_time = it->date; + _remaining_resources_at_priority_job_start = nb_available - required; + return; } } PPK_ASSERT_ERROR(false, "The job will never be executable."); - return 0; + return; } std::list<EasyBackfillingFast::FinishedHorizonPoint>::iterator diff --git a/src/scheds/easy_bf_fast.hpp b/src/scheds/easy_bf_fast.hpp index 270d720..b98b6cd 100644 --- a/src/scheds/easy_bf_fast.hpp +++ b/src/scheds/easy_bf_fast.hpp @@ -38,7 +38,7 @@ private: }; private: - double compute_priority_job_expected_earliest_starting_time(); + void update_priority_job_expected_earliest_start_time(); std::list<FinishedHorizonPoint>::iterator insert_horizon_point(const FinishedHorizonPoint & point); private: @@ -58,4 +58,6 @@ private: // At any time, null if there is no priority job (no waiting job) Job * _priority_job = nullptr; + double _priority_job_expected_start_time = -1; + int _remaining_resources_at_priority_job_start = -1; }; -- GitLab