From 01739471e11114ef196bf7da7ccb852fbf922d43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr>
Date: Thu, 20 Jul 2023 18:20:48 +0200
Subject: [PATCH] fix: propagate bug fix easy_bf_fast, see batsched issue #9

---
 src/scheds/easy_bf_fast.cpp | 51 ++++++++++++++++++++-----------------
 src/scheds/easy_bf_fast.hpp |  4 ++-
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/scheds/easy_bf_fast.cpp b/src/scheds/easy_bf_fast.cpp
index 1a9112a..4bc094e 100644
--- a/src/scheds/easy_bf_fast.cpp
+++ b/src/scheds/easy_bf_fast.cpp
@@ -52,8 +52,11 @@ void EasyBackfillingFast::make_decisions(double date,
     // - only handles one priority job (the first of the queue)
     // - only handles time as floating-point (-> precision errors).
 
-    // Hacks:
-    // - uses priority job's completion time to store its expected starting time
+    // Warning: outputs may differ from those of easy_bf. This version only
+    // tracks the priority job's expected start time and the number of
+    // machines left over once it starts, whereas easy_bf maintains a full
+    // 2D schedule of the future. As a result, easy_bf_fast is sometimes
+    // slightly greedier when backfilling.
 
     bool job_ended = false;
 
@@ -134,8 +137,7 @@ void EasyBackfillingFast::make_decisions(double date,
                     {
                         // The job becomes priority!
                         _priority_job = pending_job;
-                        _priority_job->completion_time
-                            = compute_priority_job_expected_earliest_starting_time();
+                        update_priority_job_expected_earliest_start_time();
                         _pending_jobs.erase(job_it);
 
                         // Stop first queue traversal.
@@ -150,17 +152,16 @@ void EasyBackfillingFast::make_decisions(double date,
                 // Update priority job expected starting time (might have changed if a recently ended job 
                 // completed before its walltime)
                 if (_priority_job != nullptr)
-                    _priority_job->completion_time = compute_priority_job_expected_earliest_starting_time();
+                    update_priority_job_expected_earliest_start_time();
 
                 for (auto job_it = _pending_jobs.begin();
                      job_it != _pending_jobs.end();)
                 {
                     const Job *pending_job = *job_it;
-                    // Can the job be executed now ?
-                    if (pending_job->nb_requested_resources
-                            <= _nb_available_machines
-                        && date + pending_job->walltime
-                            <= _priority_job->completion_time)
+                    // Can the job be executed now (without hindering priority job)?
+                    if (pending_job->nb_requested_resources <= _nb_available_machines && 
+                    (date + pending_job->walltime <= _priority_job_expected_start_time ||
+                    pending_job->nb_requested_resources <= _remaining_resources_at_priority_job_start))
                     {
                         // Yes, it can be backfilled!
                         alloc.machines = _available_machines.left(
@@ -182,6 +183,8 @@ void EasyBackfillingFast::make_decisions(double date,
                             -= pending_job->nb_requested_resources;
                         _current_allocations[pending_job->id] = alloc;
                         job_it = _pending_jobs.erase(job_it);
+                        if(date + pending_job->walltime > _priority_job_expected_start_time)
+                            _remaining_resources_at_priority_job_start -= pending_job->nb_requested_resources;
 
                         // Directly get out of the backfilling loop if all
                         // machines are busy.
@@ -222,13 +225,13 @@ void EasyBackfillingFast::make_decisions(double date,
         {
             // LOG_F(INFO, "There are enough available resources (%d) to execute
             // job %s", _nb_available_machines, new_job->id.c_str());
-            //  Can it be executed now (without hindering priority job?)
-            if (_priority_job == nullptr
-                || date + new_job->walltime <= _priority_job->completion_time)
+            // Can it be executed now (without hindering priority job)?
+            if (_priority_job == nullptr ||
+                date + new_job->walltime <= _priority_job_expected_start_time || 
+                new_job->nb_requested_resources <= _remaining_resources_at_priority_job_start)
             {
-                // LOG_F(INFO, "Job %s can be started right away!",
-                // new_job->id.c_str());
-                //  Yes, the job can be executed right away!
+                //LOG_F(INFO, "Job %s can be started right away!", new_job->id.c_str());
+                // Yes, the job can be executed right away!
                 Allocation alloc;
 
                 alloc.machines
@@ -246,6 +249,8 @@ void EasyBackfillingFast::make_decisions(double date,
                 _available_machines -= alloc.machines;
                 _nb_available_machines -= new_job->nb_requested_resources;
                 _current_allocations[new_job_id] = alloc;
+                if(_priority_job != nullptr && date + new_job->walltime > _priority_job_expected_start_time)
+                    _remaining_resources_at_priority_job_start -= new_job->nb_requested_resources;
             }
             else
             {
@@ -253,7 +258,7 @@ void EasyBackfillingFast::make_decisions(double date,
                 /*LOG_F(INFO, "Not enough time to execute job %s (walltime=%g,
                    priority job expected starting time=%g)",
                       new_job->id.c_str(), (double)new_job->walltime,
-                   _priority_job->completion_time);*/
+                   _priority_job_expected_start_time);*/
                 _pending_jobs.push_back(new_job);
             }
         }
@@ -264,8 +269,7 @@ void EasyBackfillingFast::make_decisions(double date,
             {
                 // The job becomes priority.
                 _priority_job = new_job;
-                _priority_job->completion_time
-                    = compute_priority_job_expected_earliest_starting_time();
+                update_priority_job_expected_earliest_start_time();
             }
             else
             {
@@ -280,8 +284,7 @@ void EasyBackfillingFast::make_decisions(double date,
     DynScheduler::make_decisions(date, update_info, compare_info);
 }
 
-double
-EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time()
+void EasyBackfillingFast::update_priority_job_expected_earliest_start_time()
 {
     int nb_available = _nb_available_machines;
     int required = _priority_job->nb_requested_resources;
@@ -292,12 +295,14 @@ EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time()
 
         if (nb_available >= required)
         {
-            return it->date;
+            _priority_job_expected_start_time = it->date;
+            _remaining_resources_at_priority_job_start = nb_available - required;
+            return;
         }
     }
 
     PPK_ASSERT_ERROR(false, "The job will never be executable.");
-    return 0;
+    return;
 }
 
 std::list<EasyBackfillingFast::FinishedHorizonPoint>::iterator
diff --git a/src/scheds/easy_bf_fast.hpp b/src/scheds/easy_bf_fast.hpp
index 270d720..b98b6cd 100644
--- a/src/scheds/easy_bf_fast.hpp
+++ b/src/scheds/easy_bf_fast.hpp
@@ -38,7 +38,7 @@ private:
     };
 
 private:
-    double compute_priority_job_expected_earliest_starting_time();
+    void update_priority_job_expected_earliest_start_time();
     std::list<FinishedHorizonPoint>::iterator insert_horizon_point(const FinishedHorizonPoint & point);
 
 private:
@@ -58,4 +58,6 @@ private:
 
     // At any time, null if there is no priority job (no waiting job)
     Job * _priority_job = nullptr;
+    double _priority_job_expected_start_time = -1;
+    int _remaining_resources_at_priority_job_start = -1;
 };
-- 
GitLab