Skip to content
Snippets Groups Projects
Commit 7c457f9b authored by Millian Poquet
Browse files

sched: easy -> easy+powercap

parent 0d84d037
No related branches found
No related tags found
No related merge requests found
...@@ -82,6 +82,7 @@ ...@@ -82,6 +82,7 @@
"^.*\.h" "^.*\.h"
]; ];
buildInputs = [ buildInputs = [
pkgs.nlohmann_json
batprotopkgs.batprotocol-cpp batprotopkgs.batprotocol-cpp
intervalsetpkgs.intervalset intervalsetpkgs.intervalset
]; ];
......
...@@ -5,10 +5,12 @@ ...@@ -5,10 +5,12 @@
#include <batprotocol.hpp> #include <batprotocol.hpp>
#include <intervalset.hpp> #include <intervalset.hpp>
#include <nlohmann/json.hpp>
#include "batsim_edc.h" #include "batsim_edc.h"
using namespace batprotocol; using namespace batprotocol;
using json = nlohmann::json;
struct Job struct Job
{ {
...@@ -18,6 +20,7 @@ struct Job ...@@ -18,6 +20,7 @@ struct Job
IntervalSet alloc; IntervalSet alloc;
double maximum_finish_time; double maximum_finish_time;
double power_estimation;
}; };
MessageBuilder * mb = nullptr; MessageBuilder * mb = nullptr;
...@@ -29,6 +32,13 @@ std::unordered_map<std::string, ::Job*> running_jobs; ...@@ -29,6 +32,13 @@ std::unordered_map<std::string, ::Job*> running_jobs;
uint32_t nb_available_hosts = 0; uint32_t nb_available_hosts = 0;
IntervalSet available_hosts; IntervalSet available_hosts;
double platform_normal_watts = -1;
double platform_powercap_watts = -1;
double platform_current_powercap_watts = -1;
double platform_nb_available_watts = -1;
double powercap_end_time = -1;
std::string power_estimation_field;
uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags) uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags)
{ {
format_binary = ((flags & BATSIM_EDC_FORMAT_BINARY) != 0); format_binary = ((flags & BATSIM_EDC_FORMAT_BINARY) != 0);
...@@ -40,9 +50,18 @@ uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags) ...@@ -40,9 +50,18 @@ uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags)
mb = new MessageBuilder(!format_binary); mb = new MessageBuilder(!format_binary);
// ignore initialization data std::string init_string((const char *)data, static_cast<size_t>(size));
(void) data; try {
(void) size; auto init_json = json::parse(init_string);
platform_normal_watts = init_json["normal_watts"];
platform_powercap_watts = init_json["powercap_watts"];
platform_current_powercap_watts = platform_powercap_watts; // simulation starts in the powercap constrained window
platform_nb_available_watts = platform_current_powercap_watts;
powercap_end_time = init_json["powercap_end_time_seconds"];
power_estimation_field = init_json["job_power_estimation_field"];
} catch (const json::exception & e) {
throw std::runtime_error("scheduler called with bad init string: " + std::string(e.what()));
}
return 0; return 0;
} }
...@@ -69,6 +88,12 @@ uint8_t batsim_edc_take_decisions( ...@@ -69,6 +88,12 @@ uint8_t batsim_edc_take_decisions(
auto * parsed = deserialize_message(*mb, !format_binary, what_happened); auto * parsed = deserialize_message(*mb, !format_binary, what_happened);
mb->clear(parsed->now()); mb->clear(parsed->now());
// Lift the powercap exactly once, on the first decision round at or after the
// end of the constrained window. The time test alone stays true on every later
// call, which would credit the watt budget again and again; the second
// condition disarms the block once the current cap has been switched to the
// normal (unconstrained) value.
if (parsed->now() >= powercap_end_time
    && platform_current_powercap_watts != platform_normal_watts) {
    platform_current_powercap_watts = platform_normal_watts;
    // give back the watts that were withheld while the cap was active
    platform_nb_available_watts += (platform_normal_watts - platform_powercap_watts);
}
bool need_scheduling = false; bool need_scheduling = false;
auto nb_events = parsed->events()->size(); auto nb_events = parsed->events()->size();
for (unsigned int i = 0; i < nb_events; ++i) { for (unsigned int i = 0; i < nb_events; ++i) {
...@@ -76,7 +101,7 @@ uint8_t batsim_edc_take_decisions( ...@@ -76,7 +101,7 @@ uint8_t batsim_edc_take_decisions(
switch (event->event_type()) switch (event->event_type())
{ {
case fb::Event_BatsimHelloEvent: { case fb::Event_BatsimHelloEvent: {
mb->add_edc_hello("easy", "0.1.0"); mb->add_edc_hello("easy-powercap", "0.1.0");
} break; } break;
case fb::Event_SimulationBeginsEvent: { case fb::Event_SimulationBeginsEvent: {
auto simu_begins = event->event_as_SimulationBeginsEvent(); auto simu_begins = event->event_as_SimulationBeginsEvent();
...@@ -85,15 +110,26 @@ uint8_t batsim_edc_take_decisions( ...@@ -85,15 +110,26 @@ uint8_t batsim_edc_take_decisions(
available_hosts = IntervalSet::ClosedInterval(0, platform_nb_hosts - 1); available_hosts = IntervalSet::ClosedInterval(0, platform_nb_hosts - 1);
} break; } break;
case fb::Event_JobSubmittedEvent: { case fb::Event_JobSubmittedEvent: {
auto parsed_job = event->event_as_JobSubmittedEvent();
::Job job{ ::Job job{
event->event_as_JobSubmittedEvent()->job_id()->str(), parsed_job->job_id()->str(),
event->event_as_JobSubmittedEvent()->job()->resource_request(), parsed_job->job()->resource_request(),
event->event_as_JobSubmittedEvent()->job()->walltime(), parsed_job->job()->walltime(),
IntervalSet::empty_interval_set(), IntervalSet::empty_interval_set(),
-1,
-1 -1
}; };
if (job.nb_hosts > platform_nb_hosts) try {
auto extra_data = json::parse(parsed_job->job()->extra_data()->str());
job.power_estimation = extra_data[power_estimation_field];
} catch (const json::exception & e) {
throw std::runtime_error("bad extra_data in job submitted: tried to read field " + power_estimation_field);
}
if ( (job.nb_hosts > platform_nb_hosts) // usual EASY predicate
|| (job.power_estimation > platform_normal_watts) // powercap predicate
)
mb->add_reject_job(job.id); mb->add_reject_job(job.id);
else if (job.walltime <= 0) else if (job.walltime <= 0)
mb->add_reject_job(job.id); mb->add_reject_job(job.id);
...@@ -108,8 +144,9 @@ uint8_t batsim_edc_take_decisions( ...@@ -108,8 +144,9 @@ uint8_t batsim_edc_take_decisions(
auto job_id = event->event_as_JobCompletedEvent()->job_id()->str(); auto job_id = event->event_as_JobCompletedEvent()->job_id()->str();
auto job_it = running_jobs.find(job_id); auto job_it = running_jobs.find(job_id);
auto job = job_it->second; auto job = job_it->second;
nb_available_hosts += job->nb_hosts; nb_available_hosts += job->nb_hosts; // usual EASY update
available_hosts += job->alloc; available_hosts += job->alloc;
platform_nb_available_watts += job->power_estimation; // powercap update
delete job; delete job;
running_jobs.erase(job_it); running_jobs.erase(job_it);
...@@ -121,6 +158,7 @@ uint8_t batsim_edc_take_decisions( ...@@ -121,6 +158,7 @@ uint8_t batsim_edc_take_decisions(
if (need_scheduling) { if (need_scheduling) {
::Job* priority_job = nullptr; ::Job* priority_job = nullptr;
uint32_t nb_available_hosts_at_priority_job_start = 0; uint32_t nb_available_hosts_at_priority_job_start = 0;
double nb_available_watts_at_priority_job_start = 0;
float priority_job_start_time = -1; float priority_job_start_time = -1;
// First traversal, done until a job cannot be executed right now and is set as the priority job // First traversal, done until a job cannot be executed right now and is set as the priority job
...@@ -128,13 +166,16 @@ uint8_t batsim_edc_take_decisions( ...@@ -128,13 +166,16 @@ uint8_t batsim_edc_take_decisions(
auto job_it = job_queue.begin(); auto job_it = job_queue.begin();
for (; job_it != job_queue.end(); ) { for (; job_it != job_queue.end(); ) {
auto job = *job_it; auto job = *job_it;
if (job->nb_hosts <= nb_available_hosts) { if ( (job->nb_hosts <= nb_available_hosts) // usual EASY predicate
&& (job->power_estimation <= platform_nb_available_watts) // powercap predicate
) {
running_jobs[job->id] = *job_it; running_jobs[job->id] = *job_it;
job->maximum_finish_time = parsed->now() + job->walltime; job->maximum_finish_time = parsed->now() + job->walltime;
job->alloc = available_hosts.left(job->nb_hosts); job->alloc = available_hosts.left(job->nb_hosts);
mb->add_execute_job(job->id, job->alloc.to_string_hyphen()); mb->add_execute_job(job->id, job->alloc.to_string_hyphen());
available_hosts -= job->alloc; available_hosts -= job->alloc;
nb_available_hosts -= job->nb_hosts; nb_available_hosts -= job->nb_hosts;
platform_nb_available_watts -= job->power_estimation;
job_it = job_queue.erase(job_it); job_it = job_queue.erase(job_it);
} }
...@@ -151,10 +192,15 @@ uint8_t batsim_edc_take_decisions( ...@@ -151,10 +192,15 @@ uint8_t batsim_edc_take_decisions(
std::sort(running_jobs_asc_maximum_finish_time.begin(), running_jobs_asc_maximum_finish_time.end(), ascending_max_finish_time_job_order); std::sort(running_jobs_asc_maximum_finish_time.begin(), running_jobs_asc_maximum_finish_time.end(), ascending_max_finish_time_job_order);
nb_available_hosts_at_priority_job_start = nb_available_hosts; nb_available_hosts_at_priority_job_start = nb_available_hosts;
nb_available_watts_at_priority_job_start = platform_nb_available_watts;
for (const auto & job : running_jobs_asc_maximum_finish_time) { for (const auto & job : running_jobs_asc_maximum_finish_time) {
nb_available_hosts_at_priority_job_start += job->nb_hosts; nb_available_hosts_at_priority_job_start += job->nb_hosts;
if (nb_available_hosts_at_priority_job_start >= priority_job->nb_hosts) { nb_available_watts_at_priority_job_start += job->power_estimation;
if ( (nb_available_hosts_at_priority_job_start >= priority_job->nb_hosts) // usual EASY predicate
&& (nb_available_watts_at_priority_job_start >= priority_job->power_estimation) // powercap predicate
) {
nb_available_hosts_at_priority_job_start -= priority_job->nb_hosts; nb_available_hosts_at_priority_job_start -= priority_job->nb_hosts;
nb_available_watts_at_priority_job_start -= priority_job->power_estimation;
priority_job_start_time = job->maximum_finish_time; priority_job_start_time = job->maximum_finish_time;
break; break;
} }
...@@ -169,17 +215,27 @@ uint8_t batsim_edc_take_decisions( ...@@ -169,17 +215,27 @@ uint8_t batsim_edc_take_decisions(
auto job = *job_it; auto job = *job_it;
// should the job be backfilled? // should the job be backfilled?
float job_finish_time = parsed->now() + job->walltime; float job_finish_time = parsed->now() + job->walltime;
if (job->nb_hosts <= nb_available_hosts && // enough resources now? if ( (job->nb_hosts <= nb_available_hosts) // enough hosts now?
(job->nb_hosts <= nb_available_hosts_at_priority_job_start || job_finish_time <= priority_job_start_time)) { // does not directly hinder the priority job? && (job->power_estimation <= platform_nb_available_watts) // enough power now?
&& (
( (job->nb_hosts <= nb_available_hosts_at_priority_job_start) // cannot hinder priority job at all regarding hosts
&& (job->power_estimation <= nb_available_watts_at_priority_job_start) // cannot hinder priority job at all regarding watts
) // previous block is true if the backfilled job cannot hinder the priority job regardless of the backfilled job duration
|| (job_finish_time <= priority_job_start_time) // the backfilled job finishes before the priority job's expected start time
)
) {
running_jobs[job->id] = *job_it; running_jobs[job->id] = *job_it;
job->maximum_finish_time = job_finish_time; job->maximum_finish_time = job_finish_time;
job->alloc = available_hosts.left(job->nb_hosts); job->alloc = available_hosts.left(job->nb_hosts);
mb->add_execute_job(job->id, job->alloc.to_string_hyphen()); mb->add_execute_job(job->id, job->alloc.to_string_hyphen());
available_hosts -= job->alloc; available_hosts -= job->alloc;
nb_available_hosts -= job->nb_hosts; nb_available_hosts -= job->nb_hosts;
platform_nb_available_watts -= job->power_estimation;
if (job_finish_time > priority_job_start_time) if (job_finish_time > priority_job_start_time) {
nb_available_hosts_at_priority_job_start -= job->nb_hosts; nb_available_hosts_at_priority_job_start -= job->nb_hosts;
nb_available_watts_at_priority_job_start -= job->power_estimation;
}
job_it = job_queue.erase(job_it); job_it = job_queue.erase(job_it);
} }
......
...@@ -8,6 +8,7 @@ project('energusched', 'cpp', ...@@ -8,6 +8,7 @@ project('energusched', 'cpp',
batprotocol_cpp_dep = dependency('batprotocol-cpp') batprotocol_cpp_dep = dependency('batprotocol-cpp')
boost_dep = dependency('boost') boost_dep = dependency('boost')
intervalset_dep = dependency('intervalset') intervalset_dep = dependency('intervalset')
nlohmann_json_dep = dependency('nlohmann_json')
deps = [ deps = [
batprotocol_cpp_dep batprotocol_cpp_dep
] ]
...@@ -16,7 +17,7 @@ common = [ ...@@ -16,7 +17,7 @@ common = [
'batsim_edc.h' 'batsim_edc.h'
] ]
fcfs = shared_library('easy', common + ['easy.cpp'], easy_powercap = shared_library('easypower', common + ['easy.cpp'],
dependencies: deps + [boost_dep, intervalset_dep], dependencies: deps + [boost_dep, intervalset_dep, nlohmann_json_dep],
install: true, install: true,
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment