diff --git a/flake.lock b/flake.lock index 94d77c6050aabf3cc1f76bd983c3f5f078959bb6..1e44deed22560a0c6b9b84b9f4049d8fd1e7d4c4 100644 --- a/flake.lock +++ b/flake.lock @@ -81,6 +81,38 @@ "type": "github" } }, + "easy-powercap-flake": { + "inputs": { + "batprotocol-flake": [ + "batprotocol-flake" + ], + "flake-utils": [ + "flake-utils" + ], + "intervalset-flake": [ + "intervalset-flake" + ], + "nixpkgs": [ + "nixpkgs" + ], + "nur-kapack": [ + "nur-kapack" + ] + }, + "locked": { + "lastModified": 1712244708, + "narHash": "sha256-MQppCw+g2QVFQrmdjz009Jjt8fuiQhldp5kVrMHHyv8=", + "ref": "refs/heads/main", + "rev": "659660c35650e9f46ec47e8c0743d75649e68d7b", + "revCount": 4, + "type": "git", + "url": "https://framagit.org/batsim/easy-powercap.git?tag=europar24" + }, + "original": { + "type": "git", + "url": "https://framagit.org/batsim/easy-powercap.git?tag=europar24" + } + }, "flake-parts": { "inputs": { "nixpkgs-lib": "nixpkgs-lib" @@ -236,6 +268,7 @@ "inputs": { "batprotocol-flake": "batprotocol-flake", "batsim-flake": "batsim-flake", + "easy-powercap-flake": "easy-powercap-flake", "flake-utils": "flake-utils", "intervalset-flake": "intervalset-flake", "nixpkgs": "nixpkgs", diff --git a/flake.nix b/flake.nix index cf9e4f0af1e1f6b8fc83a7ecbf7da6a5eff01f43..21c482d895364f75a079e14d80fce62cab5b6281 100644 --- a/flake.nix +++ b/flake.nix @@ -27,12 +27,20 @@ inputs.intervalset.follows = "intervalset-flake"; inputs.flake-utils.follows = "flake-utils"; }; + easy-powercap-flake = { + url = "git+https://framagit.org/batsim/easy-powercap.git?tag=europar24"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.nur-kapack.follows = "nur-kapack"; + inputs.batprotocol-flake.follows = "batprotocol-flake"; + inputs.intervalset-flake.follows = "intervalset-flake"; + inputs.flake-utils.follows = "flake-utils"; + }; typst-flake = { url = "github:typst/typst?rev=21c78abd6eecd0f6b3208405c7513be3bbd8991c"; }; }; - outputs = { self, nixpkgs, nur-kapack, intervalset-flake, flake-utils, batprotocol-flake, batsim-flake, typst-flake }: + outputs = { self, nixpkgs, nur-kapack, intervalset-flake, flake-utils, batprotocol-flake, batsim-flake, easy-powercap-flake, typst-flake }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system: let pkgs = import nixpkgs { inherit system; }; @@ -42,6 +50,7 @@ batprotopkgs = batprotocol-flake.packages.${system}; intervalsetpkgs = intervalset-flake.packages.${system}; batpkgs = batsim-flake.packages-release.${system}; + easy-powercap-pkgs = easy-powercap-flake.packages.${system}; typstpkgs = typst-flake.packages.${system}; in rec { packages = { @@ -72,25 +81,7 @@ hash = "sha256-e0gnC/HMYdrYdEwy6qNOD1J52xgN2x81oCG03YNsYjg="; }; }); - energusched = (nur-kapack.lib.${system}.cppMesonDevBase { - stdenv = pkgs.stdenv; - lib = pkgs.lib; - meson = pkgs.meson; - ninja = pkgs.ninja; - pkg-config = pkgs.pkg-config; - }).overrideAttrs(attrs: rec { - name = "energusched"; - src = pkgs.lib.sourceByRegex ./sched-cpp [ - "^meson\.build" - "^.*\.?pp" - "^.*\.h" - ]; - buildInputs = [ - pkgs.nlohmann_json - batprotopkgs.batprotocol-cpp - intervalsetpkgs.intervalset - ]; - }); + easypower-sched-lib = easy-powercap-pkgs.easypower; }; devShells = { download-m100-months = pkgs.mkShell { @@ -125,11 +116,11 @@ simulation = pkgs.mkShell { buildInputs = [ packages.batsim - packages.energusched + packages.easypower-sched-lib packages.python-scripts ]; BATSIM_ROOT_PATH="${batsim-flake}"; - EDC_LIBRARY_PATH="${packages.energusched}/lib"; + EDC_LIBRARY_PATH="${packages.easypower-sched-lib}/lib"; }; r = pkgs.mkShell { diff --git a/sched-cpp/batsim_edc.h b/sched-cpp/batsim_edc.h deleted file mode 100644 index f63f4b2fcd219b0735a79a2d88e6b99330b6cf42..0000000000000000000000000000000000000000 --- a/sched-cpp/batsim_edc.h +++ /dev/null @@ -1,52 +0,0 @@ -// This is free and unencumbered software released into the public domain. -// For more information, please refer to <http://unlicense.org/> - -// This file describes the C API you can use to make your decision components -// (schedulers, workload injectors...) callable by Batsim as dynamic libraries. - -#ifdef __cplusplus -extern "C" { -#endif -#include <stdint.h> - -// These are the flags supported by batsim_edc_init() argument. -#define BATSIM_EDC_FORMAT_BINARY 0x1 // Format is flatbuffers's binary format. Messages are pointers to buffers generated by a flatbuffers library. -#define BATSIM_EDC_FORMAT_JSON 0x2 // Format is flatbuffer's JSON format. Messages are NULL-terminated C strings with JSON content. - -/** - * @brief The batsim_edc_init() function is called by Batsim to initialize your external decision component. - * @details This is typically used to initialize global data structures to de/serialize messages and to take decisions later on. - * - * @param[in] data The initialization data of your decision component. This is retrieved from Batsim's command-line arguments. - * @param[in] size The size of your initialization data. This is retrieved from Batsim's command-line arguments. - * @param[in] flags These flags tell you additional information on how to communicate with Batsim. - * Currently, this is only used to know which data format should be used (flatbuffers's binary format or flatbuffer's JSON format). - * @return Zero if and only if you could initialize yourself successfully. - */ -uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags); - -/** - * @brief The batsim_edc_deinit() function is called by Batsim when it stops calling your decision component. - * @details This is typically used to deallocate any memory allocated by batsim_edc_init() or batsim_edc_take_decisions(). - * @return Zero if and only if you could deinitialize yourself successfully. - */ -uint8_t batsim_edc_deinit(); - -/** - * @brief The batsim_edc_take_decisions() function is called by Batsim when it asks you to take decisions. - * - * @param[in] what_happened A Batsim protocol message that contains what happened in the simulation since the previous call to your decision component - * (that is to say, since last batsim_edc_take_decisions() call or since the initial batsim_edc_init() at the beginning of the simulation). - * The message format depends on what flags were given to batsim_edc_init(). - * @param[in] what_happened_size The size (in bytes) of the what_happened input buffer. - * @param[out] decisions A Batsim protocol message that contains the decisions taken by this function. - * The buffer should be formatted according to the flags given to batsim_edc_init(). - * This buffer must be allocated by you and must persist in memory at least until the next batsim_edc_take_decisions() or batsim_edc_deinit() call. - * @param[out] decisions_size The size (in bytes) of the decisions output buffer. - * @return Zero if and only if you could take decisions. - */ -uint8_t batsim_edc_take_decisions(const uint8_t * what_happened, uint32_t what_happened_size, uint8_t ** decisions, uint32_t * decisions_size); - -#ifdef __cplusplus -} -#endif diff --git a/sched-cpp/easy.cpp b/sched-cpp/easy.cpp deleted file mode 100644 index b8a615b16af2bc97fdd8518c738afee204c493ca..0000000000000000000000000000000000000000 --- a/sched-cpp/easy.cpp +++ /dev/null @@ -1,254 +0,0 @@ -#include <cstdint> -#include <list> -#include <string> -#include <unordered_map> - -#include <batprotocol.hpp> -#include <intervalset.hpp> -#include <nlohmann/json.hpp> - -#include "batsim_edc.h" - -using namespace batprotocol; -using json = nlohmann::json; - -struct Job -{ - std::string id; - uint32_t nb_hosts; - double walltime; - - IntervalSet alloc; - double maximum_finish_time; - double power_estimation; -}; - -MessageBuilder * mb = nullptr; -bool format_binary = true; // whether flatbuffers binary or json format should be used - -uint32_t platform_nb_hosts = 0; -std::list<::Job*> job_queue; -std::unordered_map<std::string, ::Job*> running_jobs; -uint32_t nb_available_hosts = 0; -IntervalSet available_hosts; - -double platform_normal_dynamic_watts = -1; -double platform_powercap_dynamic_watts = -1; -double platform_current_powercap_dynamic_watts = -1; -double platform_nb_available_watts = -1; -double powercap_end_time = -1; -double idle_power_watts = -1; // per node -std::string power_estimation_field; - -uint8_t batsim_edc_init(const uint8_t * data, uint32_t size, uint32_t flags) -{ - format_binary = ((flags & BATSIM_EDC_FORMAT_BINARY) != 0); - if ((flags & (BATSIM_EDC_FORMAT_BINARY | BATSIM_EDC_FORMAT_JSON)) != flags) - { - printf("Unknown flags used, cannot initialize myself.\n"); - return 1; - } - - mb = new MessageBuilder(!format_binary); - - std::string init_string((const char *)data, static_cast<size_t>(size)); - try { - auto init_json = json::parse(init_string); - platform_normal_dynamic_watts = init_json["normal_dynamic_watts"]; - platform_powercap_dynamic_watts = init_json["powercap_dynamic_watts"]; - idle_power_watts = init_json["idle_watts"]; - platform_current_powercap_dynamic_watts = platform_powercap_dynamic_watts; // simulation starts in the powercap constrained window - platform_nb_available_watts = platform_current_powercap_dynamic_watts; - powercap_end_time = init_json["powercap_end_time_seconds"]; - power_estimation_field = init_json["job_power_estimation_field"]; - } catch (const json::exception & e) { - throw std::runtime_error("scheduler called with bad init string: " + std::string(e.what())); - } - - return 0; -} - -uint8_t batsim_edc_deinit() -{ - delete mb; - mb = nullptr; - - return 0; -} - -bool ascending_max_finish_time_job_order(const ::Job* a, const ::Job* b) { - return a->maximum_finish_time < b->maximum_finish_time; -} - -uint8_t batsim_edc_take_decisions( - const uint8_t * what_happened, - uint32_t what_happened_size, - uint8_t ** decisions, - uint32_t * decisions_size) -{ - (void) what_happened_size; - auto * parsed = deserialize_message(*mb, !format_binary, what_happened); - mb->clear(parsed->now()); - - // should only become true once, when the powercap window finishes - if (parsed->now() >= powercap_end_time) { - platform_current_powercap_dynamic_watts = platform_normal_dynamic_watts; - platform_nb_available_watts += (platform_normal_dynamic_watts - platform_powercap_dynamic_watts); - } - - bool need_scheduling = false; - auto nb_events = parsed->events()->size(); - for (unsigned int i = 0; i < nb_events; ++i) { - auto event = (*parsed->events())[i]; - switch (event->event_type()) - { - case fb::Event_BatsimHelloEvent: { - mb->add_edc_hello("easy-powercap", "0.1.0"); - } break; - case fb::Event_SimulationBeginsEvent: { - auto simu_begins = event->event_as_SimulationBeginsEvent(); - platform_nb_hosts = simu_begins->computation_host_number(); - nb_available_hosts = platform_nb_hosts; - available_hosts = IntervalSet::ClosedInterval(0, platform_nb_hosts - 1); - } break; - case fb::Event_JobSubmittedEvent: { - auto parsed_job = event->event_as_JobSubmittedEvent(); - ::Job job{ - parsed_job->job_id()->str(), - parsed_job->job()->resource_request(), - parsed_job->job()->walltime(), - IntervalSet::empty_interval_set(), - -1, - -1 - }; - - try { - auto extra_data = json::parse(parsed_job->job()->extra_data()->str()); - job.power_estimation = std::max(0.0, (double)extra_data[power_estimation_field] - idle_power_watts * job.nb_hosts); - } catch (const json::exception & e) { - throw std::runtime_error("bad extra_data in job submitted: tried to read field " + power_estimation_field); - } - - if ( (job.nb_hosts > platform_nb_hosts) // usual EASY predicate - || (job.power_estimation > platform_normal_dynamic_watts) // powercap predicate - ) - mb->add_reject_job(job.id); - else if (job.walltime <= 0) - mb->add_reject_job(job.id); - else { - need_scheduling = true; - job_queue.emplace_back(new ::Job(job)); - } - } break; - case fb::Event_JobCompletedEvent: { - need_scheduling = true; - - auto job_id = event->event_as_JobCompletedEvent()->job_id()->str(); - auto job_it = running_jobs.find(job_id); - auto job = job_it->second; - nb_available_hosts += job->nb_hosts; // usual EASY update - available_hosts += job->alloc; - platform_nb_available_watts += job->power_estimation; // powercap update - - delete job; - running_jobs.erase(job_it); - } break; - default: break; - } - } - - if (need_scheduling) { - ::Job* priority_job = nullptr; - uint32_t nb_available_hosts_at_priority_job_start = 0; - double nb_available_watts_at_priority_job_start = 0; - float priority_job_start_time = -1; - - // First traversal, done until a job cannot be executed right now and is set as the priority job - // (or until all jobs have been executed) - auto job_it = job_queue.begin(); - for (; job_it != job_queue.end(); ) { - auto job = *job_it; - if ( (job->nb_hosts <= nb_available_hosts) // usual EASY predicate - && (job->power_estimation <= platform_nb_available_watts) // powercap predicate - ) { - running_jobs[job->id] = *job_it; - job->maximum_finish_time = parsed->now() + job->walltime; - job->alloc = available_hosts.left(job->nb_hosts); - mb->add_execute_job(job->id, job->alloc.to_string_hyphen()); - available_hosts -= job->alloc; - nb_available_hosts -= job->nb_hosts; - platform_nb_available_watts -= job->power_estimation; - - job_it = job_queue.erase(job_it); - } - else - { - priority_job = *job_it; - ++job_it; - - // compute when the priority job can start, and the number of available machines at this time - std::vector<::Job*> running_jobs_asc_maximum_finish_time; - running_jobs_asc_maximum_finish_time.reserve(running_jobs.size()); - for (const auto & it : running_jobs) - running_jobs_asc_maximum_finish_time.push_back(it.second); - std::sort(running_jobs_asc_maximum_finish_time.begin(), running_jobs_asc_maximum_finish_time.end(), ascending_max_finish_time_job_order); - - nb_available_hosts_at_priority_job_start = nb_available_hosts; - nb_available_watts_at_priority_job_start = platform_nb_available_watts; - for (const auto & job : running_jobs_asc_maximum_finish_time) { - nb_available_hosts_at_priority_job_start += job->nb_hosts; - nb_available_watts_at_priority_job_start += job->power_estimation; - if ( (nb_available_hosts_at_priority_job_start >= priority_job->nb_hosts) // usual EASY predicate - && (nb_available_watts_at_priority_job_start >= priority_job->power_estimation) // powercap predicate - ) { - nb_available_hosts_at_priority_job_start -= priority_job->nb_hosts; - nb_available_watts_at_priority_job_start -= priority_job->power_estimation; - priority_job_start_time = job->maximum_finish_time; - break; - } - } - - break; - } - } - - // Continue traversal to backfill jobs - for (; job_it != job_queue.end(); ) { - auto job = *job_it; - // should the job be backfilled? - float job_finish_time = parsed->now() + job->walltime; - if ( (job->nb_hosts <= nb_available_hosts) // enough hosts now? - && (job->power_estimation <= platform_nb_available_watts) // enough power now? - && ( - ( (job->nb_hosts <= nb_available_hosts_at_priority_job_start) // cannot hinder priority job at all regarding hosts - && (job->power_estimation <= nb_available_watts_at_priority_job_start) // cannot hinder priority job at all regarding watts - ) // previous block if true if the backfilled job cannot hinder the priority job regardless of the backfilled job duration - || (job_finish_time <= priority_job_start_time) // the backfilled job finishes before the priority job's expected start time - ) - ) { - running_jobs[job->id] = *job_it; - job->maximum_finish_time = job_finish_time; - job->alloc = available_hosts.left(job->nb_hosts); - mb->add_execute_job(job->id, job->alloc.to_string_hyphen()); - available_hosts -= job->alloc; - nb_available_hosts -= job->nb_hosts; - platform_nb_available_watts -= job->power_estimation; - - if (job_finish_time > priority_job_start_time) { - nb_available_hosts_at_priority_job_start -= job->nb_hosts; - nb_available_watts_at_priority_job_start -= job->power_estimation; - } - - job_it = job_queue.erase(job_it); - } - else if (nb_available_hosts == 0) - break; - else - ++job_it; - } - } - - mb->finish_message(parsed->now()); - serialize_message(*mb, !format_binary, const_cast<const uint8_t **>(decisions), decisions_size); - return 0; -} diff --git a/sched-cpp/meson.build b/sched-cpp/meson.build deleted file mode 100644 index ee5f38a99f8a58cc3970c68f31d7a089f13dd09f..0000000000000000000000000000000000000000 --- a/sched-cpp/meson.build +++ /dev/null @@ -1,23 +0,0 @@ -project('energusched', 'cpp', - version: '0.1.0', - license: 'LGPL-3.0', - default_options: ['cpp_std=c++17'], - meson_version: '>=0.40.0' -) - -batprotocol_cpp_dep = dependency('batprotocol-cpp') -boost_dep = dependency('boost') -intervalset_dep = dependency('intervalset') -nlohmann_json_dep = dependency('nlohmann_json') -deps = [ - batprotocol_cpp_dep -] - -common = [ - 'batsim_edc.h' -] - -easy_powercap = shared_library('easypower', common + ['easy.cpp'], - dependencies: deps + [boost_dep, intervalset_dep, nlohmann_json_dep], - install: true, -)