diff --git a/1_test_expe.ipynb b/analyse_campaign1.ipynb similarity index 99% rename from 1_test_expe.ipynb rename to analyse_campaign1.ipynb index 6b1f74fe92ff7619f3e12dc50b09dbbd811efc3e..c810e5d678f48b14f699f899935bcfdc8bb5544a 100644 --- a/1_test_expe.ipynb +++ b/analyse_campaign1.ipynb @@ -5,7 +5,7 @@ "id": "sustained-helena", "metadata": {}, "source": [ - "# Test expe \n", + "# Analyse the data from the first campaign \n", "\n", "## Initializing the environment" ] @@ -17,22 +17,13 @@ "metadata": {}, "outputs": [], "source": [ - "# from workload_sampler import workload_sampler\n", - "#import useful_fun\n", - "# from matplotlib import figure, pyplot as plt\n", - "# from evalys.jobset import JobSet\n", - "# from evalys.visu.gantt import plot_gantt\n", "import pandas as pd, json, numpy as np\n", "import os, random\n", - "# from evalys.metrics import compute_load\n", - "# import evalys.visu.legacy as vleg\n", + "from scripts.util import * \n", "\n", "EXPE_DIR = \"1_test_expe\"\n", "WL_DIR = \"workload/May14_day1_2_3\"\n", - "if not os.path.exists(WL_DIR):\n", - " os.makedirs(WL_DIR)\n", - "if not os.path.exists(EXPE_DIR):\n", - " os.makedirs(EXPE_DIR)\n", + "\n", "# Platform file, workload file (empty bc only dynamic submission)\n", "pf = \"platform/average_metacentrum.xml\"\n", "wl = \"workload/empty_workload.json\"\n", @@ -42,455 +33,10 @@ }, { "cell_type": "markdown", - "id": "2ecfca61", - "metadata": {}, - "source": [ - "## Prepare the workload\n", - "Let's truncate the cleaned input workload (see `0_prepare_workload`) to keep only 3 days:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "64f9c72e", - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unix Time May 1st 2014: 1398895200\n", - "Unix Time May 3rt 2014: 1399154399\n", - "We should keep all the jobs submitted between 41900394 and 42159593\n", - "Processing swf line 100000\n", - "Processing swf line 200000\n", - 
"Processing swf line 300000\n", - "Processing swf line 400000\n", - "Processing swf line 500000\n", - "Processing swf line 600000\n", - "Processing swf line 700000\n", - "Processing swf line 800000\n", - "Processing swf line 900000\n", - "Processing swf line 1000000\n", - "Processing swf line 1100000\n", - "Processing swf line 1200000\n", - "Processing swf line 1300000\n", - "Processing swf line 1400000\n", - "Processing swf line 1500000\n", - "Processing swf line 1600000\n", - "Processing swf line 1700000\n", - "Processing swf line 1800000\n", - "Processing swf line 1900000\n", - "Processing swf line 2000000\n", - "Processing swf line 2100000\n", - "Processing swf line 2200000\n", - "Processing swf line 2300000\n", - "Processing swf line 2400000\n", - "Processing swf line 2500000\n", - "Processing swf line 2600000\n", - "Processing swf line 2700000\n", - "Processing swf line 2800000\n", - "Processing swf line 2900000\n", - "Processing swf line 3000000\n", - "Processing swf line 3100000\n", - "Processing swf line 3200000\n", - "Processing swf line 3300000\n", - "Processing swf line 3400000\n", - "Processing swf line 3500000\n", - "Processing swf line 3600000\n", - "Processing swf line 3700000\n", - "Processing swf line 3800000\n", - "Processing swf line 3900000\n", - "Processing swf line 4000000\n", - "Processing swf line 4100000\n", - "Processing swf line 4200000\n", - "Processing swf line 4300000\n", - "Processing swf line 4400000\n", - "Processing swf line 4500000\n", - "Processing swf line 4600000\n", - "Processing swf line 4700000\n", - "Processing swf line 4800000\n", - "-------------------\n", - "End parsing\n", - "Total 28879 jobs and 84 users have been created.\n", - "Total number of core-hours: 41083\n", - "4807291 valid jobs were not selected (keep_only) for 16764884 core-hour\n", - "Jobs not selected: 99.4% in number, 99.8% in core-hour\n", - "0 out of 4836171 lines in the file did not match the swf format\n", - "25 jobs were not valid\n" - ] - } - ], - 
"source": [ - "from time import *\n", - "begin_trace = 1356994806\n", - "may1_unix_time, may3_unix_time = mktime(strptime('Thu May 1 00:00:00 2014')), mktime(strptime('Sat May 3 23:59:59 2014'))\n", - "may1, may3 = (int) (may1_unix_time - begin_trace), (int) (may3_unix_time - begin_trace)\n", - "print(\"Unix Time May 1st 2014: {:.0f}\".format( may1_unix_time ))\n", - "print(\"Unix Time May 3rt 2014: {:.0f}\".format( may3_unix_time ))\n", - "print(\"We should keep all the jobs submitted between {:d} and {:d}\".format(may1, may3))\n", - "\n", - "# Create a swf with only these 3 days\n", - "! 0_prepare_workload/swf_moulinette.py workload/MC_selection_article.swf -o workload/May14_day1_2_3.swf \\\n", - " --keep_only=\"submit_time >= {may1} and submit_time <= {may3}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b36e080d", - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "user 2:\n", - " 9 jobs had been created\n", - "user 66:\n", - " 44 jobs had been created\n", - "user 167:\n", - " 25 jobs had been created\n", - "user 426:\n", - " 6 jobs had been created\n", - "user 4:\n", - " 51 jobs had been created\n", - "user 246:\n", - " 1 jobs had been created\n", - "user 435:\n", - " 6 jobs had been created\n", - "user 47:\n", - " 6 jobs had been created\n", - "user 275:\n", - " 71 jobs had been created\n", - "user 463:\n", - " 10 jobs had been created\n", - "user 550:\n", - " 4 jobs had been created\n", - "user 68:\n", - " 6 jobs had been created\n", - "user 54:\n", - " 37 jobs had been created\n", - "user 5:\n", - " 302 jobs had been created\n", - "user 26:\n", - " 44 jobs had been created\n", - "user 160:\n", - " 386 jobs had been created\n", - "user 34:\n", - " 12 jobs had been created\n", - "user 437:\n", - " 150 jobs had been created\n", - "user 73:\n", - " 37 jobs had been created\n", - "user 272:\n", - " 4 jobs had been created\n", - "user 333:\n", - " 713 jobs had been 
created\n", - "user 3:\n", - " 20 jobs had been created\n", - "user 23:\n", - " 16 jobs had been created\n", - "user 546:\n", - " 5 jobs had been created\n", - "user 569:\n", - " 171 jobs had been created\n", - "user 523:\n", - " 2 jobs had been created\n", - "user 179:\n", - " 63 jobs had been created\n", - "user 168:\n", - " 173 jobs had been created\n", - "user 616:\n", - " 11 jobs had been created\n", - "user 586:\n", - " 263 jobs had been created\n", - "user 346:\n", - " 24688 jobs had been created\n", - "user 440:\n", - " 92 jobs had been created\n", - "user 50:\n", - " 21 jobs had been created\n", - "user 108:\n", - " 8 jobs had been created\n", - "user 634:\n", - " 1 jobs had been created\n", - "user 486:\n", - " 12 jobs had been created\n", - "user 372:\n", - " 3 jobs had been created\n", - "user 410:\n", - " 8 jobs had been created\n", - "user 45:\n", - " 91 jobs had been created\n", - "user 517:\n", - " 21 jobs had been created\n", - "user 474:\n", - " 101 jobs had been created\n", - "user 604:\n", - " 6 jobs had been created\n", - "user 166:\n", - " 18 jobs had been created\n", - "user 96:\n", - " 404 jobs had been created\n", - "user 619:\n", - " 1 jobs had been created\n", - "user 635:\n", - " 2 jobs had been created\n", - "user 165:\n", - " 33 jobs had been created\n", - "user 378:\n", - " 46 jobs had been created\n", - "user 138:\n", - " 13 jobs had been created\n", - "user 319:\n", - " 2 jobs had been created\n", - "user 447:\n", - " 9 jobs had been created\n", - "user 91:\n", - " 6 jobs had been created\n", - "user 141:\n", - " 15 jobs had been created\n", - "user 537:\n", - " 315 jobs had been created\n", - "user 39:\n", - " 3 jobs had been created\n", - "user 99:\n", - " 14 jobs had been created\n", - "user 191:\n", - " 3 jobs had been created\n", - "user 526:\n", - " 4 jobs had been created\n", - "user 487:\n", - " 1 jobs had been created\n", - "user 25:\n", - " 5 jobs had been created\n", - "user 531:\n", - " 1 jobs had been created\n", - 
"user 436:\n", - " 17 jobs had been created\n", - "user 424:\n", - " 77 jobs had been created\n", - "user 636:\n", - " 12 jobs had been created\n", - "user 551:\n", - " 3 jobs had been created\n", - "user 516:\n", - " 11 jobs had been created\n", - "user 571:\n", - " 4 jobs had been created\n", - "user 29:\n", - " 6 jobs had been created\n", - "user 433:\n", - " 20 jobs had been created\n", - "user 446:\n", - " 4 jobs had been created\n", - "user 11:\n", - " 1 jobs had been created\n", - "user 53:\n", - " 5 jobs had been created\n", - "user 100:\n", - " 4 jobs had been created\n", - "user 638:\n", - " 1 jobs had been created\n", - "user 115:\n", - " 1 jobs had been created\n", - "user 16:\n", - " 39 jobs had been created\n", - "user 89:\n", - " 4 jobs had been created\n", - "user 625:\n", - " 7 jobs had been created\n", - "user 281:\n", - " 3 jobs had been created\n", - "user 592:\n", - " 31 jobs had been created\n", - "user 632:\n", - " 3 jobs had been created\n", - "user 465:\n", - " 4 jobs had been created\n", - "user 545:\n", - " 24 jobs had been created\n", - "user 452:\n", - " 3 jobs had been created\n", - "-------------------\n", - "End parsing\n", - "Total 28879 jobs and 84 users have been created.\n", - "Total number of core-hours: 41083\n", - "0 valid jobs were not selected (keep_only) for 0 core-hour\n", - "Jobs not selected: 0.0% in number, 0.0% in core-hour\n", - "0 out of 28880 lines in the file did not match the swf format\n", - "0 jobs were not valid\n" - ] - } - ], - "source": [ - "# Transform into JSON and split by users\n", - "! 
0_prepare_workload/swf_to_batsim_split_by_user.py {WL_DIR}/May14_day1_2_3.swf {WL_DIR} \\\n", - " --start_time={may1} -jg 10 --job_walltime_factor=8" - ] - }, - { - "cell_type": "markdown", - "id": "continental-encounter", + "id": "b20d17c8", "metadata": {}, "source": [ - "## 100% Replay Rigid" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "raising-collective", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mINFO\u001b[0m[2022-01-26 17:15:56.779] Waiting for valid context \u001b[36mbatsim command\u001b[0m=\"batsim -p platform/average_metacentrum.xml -w workload/empty_workload.json -e 1_test_expe/ReplayRigid_may1_2_3/ --energy --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse\" \u001b[36mextracted socket endpoint\u001b[0m=\"tcp://localhost:28000\" \u001b[36mready timeout (seconds)\u001b[0m=10\r\n", - "\u001b[36mINFO\u001b[0m[2022-01-26 17:15:56.795] Starting simulation \u001b[36mbatsim cmdfile\u001b[0m=1_test_expe/ReplayRigid_may1_2_3/cmd/batsim.bash \u001b[36mbatsim command\u001b[0m=\"batsim -p platform/average_metacentrum.xml -w workload/empty_workload.json -e 1_test_expe/ReplayRigid_may1_2_3/ --energy --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse\" \u001b[36mbatsim logfile\u001b[0m=1_test_expe/ReplayRigid_may1_2_3/log/batsim.log \u001b[36mscheduler cmdfile\u001b[0m=1_test_expe/ReplayRigid_may1_2_3/cmd/sched.bash \u001b[36mscheduler command\u001b[0m=\"batsched -v bin_packing_energy --queue_order=desc_size --variant_options_filepath=sched_input/user_description_file.json\" \u001b[36mscheduler logfile (err)\u001b[0m=1_test_expe/ReplayRigid_may1_2_3/log/sched.err.log \u001b[36mscheduler logfile (out)\u001b[0m=1_test_expe/ReplayRigid_may1_2_3/log/sched.out.log \u001b[36msimulation timeout (seconds)\u001b[0m=604800\r\n" - ] - } - ], - "source": [ - "EXPE_FILE = EXPE_DIR + 
\"/ReplayRigid_may1_2_3\"\n", - "\n", - "# Génère le user_description_file à partir de la liste des utilisateurs\n", - "# qui ont leur fichier .json dans le répertoire \n", - "wl_folder = WL_DIR\n", - "def user_description(user):\n", - " return {\n", - " \"name\": user,\n", - " \"category\": \"replay_user_rigid\",\n", - " \"param\": {\"input_json\": wl_folder + \"/\" + user + \".json\"}\n", - " }\n", - "\n", - "user_names = [user_file.split('.')[0] for user_file in os.listdir(wl_folder) if user_file.split('.')[1] == \"json\"]\n", - "data = {}\n", - "data[\"dm_window\"] = dm_window\n", - "data[\"log_user_stats\"] = True\n", - "data[\"log_folder\"] = EXPE_FILE\n", - "data[\"users\"] = [user_description(user) for user in user_names]\n", - "\n", - "uf = \"sched_input/user_description_file.json\"\n", - "with open(uf, 'w') as user_description_file:\n", - " json.dump(data, user_description_file)\n", - "\n", - "! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \\\n", - " --batcmd=\"batsim -p {pf} -w {wl} -e {EXPE_FILE}/ --energy --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse\"\\\n", - " --schedcmd='batsched -v bin_packing_energy --queue_order=desc_size --variant_options_filepath={uf}'\n", - "! 
robin {EXPE_FILE}.yaml\n", - "print(\"\\n******************\\nSimulation done.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "empty-circuit", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['user447.json',\n", - " 'user34.json',\n", - " 'user604.json',\n", - " 'user526.json',\n", - " 'user39.json',\n", - " 'user546.json',\n", - " 'user625.json',\n", - " 'user433.json',\n", - " 'user410.json',\n", - " 'user53.json',\n", - " 'user426.json',\n", - " 'user531.json',\n", - " 'user160.json',\n", - " 'user23.json',\n", - " 'user634.json',\n", - " 'user452.json',\n", - " 'user99.json',\n", - " 'user281.json',\n", - " 'user138.json',\n", - " 'user440.json',\n", - " 'user26.json',\n", - " 'user4.json',\n", - " 'user141.json',\n", - " 'user2.json',\n", - " 'user550.json',\n", - " 'user551.json',\n", - " 'user89.json',\n", - " 'user5.json',\n", - " 'user516.json',\n", - " 'user586.json',\n", - " 'user96.json',\n", - " 'user73.json',\n", - " 'user168.json',\n", - " 'user91.json',\n", - " 'user537.json',\n", - " 'user333.json',\n", - " 'user523.json',\n", - " 'user486.json',\n", - " 'user571.json',\n", - " 'user435.json',\n", - " 'user167.json',\n", - " 'user25.json',\n", - " 'user616.json',\n", - " 'user272.json',\n", - " 'user16.json',\n", - " 'user66.json',\n", - " 'user638.json',\n", - " 'user378.json',\n", - " 'user592.json',\n", - " 'user635.json',\n", - " 'user165.json',\n", - " 'user346.json',\n", - " 'user11.json',\n", - " 'user474.json',\n", - " 'user108.json',\n", - " 'user191.json',\n", - " 'user179.json',\n", - " 'user619.json',\n", - " 'user569.json',\n", - " 'user54.json',\n", - " 'user436.json',\n", - " 'user372.json',\n", - " 'user632.json',\n", - " 'user487.json',\n", - " 'user465.json',\n", - " 'user45.json',\n", - " 'user166.json',\n", - " 'user50.json',\n", - " 'user463.json',\n", - " 'user3.json',\n", - " 'user424.json',\n", - " 'user636.json',\n", - " 'user115.json',\n", - " 'user319.json',\n", - " 
'user100.json',\n", - " 'user545.json',\n", - " 'user446.json',\n", - " 'user68.json',\n", - " 'user437.json',\n", - " '.ipynb_checkpoints',\n", - " 'user29.json',\n", - " 'user246.json',\n", - " 'user517.json',\n", - " 'user47.json',\n", - " 'user275.json']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.listdir(wl_folder)" + "## Look at one specific expe" ] }, { @@ -524,8 +70,8 @@ ], "source": [ "# Visualize results\n", - "EXPE_FILE = EXPE_DIR + \"/ReplayRigid_may1_2_3\"\n", - "plot_and_save(EXPE_FILE)" + "EXPE_FILE = f'{ROOT_DIR}/out/expe0/replay_user_rigid_window1'\n", + "plot_load_and_details(EXPE_FILE)" ] }, { diff --git a/campaign1.py b/campaign1.py old mode 100644 new mode 100755 index f9cb454eccf7cb153f22ef80b93c02089f7ecac4..d8689ed5d5a0ab407180decd07839ec56557e7f4 --- a/campaign1.py +++ b/campaign1.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import random from time import * import concurrent.futures diff --git a/scripts/util.py b/scripts/util.py index 69bcc591ce53aea19b52358f074a563a5dc9a102..d5801a7ce1fd7694fab88d16eecebf2b2d421a8c 100644 --- a/scripts/util.py +++ b/scripts/util.py @@ -2,12 +2,20 @@ import os import os.path import subprocess -import filecmp -from collections import namedtuple +from matplotlib import figure, pyplot as plt +from evalys.jobset import JobSet +from evalys.metrics import compute_load +import evalys.visu.legacy as vleg +############ +# USEFUL VAR +############ ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '..')) WL_DIR = f'{ROOT_DIR}/workload' +############ +# TO RUN EXPES WITH ROBIN +############ class RobinInstance(object): def __init__(self, output_dir, batcmd, schedcmd, simulation_timeout, ready_timeout, success_timeout, failure_timeout): self.output_dir = output_dir @@ -48,11 +56,38 @@ def create_dir_rec_if_needed(dirname): def run_robin(filename): return subprocess.run(['robin', filename]) -# def init_instance(test_name): -# 
############
# FOR DATA VIZ
############
class JobSetMulticore(JobSet):
    """Custom version of JobSet that changes how 'utilisation' is computed.

    ``MaxProcs`` is scaled by the number of cores per machine, and the load
    is cumulated over ``requested_number_of_resources`` (the original author
    notes that the parent implementation uses ``proc_alloc``).
    """

    # Cores per machine on the simulated platform (the original code
    # hard-coded 16 with the note "because my machines have 16 cores").
    # Override in a subclass for a different platform.
    CORES_PER_MACHINE = 16

    def __init__(self, df, resource_bounds=None, float_precision=6):
        """Build the job set, then rescale ``MaxProcs`` to count cores.

        Arguments are forwarded unchanged to ``JobSet.__init__``.
        """
        super().__init__(df, resource_bounds, float_precision)
        # NOTE(review): presumably ``res_bounds`` has one element per machine,
        # so this converts a machine count into a core count -- confirm
        # against evalys.
        self.MaxProcs = len(self.res_bounds) * self.CORES_PER_MACHINE

    @property  # override
    def utilisation(self):
        """Return the load over time, computed on first access and cached."""
        # ``_utilisation`` is expected to be initialised by the parent
        # constructor; it stays ``None`` until the first access.
        if self._utilisation is None:
            self._utilisation = compute_load(
                self.df,
                col_begin='starting_time',
                col_end='finish_time',
                col_cumsum='requested_number_of_resources',  # original: proc_alloc
            )
        return self._utilisation


def plot_load_and_details(expe_file, begin=24 * 3600, end=48 * 3600,
                          tick_step=2 * 3600):
    """Plot the load and the job details of one experiment, then save it.

    Reads ``<expe_file>/_jobs.csv``, draws the load on the top axes and the
    job details on the bottom axes (shared x axis), writes the figure to
    ``<expe_file>_viz.png`` (next to the experiment directory, not inside
    it) and displays it.

    Args:
        expe_file: Path of the experiment output directory.
        begin: Left bound of the displayed window, in seconds.
            Defaults to 24 h.
        end: Right bound of the displayed window, in seconds.
            Defaults to 48 h.
        tick_step: Distance between two x ticks, in seconds.
            Defaults to 2 h.
    """
    # Imported here because numpy is not among this module's visible imports;
    # without it the tick computation below raises NameError.
    import numpy as np

    js = JobSetMulticore.from_csv(expe_file + "/_jobs.csv")
    # The whole trace is plotted; only the x axis is restricted to
    # [begin, end]. Jobs are deliberately not filtered on submission time.
    fig, axe = plt.subplots(nrows=2, sharex=True, figsize=(16, 8),
                            tight_layout=True)
    try:
        fig.suptitle(expe_file, fontsize=16)
        vleg.plot_load(js.utilisation, js.MaxProcs, time_scale=False, ax=axe[0])
        vleg.plot_job_details(js.df, js.MaxProcs, time_scale=False, ax=axe[1])

        # Ticks are placed in seconds but labelled in whole hours. Deriving
        # the labels from the ticks keeps both in sync for any window.
        ticks = np.arange(begin, end, tick_step)
        for ax in axe:
            ax.xaxis.set_ticks(ticks)
            ax.xaxis.set_ticklabels(ticks // 3600)

        # The x axis is shared, so setting the limits on one axes is enough.
        axe[-1].set_xlim(begin, end)
        fig.savefig(expe_file + '_viz.png')
        plt.show()
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)