Skip to content
Snippets Groups Projects
Commit 7a39884f authored by Igor Fontana de Nardin's avatar Igor Fontana de Nardin
Browse files

Changes for TPDS

parent 12c45fd1
No related branches found
No related tags found
2 merge requests!2Tpds changes,!1Changes for TPDS
......@@ -100,17 +100,18 @@
]
},
"locked": {
"lastModified": 1712244708,
"narHash": "sha256-MQppCw+g2QVFQrmdjz009Jjt8fuiQhldp5kVrMHHyv8=",
"ref": "refs/heads/main",
"rev": "659660c35650e9f46ec47e8c0743d75649e68d7b",
"revCount": 4,
"lastModified": 1729509887,
"narHash": "sha256-2nG1dWidae4Uo1WK50VuaH2Y4L+s7hEAkiDsIIzDYOs=",
"ref": "TPDS",
"rev": "e13754a5afa30d75cf3d34aaf2afbe20240b132c",
"revCount": 5,
"type": "git",
"url": "https://framagit.org/batsim/easy-powercap.git?tag=europar24"
"url": "https://gitlab.irit.fr/sepia-pub/batsim/easy-powercap.git"
},
"original": {
"ref": "TPDS",
"type": "git",
"url": "https://framagit.org/batsim/easy-powercap.git?tag=europar24"
"url": "https://gitlab.irit.fr/sepia-pub/batsim/easy-powercap.git"
}
},
"flake-parts": {
......
......@@ -28,7 +28,7 @@
inputs.flake-utils.follows = "flake-utils";
};
easy-powercap-flake = {
url = "git+https://framagit.org/batsim/easy-powercap.git?tag=europar24";
url = "git+https://gitlab.irit.fr/sepia-pub/batsim/easy-powercap.git?ref=TPDS";
inputs.nixpkgs.follows = "nixpkgs";
inputs.nur-kapack.follows = "nur-kapack";
inputs.batprotocol-flake.follows = "batprotocol-flake";
......
This diff is collapsed.
......@@ -7,8 +7,14 @@ import pandas as pd
# Raw string: `\d` is an invalid escape sequence in a plain string literal
# (DeprecationWarning since Python 3.6, SyntaxWarning/error in newer versions).
# Captures the user id from paths like .../filter123_user_42_total_power_mean_pred.csv
FILENAME_PARSE_REGEX = r'''.*/filter123_user_(\d+)_total_power_mean_pred\.csv$'''
def read_aggregate_one_dir(dir, estimated_metrics):
def read_aggregate_one_dir(dir, estimated_metrics, estimated=True):
full_df = None
field_to_get = f'hist_pred_total_power_{estimated_metrics}'
if not estimated:
field_to_get = f'total_power_{estimated_metrics}_watts'
field_to_save = f'{estimated_metrics}_power_estimation'
if not estimated:
field_to_save = f'{estimated_metrics}_power_real'
r = re.compile(FILENAME_PARSE_REGEX)
for filename in glob.glob(f'{dir}/*.csv'):
m = r.match(filename)
......@@ -18,8 +24,8 @@ def read_aggregate_one_dir(dir, estimated_metrics):
df = pd.read_csv(filename, low_memory=False)
df['user_id'] = user_id
df[f'{estimated_metrics}_power_estimation'] = df[f'hist_pred_total_power_{estimated_metrics}']
df = df[['job_id', 'user_id', f'{estimated_metrics}_power_estimation']]
df[field_to_save] = df[field_to_get]
df = df[['job_id', 'user_id', field_to_save]]
if full_df is None:
full_df = df
......@@ -32,7 +38,9 @@ def read_aggregate_one_dir(dir, estimated_metrics):
def read_aggregate_root_dir(root_dir):
    """Read per-user power aggregates from *root_dir* and merge them into one frame.

    Loads the mean and max predicted power, the predicted std, and the real
    (measured) std, then inner-merges the four frames on their common columns
    (job_id, user_id).

    NOTE: the diff residue left the old ``return full_df_mean.merge(full_df_max)``
    before the std frames were loaded, which made the std merges unreachable;
    that stale early return is removed here.
    """
    full_df_mean = read_aggregate_one_dir(f'{root_dir}/total_power_mean_predictions_users_allmethods_mean', 'mean')
    full_df_max = read_aggregate_one_dir(f'{root_dir}/total_power_mean_predictions_users_allmethods_max', 'max')
    full_df_std = read_aggregate_one_dir(f'{root_dir}/total_power_mean_predictions_users_allmethods_std', 'std')
    # Same directory as the estimated std: the CSVs carry both the predicted
    # and the real power columns; estimated=False selects the real one.
    full_df_std_real = read_aggregate_one_dir(f'{root_dir}/total_power_mean_predictions_users_allmethods_std', 'std', False)
    return full_df_mean.merge(full_df_max).merge(full_df_std).merge(full_df_std_real)
def agg_all_files():
parser = argparse.ArgumentParser()
......
......@@ -9,6 +9,7 @@ import pandas as pd
def main():
datetime_parser = lambda f: datetime.datetime.strptime(f, '%Y-%m-%d %H:%M:%S')
parser = argparse.ArgumentParser()
parser.add_argument("input_schedule", help='path to the Batsim schedule output CSV file')
parser.add_argument("input_jobs_gantt", help='path to the Batsim jobs output CSV file')
parser.add_argument("input_batsim_workload", help='path to the Batsim workload JSON file')
parser.add_argument("input_workload_root_path", help="filepath to the location of the root directory of the generated workloads")
......@@ -21,7 +22,7 @@ def main():
with open(args.input_batsim_workload) as f:
batsim_workload = json.load(f)
batsim_out_jobs = pd.read_csv(args.input_jobs_gantt)
batsim_out_schedule = pd.read_csv(args.input_schedule)
# determine which jobs are in the computation window
jobs_in_window_mask = batsim_out_jobs['starting_time'] < args.end
jobs_in_window = batsim_out_jobs[jobs_in_window_mask].copy().reset_index()
......@@ -42,6 +43,9 @@ def main():
utilization_during_window = np.zeros(window_nb_values)
assert window_nb_values == len(platform_power_values_during_window)
perc_jobs_inside = len(jobs_in_window) / batsim_out_schedule["nb_jobs"][0]
jobs_inside = len(jobs_in_window)
total_jobs = int(batsim_out_schedule["nb_jobs"][0])
# compute the power consumed of all jobs in [0, end].
# - use input data when the job is scheduled
# - complete with zeros
......@@ -107,6 +111,11 @@ def main():
mean_turnaround_time = batsim_out_jobs['turnaround_time'].mean()
mean_slowdown = batsim_out_jobs['stretch'].mean()
max_waiting_time = batsim_out_jobs['waiting_time'].max()
max_turnaround_time = batsim_out_jobs['turnaround_time'].max()
quantiles_waiting = np.quantile(batsim_out_jobs['waiting_time'], [0.01, 0.1, 0.5, 0.9, 0.99])
quantiles_turnaround = np.quantile(batsim_out_jobs['turnaround_time'], [0.01, 0.1, 0.5, 0.9, 0.99])
# other infrastructure metrics
mean_utilization = utilization_during_window.mean()
max_utilization = utilization_during_window.max()
......@@ -132,8 +141,27 @@ def main():
'mean_turnaround_time': mean_turnaround_time,
'mean_slowdown': mean_slowdown,
'max_waiting_time': max_waiting_time,
'max_turnaround_time': max_turnaround_time,
'mean_utilization': mean_utilization,
'max_utilization': max_utilization,
'waiting_p1': quantiles_waiting[0],
'waiting_p10': quantiles_waiting[1],
'waiting_p50': quantiles_waiting[2],
'waiting_p90': quantiles_waiting[3],
'waiting_p99': quantiles_waiting[4],
'turnaround_p1': quantiles_turnaround[0],
'turnaround_p10': quantiles_turnaround[1],
'turnaround_p50': quantiles_turnaround[2],
'turnaround_p90': quantiles_turnaround[3],
'turnaround_p99': quantiles_turnaround[4],
'jobs_inside_window': jobs_inside,
'perc_jobs_inside_window': perc_jobs_inside,
'total_jobs': total_jobs,
}
print(json.dumps(metrics, sort_keys=True, allow_nan=False))
......
......@@ -160,6 +160,8 @@ def main():
'zero_power_estimation': float(row['zero_power_estimation']),
'mean_power_estimation': float(row['mean_power_estimation'] * row['num_nodes']),
'max_power_estimation': float(row['max_power_estimation'] * row['num_nodes']),
'std_power_estimation': float(row['std_power_estimation'] * row['num_nodes']),
'std_power_real': float(row['std_power_real'] * row['num_nodes']),
'upper_bound_power_estimation': float(row['upper_bound_power_estimation']),
'job_details_filepath': job_profile_dir_suffix,
}
......
......@@ -23,7 +23,7 @@ def main():
'powercap_dynamic_watts': int(i * 0.01 * max_dynamic_power),
'normal_dynamic_watts': max_dynamic_power,
'idle_watts': min_power_per_node,
} for i in range(10,71,5)]
} for i in range(10,91,10)]
powercap_durations = [
{'powercap_end_time_seconds': 60*60*3},
......@@ -31,12 +31,18 @@ def main():
algo_name = 'easypower'
predictors = [
{'algo_name': algo_name, 'predictor_name': 'zero', 'job_power_estimation_field': 'zero_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'mean', 'job_power_estimation_field': 'mean_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'max', 'job_power_estimation_field': 'max_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'upper_bound', 'job_power_estimation_field': 'upper_bound_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'real_mean', 'job_power_estimation_field': 'real_mean_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'real_max', 'job_power_estimation_field': 'real_max_power_estimation'},
{'algo_name': algo_name, 'predictor_name': 'zero', 'job_power_estimation_field': 'zero_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'mean', 'job_power_estimation_field': 'mean_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'max', 'job_power_estimation_field': 'max_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'upper_bound', 'job_power_estimation_field': 'upper_bound_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'real_mean', 'job_power_estimation_field': 'real_mean_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'real_max', 'job_power_estimation_field': 'real_max_power_estimation', "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'gaussian_68', 'job_power_estimation_field': 'gaussian', 'sigma_times': 1, "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'gaussian_95', 'job_power_estimation_field': 'gaussian', 'sigma_times': 2, "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'gaussian_99', 'job_power_estimation_field': 'gaussian', 'sigma_times': 3, "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'real_gaussian_68', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 1, "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'real_gaussian_95', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 2, "order": "fcfs"},
{'algo_name': algo_name, 'predictor_name': 'real_gaussian_99', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 3, "order": "fcfs"},
]
platforms = [
......@@ -54,6 +60,41 @@ def main():
instance_hash = sha1(encoded_without_hash).hexdigest()
instances[instance_hash] = instance
nb_instances += 1
predictors = [
{'algo_name': 'easypower', 'predictor_name': 'gaussian_68', 'job_power_estimation_field': 'gaussian', 'sigma_times': 1, "order": "saf"},
{'algo_name': 'easypower', 'predictor_name': 'gaussian_95', 'job_power_estimation_field': 'gaussian', 'sigma_times': 2, "order": "saf"},
{'algo_name': 'easypower', 'predictor_name': 'gaussian_99', 'job_power_estimation_field': 'gaussian', 'sigma_times': 3, "order": "saf"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_68', 'job_power_estimation_field': 'gaussian', 'sigma_times': 1, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_95', 'job_power_estimation_field': 'gaussian', 'sigma_times': 2, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_99', 'job_power_estimation_field': 'gaussian', 'sigma_times': 3, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_68', 'job_power_estimation_field': 'gaussian', 'sigma_times': 1, "type_knapsack": "waiting_time_ratio"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_95', 'job_power_estimation_field': 'gaussian', 'sigma_times': 2, "type_knapsack": "waiting_time_ratio"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'gaussian_99', 'job_power_estimation_field': 'gaussian', 'sigma_times': 3, "type_knapsack": "waiting_time_ratio"},
{'algo_name': 'easypower', 'predictor_name': 'real_gaussian_68', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 1, "order": "saf"},
{'algo_name': 'easypower', 'predictor_name': 'real_gaussian_95', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 2, "order": "saf"},
{'algo_name': 'easypower', 'predictor_name': 'real_gaussian_99', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 3, "order": "saf"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_68', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 1, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_95', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 2, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_99', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 3, "type_knapsack": "waiting_time"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_68', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 1, "type_knapsack": "waiting_time_ratio"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_95', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 2, "type_knapsack": "waiting_time_ratio"},
{'algo_name': 'knapsack_greedy', 'predictor_name': 'real_gaussian_99', 'job_power_estimation_field': 'real_gaussian', 'sigma_times': 3, "type_knapsack": "waiting_time_ratio"},
]
powercaps = [{
'powercap_dynamic_value_ratio': f'{50 * 0.01:.2f}',
'powercap_dynamic_watts': int(50 * 0.01 * max_dynamic_power),
'normal_dynamic_watts': max_dynamic_power,
'idle_watts': min_power_per_node,
}]
for instance_t in itertools.product(predictors, powercaps, powercap_durations, platforms, wl_params):
instance = reduce(lambda a,b: {**a, **b}, instance_t)
encoded_without_hash = json.dumps(instance, sort_keys=True).encode('utf-8')
instance_hash = sha1(encoded_without_hash).hexdigest()
instances[instance_hash] = instance
nb_instances += 1
assert nb_instances == len(instances), 'collision: two instances have the same hash'
f = sys.stdout
......
......@@ -61,6 +61,7 @@ def manage_batsim_instance(instance_hash, instance, output_dir, workloads_dir):
'm100-compute-gantt-power-consumption',
'--powercap_watts', f'{instance["powercap_dynamic_watts"]}',
'-o', f'{instance_dir}/',
f'{instance_dir}/schedule.csv',
f'{instance_dir}/jobs.csv',
f'{workloads_dir}/wload_delay_{instance["start_dt_s"]}.json',
f'{workloads_dir}',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment