%% Cell type:markdown id: tags:
# Expe replay with feedback
Experiments to "validate" our model of replay with feedback.
## Initializing...
%% Cell type:code id: tags:
``` python
import pandas as pd

# For easily changing the workload
EXPE_DIR = "out/expe_replay_SDSC"
PF_folder = "platform/SDSC"
WL_folder = "workload/SDSC"
WL_swf_path = f"{WL_folder}/SDSC-SP2.swf"

# Original log params
WL_URL = "http://www.cs.huji.ac.il/labs/parallel/workload/l_sdsc_sp2/SDSC-SP2-1998-4.swf.gz"
WL_start_time = '1998-04-24 18:11:04'
timezone = "US/Pacific"
WL_start_time_utc = pd.Timestamp(WL_start_time).tz_localize(timezone).tz_convert(None)
begin_data_in_swf, end_data_in_swf = 40, 73535  # line number (counting from 1)

# Folder for saving the figures
fig_path = "/home/mael/ownCloud/these/articles/replay_with_feedback/fig_rerun"
```
%% Cell type:code id: tags:
``` python
import json
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import networkx as nwx
from evalys.jobset import JobSet
import evalys.visu.legacy as vleg

empty_wl = "workload/empty_workload.json"
! mkdir -p {EXPE_DIR} {WL_folder}

# Column names of the SWF format
header = [
    "JOB_ID","SUBMIT_TIME","WAIT_TIME","RUN_TIME","ALLOCATED_PROCESSOR_COUNT","AVERAGE_CPU_TIME_USED","USED_MEMORY",
    "REQUESTED_NUMBER_OF_PROCESSORS","REQUESTED_TIME","REQUESTED_MEMORY","STATUS","USER_ID","GROUP_ID","APPLICATION_ID",
    "QUEUE_ID","PARTITION_ID","PRECEDING_JOB_ID","THINK_TIME_FROM_PRECEDING_JOB"]
# Function for automatic generation of user_description_file
def user_description(user, user_type, wl_folder, suffix):
    return {
        "name": user,
        "category": user_type,
        "param": {"input_json": wl_folder + "/" + user + suffix}
    }

def gen_user_description_file(expe_folder, user_type, wl_folder, suffix):
    if not os.path.exists(f"{expe_folder}/cmd"):
        os.makedirs(f"{expe_folder}/cmd")
    uf_path = f"{expe_folder}/cmd/user_description_file.json"
    data = {}
    user_names = [user_file.split('.')[0] for user_file in os.listdir(wl_folder) if user_file[:4] == "user"]
    data["users"] = [user_description(user, user_type, wl_folder, suffix) for user in user_names]
    with open(uf_path, 'w') as user_description_file:
        json.dump(data, user_description_file)
    return uf_path

def plot_util_and_queue(expe_file):
    js = JobSet.from_csv(expe_file + "/_jobs.csv")
    #js.df = js.df[(js.df.submission_time >= begin) & (js.df.submission_time < end)]
    fig, axe = plt.subplots(nrows=2, sharex=True, figsize=(16, 8), tight_layout=True)
    fig.suptitle(expe_file, fontsize=16)
    vleg.plot_load(js.utilisation, js.MaxProcs, time_scale=False, legend_label="utilisation", ax=axe[0])
    vleg.plot_load(js.queue, js.MaxProcs, time_scale=False, legend_label="queue size", ax=axe[1])
    # plt.xlim(begin, end)
    # fig.savefig(expe_file + '_viz.png')
    plt.show()
    plt.close(fig)
def adjust_timestamp_column(col, time_origin=WL_start_time_utc):
    col = pd.to_datetime(col, unit='s', utc=True, origin=time_origin).dt.tz_convert(tz=timezone)
    col = col.dt.tz_localize(None)  # drop the timezone information, keeping local time
    return col

def read_and_clean(jobs_file, time_origin=WL_start_time_utc):
    jobs = pd.read_csv(jobs_file, dtype={"job_id": "str"})
    # Clean job_id column and set it as index (job_ids can be '45:s1', indicating a session)
    jobs.job_id = jobs.job_id.str.replace(r":.*", "", regex=True).astype("int")
    jobs.set_index("job_id", inplace=True)
    jobs.sort_index(inplace=True)
    # Convert timestamp columns: the proper way to convert our dates, handling Daylight Saving Time correctly
    jobs.submission_time = adjust_timestamp_column(jobs.submission_time, time_origin)
    jobs.finish_time = adjust_timestamp_column(jobs.finish_time, time_origin)
    return jobs
def submission_plots(df, axd, label):
    sub_time = df["submission_time"]
    sub_time.groupby(sub_time.dt.weekday).count().plot(kind='line', ax=axd['A'], xlabel="Day of the week", ylabel="#submissions")
    sub_time.groupby(sub_time.dt.hour).count().plot(kind='line', ax=axd['B'], xticks=np.arange(0, 25, step=2), xlabel="Hour of the day", ylabel="#submissions")
    sub_time.groupby(sub_time.dt.to_period('w')).count().plot(kind='area', alpha=.5, ax=axd['C'], xlabel="Week of year", ylabel="#submissions")
    sub_time.groupby(sub_time.dt.to_period('D')).count().plot(kind='area', alpha=.5, ax=axd['D'], xlabel="Per day", ylabel="#submissions", label=label)

def detailed_submission_plots(df, label, ax, color, quartiles=False):
    sub_time = df["submission_time"]
    hourByhour = sub_time.groupby(by=sub_time.dt.to_period('h')).count()
    def q10(x):
        return x.quantile(.1)
    def q90(x):
        return x.quantile(.9)
    grp = hourByhour.groupby([hourByhour.index.day_of_week, hourByhour.index.hour]).agg([q10, 'median', 'mean', q90]).reset_index(drop=True)
    grp.plot(y='mean', kind='line', ax=ax, color=color)
    if quartiles:
        grp.plot(y='median', kind='line', ax=ax, color=color, alpha=.2)
        grp.plot(y=['q10', 'q90'], kind='line', alpha=.05, ax=ax, color=color)
        ax.fill_between(grp.index, grp.q10, grp.q90, alpha=.05, color=color)
    ax.set_xticks(np.linspace(0, 7*24, 8))
    ax.set_xticks(np.linspace(12, 7*24-12, 7), minor=True)
    ax.set_xticklabels(['Mon 00:00', 'Tue 00:00', 'Wed 00:00', 'Thu 00:00', 'Fri 00:00', 'Sat 00:00', 'Sun 00:00', 'Mon 00:00'])
    ax.set_xlabel('hour of the week')
    ax.set_ylabel('#job arrivals')
    ax.set_title(f"{label}")
def mean_lateness(df, ref, crop=False):
    if crop:
        ref = ref.loc[df.index]
    else:
        assert ref.shape == df.shape
    nb_jobs = df.shape[0]
    late = (df.submission_time - ref.submission_time).dt.total_seconds().sum() / nb_jobs
    return late

def length(df):
    return df["submission_time"].max() - df["submission_time"].min()

def stretch(df, ref, crop=False):
    if crop:
        ref = ref.loc[df.index]
    else:
        assert ref.shape == df.shape
    l = length(ref).total_seconds()
    return (l + mean_lateness(df, ref)) / l

def delta(df, ref):
    nb_jobs = df.shape[0]
    return 2 * mean_lateness(df, ref) / (nb_jobs - 1)
def process_for_util(m_state, timestamps_to_add):
    m_state = m_state.copy()[["time", "nb_computing"]]
    m_state.time = adjust_timestamp_column(m_state.time)
    # Add the timestamps defining our time window to the data, if necessary
    for timestamp in timestamps_to_add:
        prev_el = m_state[m_state.time <= timestamp].tail(1)
        prev_time, prev_nb_comp = prev_el.time.iloc[0], prev_el.nb_computing.iloc[0]
        if prev_time != timestamp:
            m_state.loc[len(m_state)] = [timestamp, prev_nb_comp]
    m_state.sort_values(by="time", inplace=True)
    # Calculate the area column
    m_state["timediff"] = m_state.time.astype("int").diff(periods=-1) / 10**9  # time as int is in ns; convert the diff to s
    m_state["area"] = - m_state.nb_computing * m_state.timediff
    return m_state

def mean_util_between(m_state, start, end):
    """Calculate the mean platform utilization between two dates"""
    nb_machines = m_state.nb_computing.iloc[0] + m_state.nb_idle.iloc[0]
    m_state = process_for_util(m_state, [start, end])
    win = m_state[(m_state.time >= start) & (m_state.time < end)]
    mean_utilization = win.area.sum() / (end-start).total_seconds() / nb_machines * 100  # in %
    return mean_utilization
def metrics(expe_folder):
    m = {}
    jobs = read_and_clean(f"{expe_folder}/_jobs.csv")
    m_state = pd.read_csv(f"{expe_folder}/_machine_states.csv")
    m['nb_jobs'] = jobs.shape[0]
    m['mean_lateness'] = int(mean_lateness(jobs, ref=WL_rigid, crop=True))
    m['delta'] = 2 * m['mean_lateness'] / (m['nb_jobs'] - 1)
    m['stretch'] = stretch(jobs, ref=WL_rigid, crop=True)
    beg, end = jobs["submission_time"].min(), jobs["submission_time"].max()
    m['util'] = mean_util_between(m_state, beg, end)
    return m

# Load the reference experiment into memory, if available
if os.path.exists(f"{EXPE_DIR}/rigid_FCFS/_jobs.csv"):
    WL_rigid = read_and_clean(f"{EXPE_DIR}/rigid_FCFS/_jobs.csv")
if os.path.exists(WL_swf_path):
    WL_swf = pd.read_csv(WL_swf_path, header=begin_data_in_swf-2, delim_whitespace=True, names=header)
```
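%% Cell type:markdown id: tags:
Quick sanity check of the metrics defined above, on toy data (an illustration only, not part of the experiments): two fake jobs replayed 60s and 120s later than in a reference log spanning one hour.
%% Cell type:code id: tags:
``` python
# Toy illustration of mean_lateness, stretch and delta (hypothetical data)
_ref = pd.DataFrame({"submission_time": pd.to_datetime(["2022-01-01 00:00:00", "2022-01-01 01:00:00"])}, index=[1, 2])
_rep = pd.DataFrame({"submission_time": _ref.submission_time + pd.to_timedelta([60, 120], unit="s")}, index=[1, 2])
print(mean_lateness(_rep, _ref))  # (60 + 120) / 2 = 90.0 s
print(stretch(_rep, _ref))        # (3600 + 90) / 3600 = 1.025
print(delta(_rep, _ref))          # 2 * 90 / (2 - 1) = 180.0 s
```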
%% Cell type:markdown id: tags:
## Generate session DAGs
### Download & process
Download workload from PWA:
%% Cell type:code id: tags:
``` python
!curl -L -o - {WL_URL} | gunzip > {WL_swf_path}
```
%% Cell type:code id: tags:
``` python
# Load it into memory
WL_swf = pd.read_csv(WL_swf_path, header=begin_data_in_swf-2, delim_whitespace=True, names=header)
```
%% Cell type:markdown id: tags:
Transform the workload into inputs usable by batmen.
%% Cell type:code id: tags:
``` python
# JSON inputs for ReplayUser
!swf2batsim_split_by_user {WL_swf_path} {WL_folder}/jsons \
    --start_time 0  # option to keep the original start times (otherwise, takes the minimum submit time as origin)
```
%% Cell type:code id: tags:
``` python
# SABjson inputs for FeedbackUsers:
# - session delimitation: arrival, t=0
! swf2userSessions {WL_swf_path} {WL_folder}/a0 -a 0 --graph
```
%% Cell type:code id: tags:
``` python
# - session delimitation: arrival, t=60
! swf2userSessions {WL_swf_path} {WL_folder}/a60 -a 60 --graph
```
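%% Cell type:markdown id: tags:
To see what the converter produced, we can peek at one generated file (a quick check added here; it assumes the conversion above succeeded and relies only on the SABjson keys used in the analysis below).
%% Cell type:code id: tags:
``` python
import glob

# Pick the first per-user SABjson in the a0 folder and show its session structure
sab_files = sorted(glob.glob(f"{WL_folder}/a0/user*.SABjson"))
with open(sab_files[0]) as f:
    dag = json.load(f)
print(f"{sab_files[0]}: {len(dag['sessions'])} sessions")
first = dag["sessions"][0]
print("keys of a session:", sorted(first.keys()))
print("preceding_sessions of the first session:", first["preceding_sessions"])
```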
%% Cell type:markdown id: tags:
### Analyse session DAGs
%% Cell type:code id: tags:
``` python
data = {
    "a0": f"{WL_folder}/a0",
    "a60": f"{WL_folder}/a60"
}
DAG_analysis = {"nb_sess": {}, "succs": {}, "preds": {}, "longest_paths": {}, "ttimes": {}}
for name, wl_folder in data.items():
    user_names = [user_file.split('.')[0] for user_file in os.listdir(wl_folder) if user_file[:4] == "user"]
    ttimes = []
    nb_sess, max_preds, max_succs, long_paths = {}, {}, {}, {}
    for user in user_names:
        usr_nb = user[4:]
        G = nwx.read_gml(f"{wl_folder}/graphs/{usr_nb}.gml")
        assert nwx.is_directed_acyclic_graph(G)
        # weight=1 is not an edge attribute name, so every edge gets the default weight 1:
        # this computes the longest path length in number of edges
        long_paths[user] = nwx.dag_longest_path_length(G, weight=1)
        with open(f"{wl_folder}/{user}.SABjson", 'r') as sabjson:
            dag = json.load(sabjson)
        # Compute arity
        nb_sess[user] = len(dag["sessions"])
        successors = dict.fromkeys(range(nb_sess[user]+1), 0)
        max_pred = 1  # at least the start session
        for s in dag["sessions"]:
            if len(s["preceding_sessions"]) == 0:  # s is a start session
                successors[0] += 1
            else:
                for prec in s["preceding_sessions"]:
                    successors[prec] += 1
                max_pred = max(len(s["preceding_sessions"]), max_pred)
        max_succs[user] = max(successors.values())
        max_preds[user] = max_pred
        tt_user = [tt for s in dag["sessions"] for tt in s["thinking_time_after_preceding_session"]]
        ttimes.extend(tt_user)
    nb_ttimes = len(ttimes)
    ttimes = pd.DataFrame(ttimes, columns=["ttimes"]).sort_values(by="ttimes", ignore_index=True)
    ttimes["cum_freq"] = [i / nb_ttimes for i in range(1, nb_ttimes+1)]
    DAG_analysis["ttimes"][name] = ttimes
    DAG_analysis["nb_sess"][name] = pd.Series(nb_sess)
    DAG_analysis["succs"][name] = pd.Series(max_succs)
    DAG_analysis["preds"][name] = pd.Series(max_preds)
    DAG_analysis["longest_paths"][name] = pd.Series(long_paths)
```
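%% Cell type:markdown id: tags:
Quick numeric summary of the statistics gathered above (added for convenience; the plots below show the full distributions).
%% Cell type:code id: tags:
``` python
# One summary line per workload variant
for name in data:
    print(name,
          "| users:", DAG_analysis["nb_sess"][name].shape[0],
          "| total sessions:", DAG_analysis["nb_sess"][name].sum(),
          "| max longest path:", DAG_analysis["longest_paths"][name].max(),
          "| max #predecessors:", DAG_analysis["preds"][name].max())
```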
%% Cell type:code id: tags:
``` python
# Graphs: arity
fig, ax = plt.subplots(ncols=3, figsize=(16,4), constrained_layout=True)
bins_arity = [1,2,3,4,5,10,np.inf]
arity_labels = [1,2,3,4,"5-9","10+"]
bins_path = [1,2,3,4,5,10,20,30,40,50,100,200,np.inf]
path_ticks = [1,2,3,4,"5-9","10-19","20-29","30-39","40-49","50-99","100","200+"]
grouped = {"succs": {}, "preds": {}, "long_paths": {}}
for wl in data.keys():
    for x in ("succs", "preds"):
        df = pd.Series(DAG_analysis[x][wl])
        grouped[x][wl] = df.groupby(pd.cut(df, bins=bins_arity, labels=arity_labels, right=False)).count()
    df = pd.Series(DAG_analysis["longest_paths"][wl])
    grouped["long_paths"][wl] = df.groupby(pd.cut(df, bins=bins_path, labels=path_ticks, right=False)).count()
pd.DataFrame(grouped["succs"]).plot(kind="bar", ax=ax[0], xlabel='max #successors in user DAG', ylabel='#user')
pd.DataFrame(grouped["preds"]).plot(kind="bar", ax=ax[1], xlabel='max #predecessors in user DAG', ylabel='#user')
for wl in DAG_analysis["succs"].keys():
    ax[2].plot(DAG_analysis["succs"][wl], DAG_analysis["preds"][wl], '.')
x = np.linspace(0, 50, 100); ax[2].plot(x, x, "--r", label="y=x")
ax[2].set(xlabel="max #successors", ylabel="max #predecessors")
fig.suptitle("arity of user DAGs")

# Graphs: longest path
fig, ax = plt.subplots(ncols=3, figsize=(16,4), constrained_layout=True)
pd.DataFrame(grouped["long_paths"]).plot(kind="bar", ax=ax[0], xlabel='longest path in user DAG', ylabel='#user')
for wl in DAG_analysis["succs"].keys():
    ax[1].plot(DAG_analysis["longest_paths"][wl], DAG_analysis["nb_sess"][wl], '.')
    ax[2].plot(DAG_analysis["longest_paths"][wl], DAG_analysis["succs"][wl], '.')
ax[1].set(xlabel='longest path in user DAG', ylabel="#sessions", xscale='log', yscale="log")
ax[1].grid(which='major')
ax[2].set(xlabel='longest path in user DAG', ylabel="max #successors")
fig.suptitle("Longest path in user DAGs", fontsize=12)

# Graph: think time
fig, ax = plt.subplots()
col = iter(["yellowgreen","darkgreen"])
for wl in DAG_analysis["ttimes"].keys():
    n = DAG_analysis["ttimes"][wl].shape[0]
    DAG_analysis["ttimes"][wl].plot(kind="scatter", x="ttimes", y="cum_freq", s=1, logx=True, ax=ax, label=f"{wl},#ttimes={n}",
                                    xlabel="think_time (second, log scale)", ylabel="cumulative frequency", color=next(col))
tick_loc = [1, 10, 60, 600, 3600, 10*3600, 24*3600, 7*24*3600, 100*24*3600]
ax.set_xticks(tick_loc, ["1s", "10s", "1m", "10m", "1h", "10h", "1d", "1w", "100d"])
ax.grid(which='major')
fig.suptitle("Think time distribution in DAGs", fontsize=12)
```
%% Cell type:code id: tags:
``` python
# Graph in paper
fig, ax = plt.subplots(ncols=2, figsize=(6,2), layout='constrained', dpi=400)
pd.DataFrame(grouped["preds"]).plot(kind="bar", ax=ax[1], xlabel='arity in session graphs', color=["yellowgreen","darkgreen"])
ax[1].xaxis.set_tick_params(rotation=0)
ax[1].text(2, 120, f"max arity a0: {max(DAG_analysis['preds']['a0'])}\nmax arity a60: {max(DAG_analysis['preds']['a60'])}",
           fontsize="small")
ax[0].plot(DAG_analysis["longest_paths"]["a0"], DAG_analysis["nb_sess"]["a0"], '.', markersize=2, color="yellowgreen")
ax[0].plot(DAG_analysis["longest_paths"]["a60"], DAG_analysis["nb_sess"]["a60"], '.', markersize=2, color="darkgreen")
ax[0].set(xlabel='longest path in session graphs', ylabel="#sessions", xscale='log', yscale="log")
ax[0].set_yticks([1,10,100,1000,10000])
ax[0].grid(which='major')
fig.savefig(f"{fig_path}/dag_analysis_SDSC.pdf")
```
%% Cell type:markdown id: tags:
Observations: very similar to KTH...
%% Cell type:markdown id: tags:
## Run simulations
### a0 FCFS
Compare `ReplayRigid` and `FeedbackThinkTime`.
%% Cell type:code id: tags:
``` python
# Expe rigid FCFS
pf = f"{PF_folder}/infra.xml"
wl_folder = f"{WL_folder}/jsons"
EXPE_FILE = EXPE_DIR + "/rigid_FCFS"
print("Expe rigid.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "replay_user_rigid", wl_folder, ".json")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v fcfs --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
# Load the reference experiment into memory
WL_rigid = read_and_clean(f"{EXPE_DIR}/rigid_FCFS/_jobs.csv")
```
%% Cell type:code id: tags:
``` python
# Expe feedback a0 FCFS
pf = f"{PF_folder}/infra.xml"
wl_a0 = f"{WL_folder}/a0"
EXPE_FILE = EXPE_DIR + "/a0_FCFS"
print("Expe feedback a0.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a0, ".SABjson")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v fcfs --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a0 EASY
Compare `ReplayRigid` and `FeedbackThinkTime`.
%% Cell type:code id: tags:
``` python
# Expe rigid EASY
pf = f"{PF_folder}/infra.xml"
wl_folder = f"{WL_folder}/jsons"
EXPE_FILE = EXPE_DIR + "/rigid_EASY"
print("Expe rigid.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "replay_user_rigid", wl_folder, ".json")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
# Expe feedback a0 EASY
pf = f"{PF_folder}/infra.xml"
wl_a0 = f"{WL_folder}/a0"
EXPE_FILE = EXPE_DIR + "/a0_EASY"
print("Expe feedback a0.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a0, ".SABjson")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a0 speed
4 new expe:
- rigid easy: speed*2 and speed/2
- feedback a0 easy: speed*2 and speed/2
%% Cell type:code id: tags:
``` python
# Generate the XP files
speeds = ["double_speed", "half_speed"]
wl_rigid = f"{WL_folder}/jsons"
wl_a0 = f"{WL_folder}/a0"
for speed in speeds:
    pf = f"{PF_folder}/{speed}.xml"
    # Rigid expe
    EXPE_FILE = f"{EXPE_DIR}/rigid_EASY_{speed}"
    uf = gen_user_description_file(EXPE_FILE, "replay_user_rigid", wl_rigid, ".json")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
    # Feedback expe
    EXPE_FILE = f"{EXPE_DIR}/a0_EASY_{speed}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a0, ".SABjson")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
```
%% Cell type:code id: tags:
``` python
EXPE = "rigid_EASY_double_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "rigid_EASY_half_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a0_EASY_double_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a0_EASY_half_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a0 infra
4 new expe:
- rigid easy: infra*2 and infra/2
- feedback a0 easy: infra*2 and infra/2
%% Cell type:code id: tags:
``` python
# Generate the XP files
infras = ["double_infra", "half_infra"]
wl_rigid = f"{WL_folder}/jsons"
wl_a0 = f"{WL_folder}/a0"
for infra in infras:
    pf = f"{PF_folder}/{infra}.xml"
    # Rigid expe
    EXPE_FILE = f"{EXPE_DIR}/rigid_EASY_{infra}"
    uf = gen_user_description_file(EXPE_FILE, "replay_user_rigid", wl_rigid, ".json")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
    # Feedback expe
    EXPE_FILE = f"{EXPE_DIR}/a0_EASY_{infra}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a0, ".SABjson")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
```
%% Cell type:code id: tags:
``` python
EXPE = "rigid_EASY_double_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "rigid_EASY_half_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a0_EASY_double_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a0_EASY_half_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a60 EASY
%% Cell type:code id: tags:
``` python
# Expe feedback a60 EASY
pf = f"{PF_folder}/infra.xml"
wl = f"{WL_folder}/a60"
EXPE_FILE = EXPE_DIR + "/a60_EASY"
print("Expe feedback a60.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl, ".SABjson")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a60 FCFS
%% Cell type:code id: tags:
``` python
# Expe feedback a60 FCFS
pf = f"{PF_folder}/infra.xml"
wl_a60 = f"{WL_folder}/a60"
EXPE_FILE = EXPE_DIR + "/a60_FCFS"
print("Expe feedback a60.\n-------\nGenerate user description file...")
uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a60, ".SABjson")
print("Simulation start.\n******************\n")
! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
    --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
    --schedcmd='batmen --verbosity=silent -v fcfs --variant_options_filepath={uf}'
! robin {EXPE_FILE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a60 speed
2 new expe:
- feedback a60 easy: speed*2 and speed/2
%% Cell type:code id: tags:
``` python
# Generate the XP files
speeds = ["double_speed", "half_speed"]
wl_a60 = f"{WL_folder}/a60"
for speed in speeds:
    pf = f"{PF_folder}/{speed}.xml"
    # Feedback expe
    EXPE_FILE = f"{EXPE_DIR}/a60_EASY_{speed}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a60, ".SABjson")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
```
%% Cell type:code id: tags:
``` python
EXPE = "a60_EASY_double_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a60_EASY_half_speed"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
### a60 infra
2 new expe:
- feedback a60 easy: infra*2 and infra/2
%% Cell type:code id: tags:
``` python
# Generate the XP files
infras = ["double_infra", "half_infra"]
wl_a60 = f"{WL_folder}/a60"
for infra in infras:
    pf = f"{PF_folder}/{infra}.xml"
    # Feedback expe
    EXPE_FILE = f"{EXPE_DIR}/a60_EASY_{infra}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl_a60, ".SABjson")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
```
%% Cell type:code id: tags:
``` python
EXPE = "a60_EASY_double_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:code id: tags:
``` python
EXPE = "a60_EASY_half_infra"
print(f"Expe {EXPE}.\n-------\n")
print("Simulation start.\n******************\n")
! robin {EXPE_DIR}/{EXPE}.yaml
print("\n******************\nSimulation done.")
```
%% Cell type:markdown id: tags:
## Results
### Scheduling metrics
%% Cell type:code id: tags:
``` python
data = [
    ["EASY", "rigid", f"{EXPE_DIR}/rigid_EASY"],
    ["EASY", "a0", f"{EXPE_DIR}/a0_EASY"],
    ["EASY", "a60", f"{EXPE_DIR}/a60_EASY"],
    ["FCFS", "rigid", f"{EXPE_DIR}/rigid_FCFS"],
    ["FCFS", "a0", f"{EXPE_DIR}/a0_FCFS"],
    ["FCFS", "a60", f"{EXPE_DIR}/a60_FCFS"],
    ["speed*2", "rigid", f"{EXPE_DIR}/rigid_EASY_double_speed"],
    ["speed*2", "a0", f"{EXPE_DIR}/a0_EASY_double_speed"],
    ["speed*2", "a60", f"{EXPE_DIR}/a60_EASY_double_speed"],
    ["speed/2", "rigid", f"{EXPE_DIR}/rigid_EASY_half_speed"],
    ["speed/2", "a0", f"{EXPE_DIR}/a0_EASY_half_speed"],
    ["speed/2", "a60", f"{EXPE_DIR}/a60_EASY_half_speed"],
    ["infra*2", "rigid", f"{EXPE_DIR}/rigid_EASY_double_infra"],
    ["infra*2", "a0", f"{EXPE_DIR}/a0_EASY_double_infra"],
    ["infra*2", "a60", f"{EXPE_DIR}/a60_EASY_double_infra"],
    ["infra/2", "rigid", f"{EXPE_DIR}/rigid_EASY_half_infra"],
    ["infra/2", "a0", f"{EXPE_DIR}/a0_EASY_half_infra"],
    ["infra/2", "a60", f"{EXPE_DIR}/a60_EASY_half_infra"]
]
dfs = []
WL_swf["finish_time"] = WL_swf.SUBMIT_TIME + WL_swf.WAIT_TIME + WL_swf.RUN_TIME
WL_swf["turnaround_time"] = WL_swf.WAIT_TIME + WL_swf.RUN_TIME
WL_swf["slowdown"] = WL_swf.turnaround_time / WL_swf.RUN_TIME.replace(0, 1)
original_metrics = pd.DataFrame.from_dict({
    "expe": "original_log",
    "simulation_time": 0,
    "lateness": 0,
    "nb_jobs": [WL_swf.shape[0]],
    "nb_jobs_success": WL_swf.STATUS.value_counts()[1],
    "mean_waiting_time": WL_swf.WAIT_TIME.mean(),
    "max_waiting_time": WL_swf.WAIT_TIME.max(),
    "makespan": WL_swf.finish_time.max() - WL_swf.SUBMIT_TIME.min(),
    "length": WL_swf.SUBMIT_TIME.max() - WL_swf.SUBMIT_TIME.min(),
    "mean_slowdown": WL_swf.slowdown.mean(),
    "max_slowdown": WL_swf.slowdown.max(),
    "mean_turnaround_time": WL_swf.turnaround_time.mean(),
    "max_turnaround_time": WL_swf.turnaround_time.max()
})
dfs.append(original_metrics)
for expe, method, path in data:
    expe_metrics = pd.read_csv(f"{path}/_schedule.csv")
    jobs = read_and_clean(f"{path}/_jobs.csv")
    expe_metrics['expe'] = expe
    expe_metrics['replay'] = method
    expe_metrics['length'] = length(jobs)
    if jobs.shape == WL_rigid.shape:
        expe_metrics['lateness'] = int(mean_lateness(jobs, ref=WL_rigid))
        expe_metrics['stretch'] = stretch(jobs, ref=WL_rigid)
        expe_metrics['delta'] = delta(jobs, ref=WL_rigid)
    dfs.append(expe_metrics)
all_metrics = pd.concat(dfs)
print("Simulation results:")
display(all_metrics[["expe", "replay", "simulation_time","makespan","length","lateness","stretch","delta","nb_jobs","nb_jobs_success",
                     "mean_waiting_time","max_waiting_time","mean_slowdown","max_slowdown","mean_turnaround_time","max_turnaround_time","nb_computing_machines"]])
print("Simulation results (with readable durations):")
all_readable = all_metrics.astype({"simulation_time":'timedelta64[s]',"makespan":'timedelta64[s]',"length":'timedelta64[s]',"lateness":'timedelta64[s]',
                                   "mean_waiting_time":'timedelta64[s]',"max_waiting_time":'timedelta64[s]',"mean_turnaround_time":'timedelta64[s]',"max_turnaround_time":'timedelta64[s]'})
display(all_readable[["expe", "replay", "simulation_time","makespan","length","lateness","stretch","delta","nb_jobs","nb_jobs_success",
                      "mean_waiting_time","max_waiting_time","mean_slowdown","max_slowdown","mean_turnaround_time","max_turnaround_time","nb_computing_machines"]])
```
%% Cell type:markdown id: tags:
Remarks:
- ~5700 jobs in the SWF are not valid (NB_CORE = 0)
- 43,117 successful jobs in the SWF vs. 54,044 (+10,927) in our simulation
- max waiting time of **62 days** in the SWF (!!), close to RIGID_FCFS (but not the mean waiting time)
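A quick sanity check of these figures (a sketch; assumes `WL_swf` is the SWF dataframe loaded above, with columns named after the standard SWF header):
%% Cell type:code id: tags:
``` python
# Sanity checks for the remarks above. ALLOCATED_PROCESSOR_COUNT is an
# assumption (standard SWF header name); STATUS and WAIT_TIME are the
# columns used elsewhere in this notebook.
print("jobs with 0 cores:", (WL_swf.ALLOCATED_PROCESSOR_COUNT == 0).sum())
print("successful jobs (STATUS == 1):", (WL_swf.STATUS == 1).sum())
print("max waiting time:", pd.Timedelta(seconds=int(WL_swf.WAIT_TIME.max())))
```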
%% Cell type:code id: tags:
``` python
columns = ["expe", "replay", "makespan", "mean_waiting_time", "max_waiting_time", "lateness", "stretch", "delta"]
to_latex = all.astype({"mean_waiting_time": 'timedelta64[s]', "max_waiting_time": 'timedelta64[s]'})
to_latex["makespan"] = to_latex.makespan / (3600*24)
to_latex["lateness"] = to_latex.lateness / (3600*24)
print(to_latex.to_latex(columns=columns, index=False, escape=True, float_format="{:.2f}".format).replace(" days", "d"))
```
%% Cell type:markdown id: tags:
### Util and queue size
%% Cell type:code id: tags:
``` python
plot_util_and_queue(EXPE_DIR + "/rigid_FCFS")
```
%% Cell type:code id: tags:
``` python
plot_util_and_queue(EXPE_DIR + "/rigid_EASY")
```
%% Cell type:code id: tags:
``` python
# FEEDBACK A0 FCFS
plot_util_and_queue(EXPE_DIR + "/a0_FCFS")
```
%% Cell type:code id: tags:
``` python
# FEEDBACK A0 EASY
plot_util_and_queue(EXPE_DIR + "/a0_EASY")
```
%% Cell type:code id: tags:
``` python
plot_util_and_queue(EXPE_DIR + "/a0_EASY_double_speed")
```
%% Cell type:code id: tags:
``` python
plot_util_and_queue(EXPE_DIR + "/a0_EASY_half_speed")
```
%% Cell type:code id: tags:
``` python
plot_util_and_queue(EXPE_DIR + "/rigid_EASY_half_speed")
```
%% Cell type:markdown id: tags:
### Submission time distribution
%% Cell type:code id: tags:
``` python
df_a0_fcfs = read_and_clean(EXPE_DIR + "/a0_FCFS/_jobs.csv")
# Plots
fig, axd = plt.subplot_mosaic([['A', 'B', 'C'], ['D', 'D', 'D']], constrained_layout=True, figsize=(16,8))
submission_plots(WL_rigid, axd, 'rigid')
submission_plots(df_a0_fcfs, axd, 'feedback_a0_fcfs')
axd['D'].legend()
```
%% Cell type:code id: tags:
``` python
df_a0_easy = read_and_clean(EXPE_DIR + "/a0_EASY/_jobs.csv")
fig, axd = plt.subplot_mosaic([['A', 'B', 'C'], ['D', 'D', 'D']], constrained_layout=True, figsize=(16,8))
submission_plots(WL_rigid, axd, 'rigid')
submission_plots(df_a0_easy, axd, 'feedback_a0_easy')
axd['D'].legend()
```
%% Cell type:code id: tags:
``` python
df_a0_easy = read_and_clean(EXPE_DIR + "/a0_EASY_double_speed/_jobs.csv")
fig, axd = plt.subplot_mosaic([['A', 'B', 'C'], ['D', 'D', 'D']], constrained_layout=True, figsize=(16,8))
submission_plots(WL_rigid, axd, 'rigid')
submission_plots(df_a0_easy, axd, 'feedback_a0_easy_double_speed')
axd['D'].legend()
```
%% Cell type:code id: tags:
``` python
df_a0_easy = read_and_clean(EXPE_DIR + "/a0_EASY_half_speed/_jobs.csv")
fig, axd = plt.subplot_mosaic([['A', 'B', 'C'], ['D', 'D', 'D']], constrained_layout=True, figsize=(16,8))
submission_plots(WL_rigid, axd, 'rigid')
submission_plots(df_a0_easy, axd, 'feedback_a0_easy_half_speed')
axd['D'].legend()
```
%% Cell type:code id: tags:
``` python
df_a0_fcfs = read_and_clean(EXPE_DIR + "/a0_FCFS/_jobs.csv")
df_a0_easy = read_and_clean(EXPE_DIR + "/a0_EASY/_jobs.csv")
fig, ax = plt.subplots(nrows=3, figsize=(20,12), sharex=True, sharey=True)
detailled_submission_plots(WL_rigid, "rigid", ax[0], 'blue', quartiles=True)
detailled_submission_plots(df_a0_fcfs, "feedback_a0_fcfs", ax[1], 'orange', quartiles=True)
detailled_submission_plots(df_a0_easy, "feedback_a0_easy", ax[2], 'red', quartiles=True)
[ax[i].grid(axis='x') for i in range(3)]
```
%% Cell type:markdown id: tags:
### Summary submission distribution
%% Cell type:code id: tags:
``` python
data = [
    ["EASY", "a0", f"{EXPE_DIR}/a0_EASY"],
    ["EASY", "a60", f"{EXPE_DIR}/a60_EASY"],
    ["FCFS", "a0", f"{EXPE_DIR}/a0_FCFS"],
    ["FCFS", "a60", f"{EXPE_DIR}/a60_FCFS"],
    ["speed*2", "a0", f"{EXPE_DIR}/a0_EASY_double_speed"],
    ["speed*2", "a60", f"{EXPE_DIR}/a60_EASY_double_speed"],
    ["speed/2", "a0", f"{EXPE_DIR}/a0_EASY_half_speed"],
    ["speed/2", "a60", f"{EXPE_DIR}/a60_EASY_half_speed"],
    ["infra*2", "a0", f"{EXPE_DIR}/a0_EASY_double_infra"],
    ["infra*2", "a60", f"{EXPE_DIR}/a60_EASY_double_infra"],
    ["infra/2", "a0", f"{EXPE_DIR}/a0_EASY_half_infra"],
    ["infra/2", "a60", f"{EXPE_DIR}/a60_EASY_half_infra"]
]
sub_time_rigid = WL_rigid["submission_time"]
rigid_to_plot = sub_time_rigid.groupby(sub_time_rigid.dt.to_period('D')).count()
N, i = len(data), 0
fig, ax = plt.subplots(nrows=N, layout="constrained", sharex=True, sharey=True, figsize=(30, 16))
for expe, mtd, path in data:
    sub_time = read_and_clean(f"{path}/_jobs.csv")["submission_time"]
    rigid_to_plot.plot(kind='area', alpha=.5, ax=ax[i], xlabel="Per day", ylabel="#submissions", label="rigid")
    sub_time.groupby(sub_time.dt.to_period('D')).count().plot(kind='area', alpha=.5, ax=ax[i], xlabel="Per day", ylabel="#submissions", label=f"{expe} ({mtd})")
    ax[i].legend(); ax[i].set_ylim(0, 700)
    i += 1
```
%% Cell type:code id: tags:
``` python
# For paper
data = [
    ["easy", "a0", f"{EXPE_DIR}/a0_EASY"],
    # ["easy", "a60", f"{EXPE_DIR}/a60_EASY"],
    ["fcfs", "a0", f"{EXPE_DIR}/a0_FCFS"],
    # ["fcfs", "a60", f"{EXPE_DIR}/a60_FCFS"],
    ["infra*2", "a0", f"{EXPE_DIR}/a0_EASY_double_infra"],
    # ["infra*2", "a60", f"{EXPE_DIR}/a60_EASY_double_infra"],
    ["infra/2", "a0", f"{EXPE_DIR}/a0_EASY_half_infra"],
    # ["infra/2", "a60", f"{EXPE_DIR}/a60_EASY_half_infra"],
    ["speed*2", "a0", f"{EXPE_DIR}/a0_EASY_double_speed"],
    # ["speed*2", "a60", f"{EXPE_DIR}/a60_EASY_double_speed"],
    ["speed/2", "a0", f"{EXPE_DIR}/a0_EASY_half_speed"],
    # ["speed/2", "a60", f"{EXPE_DIR}/a60_EASY_half_speed"]
]
sub_time_rigid = WL_rigid["submission_time"]
rigid_to_plot = sub_time_rigid.groupby(sub_time_rigid.dt.to_period('D')).count()
N, i = len(data), 0
fontsize = 40
fig, ax = plt.subplots(nrows=N, layout="tight", sharex=True, sharey=True, figsize=(20, 16))
for expe, mtd, path in data:
    sub_time = read_and_clean(f"{path}/_jobs.csv")["submission_time"]
    rigid_to_plot.plot(kind='area', alpha=.5, ax=ax[i], xlabel="", label="_rigid")
    sub_time.groupby(sub_time.dt.to_period('D')).count().plot(kind='area', alpha=.5, ax=ax[i], label=f"{expe} ({mtd})", xlabel="")
    ax[i].legend(fontsize=fontsize); [item.set_fontsize(fontsize) for item in ax[i].get_xticklabels() + ax[i].get_yticklabels()]
    ax[i].set_ylim(0, 350)
    i += 1
ax[0].set_yticks([0, 100, 200, 300], [0, "", 200, ""])
fig.suptitle("Number of submissions per day", fontsize=fontsize)
fig.savefig(f"{fig_path}/sub_time_distri_SDSC.pdf")
```
%% Cell type:markdown id: tags:
Cumulative submission time:
%% Cell type:code id: tags:
``` python
data = [
    "FCFS",
    "EASY",
    "EASY_double_speed",
    "EASY_half_speed",
    "EASY_double_infra",
    "EASY_half_infra"
]
# Graphs submission time
sub_time_rigid = WL_rigid["submission_time"]
fig, ax = plt.subplots(ncols=len(data), layout="constrained", sharex=True, sharey=True, figsize=(20, 5))
for i, simu in enumerate(data):
    sub_time = read_and_clean(f"{EXPE_DIR}/a0_{simu}/_jobs.csv")["submission_time"]
    sub_time_rigid.groupby(sub_time_rigid.dt.to_period('D')).count().cumsum().plot(kind='area', alpha=.5, ax=ax[i], xlabel="Per day", ylabel="#jobs submitted", label="rigid")
    sub_time.groupby(sub_time.dt.to_period('D')).count().cumsum().plot(kind='area', alpha=.5, ax=ax[i], xlabel="Per day", ylabel="#jobs submitted", label=f"feedback_{simu}")
    ax[i].legend()
fig.suptitle("Cumulated number of jobs submitted")
# Graphs finish time
fig, ax = plt.subplots(ncols=len(data), layout="constrained", sharex=True, sharey=True, figsize=(20, 5))
for i, simu in enumerate(data):
    rigid = read_and_clean(f"{EXPE_DIR}/rigid_{simu}/_jobs.csv")["finish_time"]
    feedback = read_and_clean(f"{EXPE_DIR}/a0_{simu}/_jobs.csv")["finish_time"]
    rigid.groupby(rigid.dt.to_period('D')).count().cumsum().plot(kind='area', alpha=.5, ax=ax[i], ylabel="#jobs finished", label=f"rigid_{simu}")
    feedback.groupby(feedback.dt.to_period('D')).count().cumsum().plot(kind='area', alpha=.5, ax=ax[i], ylabel="#jobs finished", label=f"feedback_{simu}")
    ax[i].legend()
fig.suptitle("Cumulated number of jobs finished")
fig.savefig(f"{fig_path}/cum_submit_SDSC.pdf")
```
%% Cell type:code id: tags:
``` python
# For paper
data = {
    # "RIGID_FCFS": f"{EXPE_DIR}/rigid_FCFS",
    "a0_EASY": "a0_EASY",
    "a0_FCFS": "a0_FCFS",
    # "A60_EASY": "a60_EASY",
    "a0_speed*2": "a0_EASY_double_speed",
    "a0_infra*2": "a0_EASY_double_infra",
    "a0_speed/2": "a0_EASY_half_speed",
    "a0_infra/2": "a0_EASY_half_infra"
}
fig, ax = plt.subplots(ncols=len(data), nrows=2, sharex=True, sharey=True, figsize=(12,3), dpi=400, gridspec_kw={'wspace': 0.05, 'hspace': 0.05})
# Graphs submission time
sub_time_rigid = WL_rigid["submission_time"]
N, i = len(sub_time_rigid), 0
for name, simu in data.items():
    l = len(simu.split(sep='_')[0]) + 1; exp = simu[l:]; exp_short = name[l:]
    rigid = read_and_clean(f"{EXPE_DIR}/rigid_{exp}/_jobs.csv")
    feedback = read_and_clean(f"{EXPE_DIR}/{simu}/_jobs.csv")
    sub_time_rigid.groupby(sub_time_rigid.dt.to_period('D')).count().cumsum().plot(
        kind='area', alpha=.5, color="tab:blue", ax=ax[0][i], title=exp_short, label="rigid")
    feedback.submission_time.groupby(feedback.submission_time.dt.to_period('D')).count().cumsum().plot(
        kind='area', alpha=.5, color="tab:orange", ax=ax[0][i], xlabel="", ylabel="%jobs submitted", label="feedback (a0)")
    rigid.finish_time.groupby(rigid.finish_time.dt.to_period('D')).count().cumsum().plot(
        kind='area', alpha=.5, color="tab:brown", ax=ax[1][i], label="rigid", grid=True)
    feedback.finish_time.groupby(feedback.finish_time.dt.to_period('D')).count().cumsum().plot(
        kind='area', alpha=.3, color="tab:pink", ax=ax[1][i], xlabel="", ylabel="%jobs finished", label="feedback (a0)")
    i += 1
[ax[i][5].legend(loc='lower right', fontsize='x-small') for i in range(2)]
# Change title for two expes that could not be run until the end
ax[0][4].set_title(r"speed/2 $\mathregular{^{(*)}}$")
ax[0][5].set_title(r"infra/2 $\mathregular{^{(*)}}$")
ax[0][0].set_yticks([0, N/2, N], [0, 50, 100]); ax[0][0].minorticks_off()
ax[0][0].set_xticklabels(['', '1999', '2000', '2001', '2002', ''])
# ax[0][0].xaxis.set_major_formatter(mdates.DateFormatter('%Y'));
fig.savefig(f"{fig_path}/cum_submit_SDSC.pdf")
```
%% Cell type:markdown id: tags:
### Correlation a0 a60
%% Cell type:code id: tags:
``` python
data = {
    "easy": {
        "rigid": f"{EXPE_DIR}/rigid_EASY",
        "a0": f"{EXPE_DIR}/a0_EASY",
        "a60": f"{EXPE_DIR}/a60_EASY"},
    "fcfs": {
        "rigid": f"{EXPE_DIR}/rigid_FCFS",
        "a0": f"{EXPE_DIR}/a0_FCFS",
        "a60": f"{EXPE_DIR}/a60_FCFS"},
    "speed*2": {
        "rigid": f"{EXPE_DIR}/rigid_EASY_double_speed",
        "a0": f"{EXPE_DIR}/a0_EASY_double_speed",
        "a60": f"{EXPE_DIR}/a60_EASY_double_speed"},
    "speed/2": {
        "rigid": f"{EXPE_DIR}/rigid_EASY_half_speed",
        "a0": f"{EXPE_DIR}/a0_EASY_half_speed",
        "a60": f"{EXPE_DIR}/a60_EASY_half_speed"},
    "infra*2": {
        "rigid": f"{EXPE_DIR}/rigid_EASY_double_infra",
        "a0": f"{EXPE_DIR}/a0_EASY_double_infra",
        "a60": f"{EXPE_DIR}/a60_EASY_double_infra"},
    "infra/2": {
        "rigid": f"{EXPE_DIR}/rigid_EASY_half_infra",
        "a0": f"{EXPE_DIR}/a0_EASY_half_infra",
        "a60": f"{EXPE_DIR}/a60_EASY_half_infra"}
}
fig, ax = plt.subplots(ncols=2, sharey=True)
fig2, ax2 = plt.subplots(nrows=2, sharex=True, sharey=True)
corr = {}
cmap, N, i = plt.cm.Accent, len(data), 0
colors = [cmap(k / float(N)) for k in range(N)]
for expe in data.keys():
    corr[expe] = {}
    sub, fin = {}, {}
    for mtd, path in data[expe].items():
        df = pd.read_csv(f"{path}/_jobs.csv")
        sub[mtd] = df.submission_time / (3600*24)
        fin[mtd] = df.finish_time / (3600*24)
    corr[expe]["rigid/a0"] = sub["rigid"].corr(sub["a0"])
    corr[expe]["rigid/a60"] = sub["rigid"].corr(sub["a60"])
    corr[expe]["a0/a60"] = sub["a0"].corr(sub["a60"])
    ax[0].plot(sub['rigid'], sub['a60'], marker=",", color=colors[i], label=expe)
    ax[1].plot(sub['a0'], sub['a60'], marker=",", color=colors[i], label=expe)
    disp = sub["a60"] - sub["rigid"]
    # bins=np.linspace(-.3e7, 2.3e7, 100),
    disp.plot(kind="hist", bins=range(-50, 251), color=colors[i], alpha=.5, ax=ax2[0], label=f"{expe}, med={disp.median():.1f}", title="sub_time(a60) - sub_time(rigid)")
    disp = sub["a0"] - sub["a60"]
    disp.plot(kind="hist", bins=range(-50, 251), color=colors[i], alpha=.5, ax=ax2[1], label=f"{expe}, med={disp.median():.1f}", title="sub_time(a0) - sub_time(a60)")
    i += 1
print("Pearson correlation on submission timestamps:")
display(pd.DataFrame(corr))
x = range(600); ax[0].plot(x, x, linestyle='dashed', color="tab:red")
ax[1].plot(x, x, linestyle='dashed', color="tab:red")
ax[0].set_xlim(ax[0].get_ylim()); ax[0].legend()
ax[0].set(xlabel="rigid", ylabel="a60"); ax[1].set(xlabel="a0", ylabel="a60")
fig.suptitle("Submission timestamp correlation")
ax2[0].set_xlim(-25, 50); ax2[0].legend(); ax2[1].legend()
fig2.suptitle("Dispersion around y=x (in days)")
fig2.savefig(f"{fig_path}/a60_a0_comp_KTH.pdf")
```
%% Cell type:markdown id: tags:
### Throughput
%% Cell type:code id: tags:
``` python
data = [
    "FCFS",
    "EASY",
    "EASY_double_speed",
    "EASY_half_speed",
    "EASY_double_infra",
    "EASY_half_infra"
]
# Graphs finish time
fig, ax = plt.subplots(ncols=len(data), layout="constrained", sharex=True, sharey=True, figsize=(20, 5))
for i, simu in enumerate(data):
    rigid = read_and_clean(f"{EXPE_DIR}/rigid_{simu}/_jobs.csv")["finish_time"]
    feedback = read_and_clean(f"{EXPE_DIR}/a0_{simu}/_jobs.csv")["finish_time"]
    rigid.groupby(rigid.dt.to_period('W')).count().plot(kind='area', alpha=.5, ax=ax[i], xlabel="week", ylabel="#jobs finished", label=f"rigid_{simu}")
    feedback.groupby(feedback.dt.to_period('W')).count().plot(kind='area', alpha=.5, ax=ax[i], xlabel="week", ylabel="#jobs finished", label=f"feedback_{simu}")
    ax[i].legend(); ax[i].set_ylim(0, 1700)
fig.suptitle("Throughput per week")
```
%% Cell type:markdown id: tags:
### Windowed metrics
Let's compute the metrics only on a smaller time window of the simulation outputs, to avoid border effects.
**Warm-up time.** The *max turnaround time* in the original log is 62 days 11:39:03. Let's leave at least this much time for the platform to "warm up". Since the first job in the SDSC log is submitted on 1998-05-01 07:26:33, a Friday, let's start computing our metrics on the Monday two months later, i.e. **Monday 1998-07-06 00:00:00**.
**Simulation tail.** Since we simulate conditions that can greatly affect the pace at which the log is consumed (doubling the speed of the machine, ...), let's exclude a couple of months at the end of the log, to be safe. We suggest **giving the metrics for the first 6, 9, 12, 15 and 18 months of the simulation**, i.e. until (Sunday 1998-12-27 23:59:59) + 13k weeks, for k = 0, ..., 4.
%% Cell type:code id: tags:
``` python
start_metrics = pd.Timestamp('1998-07-06 00:00:00').tz_localize(timezone).tz_convert(None)
end = pd.Timestamp('1998-12-27 23:59:59').tz_localize(timezone).tz_convert(None)
end_metrics = [end + k*pd.Timedelta(weeks=13) for k in range(5)]
def nb_days_between(beg: pd.Timestamp, end: pd.Timestamp):
    return round((end - beg).total_seconds() / (24*3600))
fig, ax = plt.subplots(figsize=(16,5))
sub_time_rigid = WL_rigid["submission_time"]
sub_time_rigid.groupby(sub_time_rigid.dt.to_period('D')).count().plot(kind='area',
    alpha=.5, xlabel="Per day", ylabel="#submissions", label="rigid", ax=ax)
sub_time_feedback = read_and_clean(f"{EXPE_DIR}/a0_FCFS/_jobs.csv")["submission_time"]
sub_time_feedback.groupby(sub_time_feedback.dt.to_period('D')).count().plot(kind='area',
    alpha=.5, xlabel="Per day", ylabel="#submissions", label="A0_FCFS", ax=ax)
ax.set_ylim(0, 650)
ax.vlines(start_metrics, ymin=0, ymax=600, colors='tab:green', linestyles='dashed', label="start_metrics")
ax.vlines(end_metrics, ymin=0, ymax=600, colors='tab:red', linestyles='dashed', label="end_metrics")
ax.legend()
```
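%% Cell type:markdown id: tags:
As a quick check of the window arithmetic (13 weeks is roughly 3 months), the sketch below prints each window's end date and length in days, reusing `start_metrics`, `end_metrics` and `nb_days_between` from the cell above:
%% Cell type:code id: tags:
``` python
# Each end_metrics[k] = end + 13k weeks, giving windows of ~6, 9, 12, 15 and 18 months.
for e in end_metrics:
    print(e, "->", nb_days_between(start_metrics, e), "days")
```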
%% Cell type:code id: tags:
``` python
data = {
    "EASY (rigid)": f"{EXPE_DIR}/rigid_EASY",
    "EASY (fb)": f"{EXPE_DIR}/a60_EASY",
    "infra*2 (rigid)": f"{EXPE_DIR}/rigid_EASY_double_infra",
    "infra*2 (fb)": f"{EXPE_DIR}/a60_EASY_double_infra",
    "speed*2 (rigid)": f"{EXPE_DIR}/rigid_EASY_double_speed",
    "speed*2 (fb)": f"{EXPE_DIR}/a60_EASY_double_speed",
    # "FCFS (fb)": f"{EXPE_DIR}/a60_FCFS",
    # "FCFS (rigid)": f"{EXPE_DIR}/rigid_FCFS",
    "infra/2 (rigid)": f"{EXPE_DIR}/rigid_EASY_half_infra",
    "infra/2 (fb)": f"{EXPE_DIR}/a60_EASY_half_infra",
    "speed/2 (rigid)": f"{EXPE_DIR}/rigid_EASY_half_speed",
    "speed/2 (fb)": f"{EXPE_DIR}/a60_EASY_half_speed",
}
throughput, mean_util = {}, {}
for name, path in data.items():
    js = JobSet.from_csv(f"{path}/_jobs.csv")
    f_times = ajust_timestamp_column(js.df.finish_time)
    m_state = pd.read_csv(f"{path}/_machine_states.csv")  # independent of the window, read once
    throughput[name], mean_util[name] = [], []
    for end in end_metrics:
        thru = f_times[(f_times >= start_metrics) & (f_times < end)].count()
        throughput[name].append(thru / nb_days_between(start_metrics, end))
        mean_util[name].append(mean_util_between(m_state, start_metrics, end))
# Tables
throughput = pd.DataFrame.from_dict(throughput, orient="index", columns=["6m", "9m", "12m", "15m", "18m"])
mean_util = pd.DataFrame.from_dict(mean_util, orient="index", columns=["6m", "9m", "12m", "15m", "18m"])
print("Windowed throughput:")
display(throughput)
print("Windowed mean utilization:")
display(mean_util)
```
%% Cell type:code id: tags:
``` python
# Plot
n_col = throughput.shape[1]
fig, ax = plt.subplots(ncols=2, layout="constrained", sharey=True, figsize=(6, 4))
col1, col2 = [plt.cm.summer(i/n_col) for i in range(n_col)], [plt.cm.autumn(i/n_col) for i in range(n_col)]
throughput.plot(kind="barh", title="average #jobs finished per day", color=col1, ax=ax[0])
mean_util.plot(kind="barh", title="mean utilization (in %)", color=col2, ax=ax[1])
ax[0].grid(); ax[1].grid()
ax[0].invert_yaxis()
ax[0].legend(loc='lower left')
```
%% Cell type:markdown id: tags:
The graph above shows that our metrics are still completely dictated by the log, despite our replay model: when we multiply or divide our infrastructure by two, the **utilization** scales accordingly, but the **throughput** remains roughly the same.
**Our replay model is not sufficient to capture the rebound effect.**
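A quick numeric check of this claim from the tables computed above (a sketch; the row labels are those defined in the `data` dict of the windowed-metrics cell):
%% Cell type:code id: tags:
``` python
# Ratios of the doubled-infrastructure run over the baseline EASY run:
# throughput stays close to 1 while utilization roughly halves.
print(throughput.loc["infra*2 (fb)"] / throughput.loc["EASY (fb)"])
print(mean_util.loc["infra*2 (fb)"] / mean_util.loc["EASY (fb)"])
```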
%% Cell type:markdown id: tags:
## New metrics
%% Cell type:markdown id: tags:
### Lateness / delta per user
%% Cell type:code id: tags:
``` python
data = {
    "A0_FCFS": f"{EXPE_DIR}/a0_FCFS",
    "A0_EASY": f"{EXPE_DIR}/a0_EASY",
    "speed*2": f"{EXPE_DIR}/a0_EASY_double_speed",
    "infra*2": f"{EXPE_DIR}/a0_EASY_double_infra",
    "speed/2": f"{EXPE_DIR}/a0_EASY_half_speed",
    "infra/2": f"{EXPE_DIR}/a0_EASY_half_infra",
}
rigid = WL_rigid[["workload_name", "submission_time"]]
cmap, N = plt.cm.Accent, len(data)
colors = [cmap(k / float(N)) for k in range(N)]
markers = [".", "1", "o", "x", "+", "3", ">"]
def plot_lateness_spread(logscale=True):
    m_lateness = {}
    fig, ax = plt.subplots(ncols=2, figsize=(16,5), constrained_layout=True, sharey=True)
    col, marks = iter(colors), iter(markers)
    for name, path in data.items():
        df = read_and_clean(f"{path}/_jobs.csv")[["workload_name", "submission_time"]]
        df["lateness"] = (df.submission_time - rigid.submission_time).astype(dtype='timedelta64[s]')
        grp = df.groupby(df.workload_name)["lateness"].agg(["last", "mean", "count"])
        m_lateness[name] = grp["mean"]
        grp.plot(kind='scatter', x="count", y="mean", ax=ax[0], marker=next(marks), color=next(col), label=name,
                 xlabel="#jobs submitted by the user", ylabel="average lateness (s)")
    bplot = pd.concat(m_lateness, axis=1).plot(kind="box", ax=ax[1], return_type='dict', patch_artist=True)
    # Color the boxes:
    for i, patch in enumerate(bplot['boxes']):
        patch.set_facecolor(colors[i])
    ax[0].grid()
    ax[1].grid(axis="y")
    if logscale:
        ax[0].set_yscale('symlog')
        ax[0].set_xscale('log')
plot_lateness_spread(logscale=False)
plot_lateness_spread(logscale=True)
```
%% Cell type:markdown id: tags:
Analysis:
- the average lateness per user does not seem to depend only on the number of jobs submitted by that user
%% Cell type:code id: tags:
``` python
data = {
    "A0_FCFS": f"{EXPE_DIR}/a0_FCFS",
    "A0_EASY": f"{EXPE_DIR}/a0_EASY",
    "speed*2": f"{EXPE_DIR}/a0_EASY_double_speed",
    "infra*2": f"{EXPE_DIR}/a0_EASY_double_infra",
    "speed/2": f"{EXPE_DIR}/a0_EASY_half_speed",
    "infra/2": f"{EXPE_DIR}/a0_EASY_half_infra",
}
rigid = WL_rigid[["workload_name", "submission_time"]]
cmap, N = plt.cm.Accent, len(data)
cols = iter([cmap(k / float(N)) for k in range(N)])
marks = iter([".", "1", "o", "x", "+", "3", ">"])
fig, ax = plt.subplots(ncols=2, figsize=(16,5), constrained_layout=True, sharey=True)
DAG_l_paths = DAG_analysis["longest_paths"]["a0"]
max_succs = DAG_analysis["succs"]["a0"]
for name, path in data.items():
    col, mark = next(cols), next(marks)
    df = read_and_clean(f"{path}/_jobs.csv")[["workload_name", "submission_time"]]
    df["lateness"] = (df.submission_time - rigid.submission_time).astype(dtype='timedelta64[s]')
    grp = df.groupby(df.workload_name)["lateness"].mean()
    path_lateness = pd.concat([grp, DAG_l_paths], keys=["mean_lateness", "DAG_l_path"], axis=1)
    path_lateness.plot(kind='scatter', x="DAG_l_path", y="mean_lateness", ax=ax[0], marker=mark, color=col, label=name,
                       xlabel="longest path in user DAG", ylabel="average lateness (s)")
    arity_lateness = pd.concat([grp, max_succs], keys=["mean_lateness", "max_succs"], axis=1)
    arity_lateness.plot(kind='scatter', x="max_succs", y="mean_lateness", ax=ax[1], marker=mark, color=col, label=name,
                        xlabel="max #succs in user DAG", ylabel="average lateness (s)")
```
%% Cell type:markdown id: tags:
Analysis:
- no clear tendency is visible for either of the two variables
- the left graph looks a lot like the graph of average lateness as a function of the number of jobs
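One way to test this resemblance quantitatively (a sketch; assumes `DAG_l_paths` is indexed by `workload_name`, like the per-user groupbys above):
%% Cell type:code id: tags:
``` python
# Correlate the longest path in each user's DAG with the number of jobs the
# user submitted; a high value would explain why the two graphs look alike.
jobs_per_user = WL_rigid.groupby("workload_name").size()
print("corr(#jobs, longest path):", jobs_per_user.corr(DAG_l_paths))
```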
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(ncols=2, figsize=(16,5), constrained_layout=True, sharey=True, sharex=True)
cmap, N, i = plt.cm.Accent, len(data), 0
for name, path in data.items():
    df = read_and_clean(f"{path}/_jobs.csv")[["workload_name", "submission_time"]]
    df["lateness"] = (df.submission_time - rigid.submission_time).astype(dtype='timedelta64[s]')
    grp = df.groupby(df.workload_name)["lateness"].agg(["last", "mean", "median"])
    grp.plot(kind='scatter', x="last", y="mean", ax=ax[0], marker='.', color=cmap(i / float(N)), label=name,
             xlabel="lateness of last job", ylabel="average lateness (s)")
    grp.plot(kind='scatter', x="median", y="mean", ax=ax[1], marker='.', color=cmap(i / float(N)), label=name,
             xlabel="median lateness per user", ylabel="average lateness (s)")
    i += 1
x_l, x_r = ax[0].get_xlim()
x = np.linspace(x_l, x_r, 100)
for i in (0, 1):
    ax[i].plot(x, x, '--r', alpha=.3, label='y=x')
    ax[i].plot(x, x/2, '--g', alpha=.3, label='y=x/2')
    ax[i].set_ylim(-.6e7, 1.6e7)
    ax[i].set_xlim(-1e7, 2.7e7)
    ax[i].legend(); ax[i].grid()
```
%% Cell type:markdown id: tags:
Analysis:
- the average lateness per user and the lateness of the user's last job are proportional
- the same holds between the average lateness and the lateness of the median job
- **this corresponds pretty well with the delta hypothesis**
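A minimal numeric illustration of this hypothesis: if a user's lateness accumulates linearly, i.e. the i-th job is late by i*delta seconds, then the mean lateness is delta*(n-1)/2, which is half the lateness of the last job and equal to the lateness of the median job, matching the y=x/2 and y=x alignments observed above (the delta and n below are hypothetical values):
%% Cell type:code id: tags:
``` python
# Under lateness_i = i * delta: mean == last / 2 == median (for n odd).
delta_hyp, n = 60.0, 1001  # hypothetical per-job delay and job count
lateness_hyp = pd.Series(np.arange(n) * delta_hyp)
print(lateness_hyp.mean(), lateness_hyp.iloc[-1] / 2, lateness_hyp.median())
```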
%% Cell type:code id: tags:
``` python
data = {
    "A0_FCFS": f"{EXPE_DIR}/a0_FCFS",
    "A0_EASY": f"{EXPE_DIR}/a0_EASY",
    "speed*2": f"{EXPE_DIR}/a0_EASY_double_speed",
    "infra*2": f"{EXPE_DIR}/a0_EASY_double_infra",
    "speed/2": f"{EXPE_DIR}/a0_EASY_half_speed",
    "infra/2": f"{EXPE_DIR}/a0_EASY_half_infra",
}
cmap, N = plt.cm.Accent, len(data)
colors = [cmap(k / float(N)) for k in range(N)]
def delta(x):
    # Under the linear-accumulation hypothesis lateness_i = i * delta,
    # sum(x) = delta * n(n-1)/2, hence the estimator below.
    if len(x) <= 2:
        return np.inf
    return 2 * sum(x) / (len(x) * (len(x) - 1))
def plot_delta_spread(logscale=False):
    fig, ax = plt.subplots(ncols=2, figsize=(16,6), constrained_layout=True, sharey=True)
    deltas = {}
    col = iter(colors)
    for name, path in data.items():
        df = read_and_clean(f"{path}/_jobs.csv")[["workload_name", "submission_time"]]
        df["lateness"] = (df.submission_time - WL_rigid.submission_time).astype(dtype='timedelta64[s]')
        grp = df.groupby(df.workload_name)["lateness"].agg(count="count", delta=lambda x: delta(x))
        grp.plot(kind='scatter', x="count", y="delta", ax=ax[0], marker='.', color=next(col), label=name,
                 xlabel="#jobs per user", ylabel="delta per user")
        deltas[name] = grp["delta"]
    bplot = pd.DataFrame(deltas).boxplot(ax=ax[1], return_type='dict', patch_artist=True)
    # Color the boxes:
    for i, patch in enumerate(bplot['boxes']):
        patch.set_facecolor(colors[i])
    if logscale:
        ax[0].set_xscale('log')
        ax[0].set_xticks([2, 10, 100, 1000], [2, 10, 100, 1000])
        ax[0].set_yscale('symlog')
    ax[0].grid()
    ax[1].grid(axis="x")
plot_delta_spread(logscale=False)
plot_delta_spread(logscale=True)
```
%% Cell type:markdown id: tags:
Analysis:
- the delta depends on the user (and not simply on the number of jobs they submitted), on the infrastructure, and on the scheduler
### Which metric scale by input size? ### Which metric scale by input size?
Run the simulation with a subset of the input, to see if it scales. Run the simulation with a subset of the input, to see if it scales.
With scheduler FCFS: With scheduler FCFS:
%% Cell type:code id: tags:
``` python
tot_lines = (end_data_in_swf - begin_data_in_swf) + 1
pf = f"{PF_folder}/infra.xml"
for k in range(10, 100, 10):
    print(f"Expe cut {k}%.\n-------\n")
    # Cut the input
    wl = f"{WL_folder}/a0_cut{k}"
    nb_lines = int(tot_lines * k / 100)
    ! tail -n +{begin_data_in_swf} {WL_swf_path} | head -n {nb_lines} > workload/tmp_wl.swf
    ! swf2userSessions workload/tmp_wl.swf {wl} -a 0
    # Run simu
    EXPE_FILE = f"{EXPE_DIR}/a0_FCFS_cut{k}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl, ".SABjson")
    print("Simulation start.\n******************\n")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v fcfs --variant_options_filepath={uf}'
    ! robin {EXPE_FILE}.yaml
    print("\n******************\nSimulation done.")
    # Clean inputs
    ! rm workload/tmp_wl.swf
    ! rm -r {wl}
```
%% Cell type:markdown id: tags:
With scheduler EASY:
%% Cell type:code id: tags:
``` python
tot_lines = (end_data_in_swf - begin_data_in_swf) + 1
pf = f"{PF_folder}/infra.xml"
for k in range(10, 100, 10):
    print(f"Expe cut {k}%.\n-------\n")
    # Cut the input
    wl = f"{WL_folder}/a0_cut{k}"
    nb_lines = int(tot_lines * k / 100)
    ! tail -n +{begin_data_in_swf} {WL_swf_path} | head -n {nb_lines} > workload/tmp_wl.swf
    ! swf2userSessions workload/tmp_wl.swf {wl} -a 0
    # Run simu
    EXPE_FILE = f"{EXPE_DIR}/a0_EASY_cut{k}"
    uf = gen_user_description_file(EXPE_FILE, "fb_user_think_time_only", wl, ".SABjson")
    print("Simulation start.\n******************\n")
    ! robin generate {EXPE_FILE}.yaml --output-dir={EXPE_FILE} \
        --batcmd="batsim -p {pf} --quiet -w {empty_wl} -e {EXPE_FILE}/ --enable-compute-sharing --enable-dynamic-jobs --acknowledge-dynamic-jobs --enable-profile-reuse" \
        --schedcmd='batmen --verbosity=silent -v easy_bf --variant_options_filepath={uf}'
    ! robin {EXPE_FILE}.yaml
    print("\n******************\nSimulation done.")
    # Clean inputs
    ! rm workload/tmp_wl.swf
    ! rm {EXPE_FILE}.yaml
    ! rm -r {wl}
```
%% Cell type:code id: tags:

``` python
def metrics(expe_folder):
    """Aggregate metrics for one experiment folder."""
    m = {}
    jobs = read_and_clean(f"{expe_folder}/_jobs.csv")
    m_state = pd.read_csv(f"{expe_folder}/_machine_states.csv")
    m['nb_jobs'] = jobs.shape[0]
    m['mean_lateness'] = int(mean_lateness(jobs, ref=WL_rigid, crop=True))
    m['delta'] = 2 * m['mean_lateness'] / (m['nb_jobs'] - 1)
    m['stretch'] = stretch(jobs, ref=WL_rigid, crop=True)
    beg, end = jobs["submission_time"].min(), jobs["submission_time"].max()
    m['util'] = mean_util_between(m_state, beg, end)
    return m

met_fcfs, met_easy = [], []
for k in range(10, 100, 10):
    expe_folder = f"{EXPE_DIR}/a0_FCFS_cut{k}"
    met_fcfs.append(metrics(expe_folder))
    expe_folder = f"{EXPE_DIR}/a0_EASY_cut{k}"
    met_easy.append(metrics(expe_folder))
met_fcfs.append(metrics(f"{EXPE_DIR}/a0_FCFS"))
met_easy.append(metrics(f"{EXPE_DIR}/a0_EASY"))
dfs = {"fcfs": pd.DataFrame(met_fcfs), "easy": pd.DataFrame(met_easy)}

fig, ax = plt.subplots(ncols=3, figsize=(16,10), layout='constrained', sharex=True)
for name, to_plot in dfs.items():
    to_plot.plot(kind='line', x='nb_jobs', y='mean_lateness', title='mean_lateness (days)', style='.--', label=name, ax=ax[0])
    to_plot.plot(kind='line', x='nb_jobs', y='delta', title='delta (s) / stretch', style='.--', label=f"{name} (delta)", ax=ax[1])
    to_plot.plot(kind='line', x='nb_jobs', y='stretch', style='x:', label=f"{name} (stretch)", ax=ax[1], secondary_y=True, color=ax[1].get_lines()[-1].get_color())
    to_plot.plot(kind='line', x='nb_jobs', y='util', title='mean_utilization (%)', style='.--', label=name, ax=ax[2])

# Relabel the lateness axis from seconds to days
y_l, y_r = ax[0].get_ylim(); y_l, y_r = int(y_l / (3600*24)), int(y_r / (3600*24))
ax[0].set_yticks(np.arange(y_l * 3600*24, y_r * 3600*24, step=5*3600*24), labels=np.arange(y_l, y_r, step=5))
ax[0].grid(); ax[1].grid(); ax[2].grid()
ax[2].set_ylim(0,100)
fig.suptitle("Variation of metrics by input size", fontsize=16)
```
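
%% Cell type:markdown id: tags:

As a quick sanity check of the closed form used above, `delta = 2 * mean_lateness / (nb_jobs - 1)`, here it is evaluated on a tiny synthetic lateness series (values made up for illustration only):

%% Cell type:code id: tags:

``` python
import pandas as pd

synthetic_lateness = pd.Series([0, 60, 120, 300, 600])  # lateness of 5 jobs, in seconds
N = len(synthetic_lateness)
m_lat = synthetic_lateness.mean()
print(f"mean lateness = {m_lat:.0f} s, delta = {2 * m_lat / (N - 1):.0f} s")
```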
%% Cell type:markdown id: tags:

Observations:

- bad news: the metrics do not scale with input size at all (see the input-cutting sketch after this list)
- what is this due to? Redo the graphs with platform*2 and platform/2
- redo with the uncleaned log
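
%% Cell type:markdown id: tags:

For reference, a minimal sketch of how a cut input could be produced, assuming the `a0_*_cut{k}` experiments replay only the first k% of the jobs in the SWF trace (the `cut_swf` helper below is hypothetical, not the code that generated the existing folders):

%% Cell type:code id: tags:

``` python
def cut_swf(src, dst, percent):
    """Write to `dst` the SWF header of `src` plus its first `percent`% of job lines."""
    with open(src) as f:
        lines = f.readlines()
    header = lines[:begin_data_in_swf - 1]               # comment/header lines
    jobs = lines[begin_data_in_swf - 1:end_data_in_swf]  # job lines (bounds count from 1)
    with open(dst, "w") as f:
        f.writelines(header + jobs[:len(jobs) * percent // 100])

# Example (hypothetical output path):
# cut_swf(WL_swf_path, f"{WL_folder}/SDSC-SP2_cut50.swf", 50)
```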
%% Cell type:markdown id: tags:

### Evolution of mean lateness and delta
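
With $\ell_i$ denoting the lateness of job $i$ (replayed minus original submission time), the cells below plot the running mean lateness and the corresponding delta over the first $n$ jobs:

$$\bar{L}_n = \frac{1}{n}\sum_{i=1}^{n} \ell_i, \qquad \delta_n = \frac{2\,\bar{L}_n}{n-1},$$

which is the same quantity as the `2 * cumsum / (n * (n-1))` expression in the code (up to pandas' 0-based index).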
%% Cell type:code id: tags:

``` python
data = {}  # experiment folders to plot, keyed by subplot column
data[0] = {
    "a0_FCFS": f"{EXPE_DIR}/a0_FCFS",
    "a0_EASY_half_speed": f"{EXPE_DIR}/a0_EASY_half_speed",
    "a0_EASY_half_infra": f"{EXPE_DIR}/a0_EASY_half_infra",
}
data[1] = {
    "a0_EASY": f"{EXPE_DIR}/a0_EASY",
    "a0_EASY_double_speed": f"{EXPE_DIR}/a0_EASY_double_speed",
    "a0_EASY_double_infra": f"{EXPE_DIR}/a0_EASY_double_infra",
}

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(20,12), sharex=True, layout="constrained")
for pos in [0, 1]:
    for name, path in data[pos].items():
        jobs = read_and_clean(f"{path}/_jobs.csv")
        N = jobs.shape[0]
        # Lateness of each job: replayed submission time minus original submission time
        lateness = (jobs.submission_time - WL_rigid.submission_time).astype(dtype='timedelta64[s]').reset_index(drop=True)
        m_lateness = lateness.cumsum() / lateness.index
        m_lateness.plot(kind='line', ax=ax[0][pos], label=name, title="Mean lateness")
        delta = 2 * lateness.cumsum() / (lateness.index * (lateness.index - 1))
        delta.plot(kind='line', ax=ax[1][pos], title="Delta")
    ax[0][pos].legend()
    ax[0][pos].set(xlabel="jobs (sorted by original submission time)", xlim=(0, WL_rigid.shape[0]), ylabel="mean lateness (s)")
    ax[1][pos].set(xlabel="jobs (sorted by original submission time)", xlim=(0, WL_rigid.shape[0]), ylabel="delta (s)")
    ax[0][pos].yaxis.set_label_position("right"); ax[0][pos].yaxis.tick_right()
    ax[1][pos].yaxis.set_label_position("right"); ax[1][pos].yaxis.tick_right()
    ax[1][pos].grid(axis='y')
```
%% Cell type:code id: tags:

``` python
# For the paper
data = {
    "A0_EASY": f"{EXPE_DIR}/a0_EASY",
    "A0_FCFS": f"{EXPE_DIR}/a0_FCFS",
    "A0_speed*2": f"{EXPE_DIR}/a0_EASY_double_speed",
    "A0_infra*2": f"{EXPE_DIR}/a0_EASY_double_infra",
    "A0_speed/2": f"{EXPE_DIR}/a0_EASY_half_speed",
    "A0_infra/2": f"{EXPE_DIR}/a0_EASY_half_infra"
}
fig, ax = plt.subplots(ncols=2, figsize=(6,6), layout='tight', sharex=True)

# Metrics obtained by cutting the OUTPUT
for name, path in data.items():
    jobs = read_and_clean(f"{path}/_jobs.csv")
    N = jobs.shape[0]
    lateness = (jobs.submission_time - WL_rigid.submission_time).astype(dtype='timedelta64[s]').reset_index(drop=True)
    m_lateness = lateness.cumsum() / lateness.index
    m_lateness.plot(kind='line', ax=ax[0], label=name, title="mean lateness (days)")
    delta = 2 * lateness.cumsum() / (lateness.index * (lateness.index - 1))
    delta.plot(kind='line', ax=ax[1], title="Delta (s)", label=name)

# Metrics obtained by cutting the INPUT
met_fcfs, met_easy = [], []
for k in range(10, 100, 10):
    met_fcfs.append(metrics(f"{EXPE_DIR}/a0_FCFS_cut{k}"))
    met_easy.append(metrics(f"{EXPE_DIR}/a0_EASY_cut{k}"))
met_fcfs.append(metrics(f"{EXPE_DIR}/a0_FCFS"))
met_easy.append(metrics(f"{EXPE_DIR}/a0_EASY"))
dfs = {"easy": pd.DataFrame(met_easy), "fcfs": pd.DataFrame(met_fcfs)}

ax[0].set_prop_cycle(None); ax[1].set_prop_cycle(None)  # reset colors
for name, to_plot in dfs.items():
    to_plot.plot(kind='line', x='nb_jobs', y='mean_lateness', xlabel="", style='.--', label=name, ax=ax[0])
    to_plot.plot(kind='line', x='nb_jobs', y='delta', title='delta (s)', xlabel="", style='.--', label="", ax=ax[1])

fig.supxlabel("jobs (sorted by original submission time)")
ax[0].set_xlim(0, WL_rigid.shape[0])
# Relabel the lateness axis from seconds to days (one tick every `step` days)
step = 10; y_l, y_r = ax[0].get_ylim(); y_l, y_r = int(y_l / (3600*24) / step), int(y_r / (3600*24) / step)
ax[0].set_yticks(np.arange(y_l * 3600*24 * step, (y_r+1) * 3600*24 * step, step=step*3600*24), labels=np.arange(y_l*step, (y_r+1)*step, step=step))
ax[1].set_ylim(-100, 600)
ax[0].grid(); ax[1].grid()
ax[1].legend()
fig.savefig(f"{fig_path}/lateness_delta_evolution_SDSC.pdf")
```
%% Cell type:markdown id: tags:

Observation:

- the delta seems to stabilize (a rough quantification is sketched below)
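
A rough way to quantify that stabilization, reusing the `delta` series left in scope by the last experiment plotted above (assumptions: that cell was just run, and the 10% tail length is arbitrary):

%% Cell type:code id: tags:

``` python
# Average delta over the last 10% of jobs as a crude "stabilized" value;
# the first entries are dropped because the formula divides by n*(n-1)
tail = delta.replace([np.inf, -np.inf], np.nan).dropna()
tail = tail.tail(len(tail) // 10)
print(f"delta over the last 10% of jobs: {tail.mean():.1f} s (std {tail.std():.1f} s)")
```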