Skip to content
Snippets Groups Projects
Commit 2f1d7f7f authored by Millian Poquet's avatar Millian Poquet
Browse files

wip: visualization scripts for paper

parent a6941eeb
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
library(tidyverse)
library(GGally)
library(viridis)
# Input file (aggregated simulation results) and directory receiving the figures.
agg_file = '/tmp/agg-result.csv'
output_dir = '/tmp'
# Reference timestamp. The call is namespace-qualified so that it resolves even
# when library(tidyverse) does not attach lubridate (tidyverse < 2.0.0).
epoch_m100 = lubridate::ymd_hms('2022-01-01 00:00:00')
# Negated version of %in%.
`%notin%` = Negate(`%in%`)
# Load the aggregated results and convert the identifier-like columns to factors.
# predictor_name gets an explicit level order, and two helper factors are derived
# from it:
#   predictor_metrics: 'max' for max/real_max, 'mean' for mean/real_mean, 'ub' otherwise
#   predictor_method:  'predicted' for mean/max, 'real' otherwise
data = read_csv(agg_file) %>%
  mutate(
    start_dt_s = as.factor(start_dt_s),
    job_power_estimation_field = as.factor(job_power_estimation_field),
    predictor_name = factor(
      predictor_name,
      levels = c('upper_bound', 'max', 'real_max', 'real_mean', 'mean', 'zero')
    ),
    predictor_metrics = factor(
      case_when(
        predictor_name %in% c('real_max', 'max') ~ 'max',
        predictor_name %in% c('real_mean', 'mean') ~ 'mean',
        TRUE ~ 'ub'
      ),
      levels = c('ub', 'max', 'mean')
    ),
    predictor_method = factor(
      if_else(predictor_name %in% c('mean', 'max'), 'predicted', 'real'),
      levels = c('predicted', 'real')
    )
  )
# compute scheduling metrics against their matching EASY baseline
# Rows of every predictor except 'zero'.
data_nz = data %>% filter(predictor_name != 'zero')
# Baseline rows: the 'zero' predictor at the largest powercap ratio present in the data.
data_z = data %>% filter(
  predictor_name == 'zero' &
    powercap_dynamic_value_ratio == max(data$powercap_dynamic_value_ratio)
)
# Keep only the join key and the baseline metrics, renamed with a zero_ prefix.
data_z_joinable = data_z %>% transmute(
  start_dt_s = start_dt_s,
  zero_mean_utilization = mean_utilization,
  zero_max_utilization = max_utilization,
  zero_mean_turnaround_time = mean_turnaround_time,
  zero_mean_waiting_time = mean_waiting_time
)
# The join key is spelled out rather than left to the implicit natural join, so the
# matching column is visible here and no "Joining with" message is emitted.
# NOTE(review): assumes a single baseline row per start_dt_s; several would
# duplicate the data_nz rows -- confirm against the input file.
data_nz = inner_join(data_nz, data_z_joinable, by = 'start_dt_s') %>% mutate(
  mean_turnaround_time_minus_zero = mean_turnaround_time - zero_mean_turnaround_time,
  mean_waiting_time_minus_zero = mean_waiting_time - zero_mean_waiting_time
)
# Energy difference from the powercap (in GJ) against the powercap ratio:
# jittered points plus one linear fit per predictor.
ggplot(
  data_nz,
  aes(x = powercap_dynamic_value_ratio, y = energy_from_powercap / 1e9, color = predictor_name)
) +
  geom_jitter(width = 0.01, height = 0) +
  geom_smooth(method = "lm", se = FALSE) +
  expand_limits(x = 0) +
  scale_x_continuous(breaks = seq(0, 0.7, by = 0.1), labels = scales::percent) +
  scale_color_viridis(discrete = TRUE) +
  guides(color = guide_legend(nrow = 1)) +
  labs(
    x = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter.",
    y = "Energy difference from the powercap during the constrained period for each simulation (GJ)"
  ) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = 'top')
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(
  sprintf("%s/energy-diff-against-powercap-predictor.pdf", output_dir),
  width = 16,
  height = 9
)
# Surplus energy (in GJ) against the powercap ratio:
# jittered points plus one linear fit per predictor.
ggplot(
  data_nz,
  aes(x = powercap_dynamic_value_ratio, y = surplus_energy / 1e9, color = predictor_name)
) +
  geom_jitter(width = 0.01, height = 0) +
  geom_smooth(method = "lm", se = FALSE) +
  expand_limits(x = 0) +
  scale_x_continuous(breaks = seq(0, 0.7, by = 0.1), labels = scales::percent) +
  scale_color_viridis(discrete = TRUE) +
  guides(color = guide_legend(nrow = 1)) +
  labs(
    x = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter.",
    y = "Energy surplus from the powercap during the constrained period for each simulation (GJ)"
  ) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = 'top')
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(
  sprintf("%s/energy-surplus-against-powercap-predictor.pdf", output_dir),
  width = 16,
  height = 9
)
# Mean utilization divided by 980 against the powercap ratio, with a slope-1
# reference line and one linear fit per predictor.
ggplot(
  data_nz,
  aes(x = powercap_dynamic_value_ratio, y = mean_utilization / 980, color = predictor_name)
) +
  geom_jitter(width = 0.01, height = 0) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_abline(slope = 1) +
  expand_limits(x = 0) +
  scale_x_continuous(breaks = seq(0, 0.7, by = 0.1), labels = scales::percent) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), labels = scales::percent) +
  scale_color_viridis(discrete = TRUE) +
  guides(color = guide_legend(nrow = 1)) +
  labs(
    x = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter.",
    y = "Utilization (proportion of nodes)"
  ) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = 'top')
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(
  sprintf("%s/utilization-against-powercap-predictor.pdf", output_dir),
  width = 16,
  height = 9
)
# Mean turnaround time minus its EASY baseline against the powercap ratio,
# one panel per start_dt_s with independent y axes.
ggplot(
  data_nz,
  aes(x = powercap_dynamic_value_ratio, y = mean_turnaround_time_minus_zero, color = predictor_name)
) +
  geom_jitter(width = 0.01, height = 0) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0) +
  expand_limits(x = 0) +
  facet_wrap(vars(start_dt_s), scales = 'free_y') +
  scale_x_continuous(breaks = seq(0, 0.7, by = 0.1), labels = scales::percent) +
  scale_y_continuous() +
  scale_color_viridis(discrete = TRUE) +
  guides(color = guide_legend(nrow = 1)) +
  labs(
    x = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter.",
    y = "Mean turnaround time difference against EASY without any powercap for each simulation"
  ) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = 'top')
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(
  sprintf("%s/mean-turnaround-time-against-powercap-predictor.pdf", output_dir),
  width = 16,
  height = 9
)
# Mean waiting time minus its EASY baseline against the powercap ratio,
# one panel per start_dt_s with independent y axes.
ggplot(
  data_nz,
  aes(x = powercap_dynamic_value_ratio, y = mean_waiting_time_minus_zero, color = predictor_name)
) +
  geom_jitter(width = 0.01, height = 0) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0) +
  expand_limits(x = 0) +
  facet_wrap(vars(start_dt_s), scales = 'free_y') +
  scale_x_continuous(breaks = seq(0, 0.7, by = 0.1), labels = scales::percent) +
  scale_y_continuous() +
  scale_color_viridis(discrete = TRUE) +
  guides(color = guide_legend(nrow = 1)) +
  labs(
    x = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter.",
    y = "Mean waiting time difference against EASY without any powercap for each simulation"
  ) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = 'top')
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(
  sprintf("%s/mean-waiting-time-against-powercap-predictor.pdf", output_dir),
  width = 16,
  height = 9
)
# power (mean+p1+p99) for all instances, facetted by workload, colored by predictor
max_observed_total_power = 955080
max_power_per_node = 2100.0
min_power_per_node = 240.0
nb_nodes = 980
# Normalization constant used below: the observed total maximum minus
# min_power_per_node for each of the nb_nodes nodes.
max_dynamic_power = max_observed_total_power - min_power_per_node * nb_nodes
# Per predictor: a solid line for mean_power, dotted lines for power_p1 and
# power_p99, and a translucent ribbon between the two, all divided by
# max_dynamic_power. The slope-1 line is the reference where y equals the powercap ratio.
data_nz %>% ggplot(aes(x=powercap_dynamic_value_ratio)) +
  #geom_jitter(width=1/100, height=0) +
  #geom_smooth(method = "lm", se = FALSE) +
  geom_ribbon(aes(ymin=power_p1/max_dynamic_power, ymax=power_p99/max_dynamic_power, fill=predictor_name), alpha=0.1) +
  geom_line(aes(y=mean_power/max_dynamic_power, color=predictor_name)) +
  geom_line(aes(y=power_p1/max_dynamic_power, color=predictor_name), linetype='dotted', show.legend = FALSE) +
  geom_line(aes(y=power_p99/max_dynamic_power, color=predictor_name), linetype='dotted', show.legend = FALSE) +
  geom_abline(slope=1) +
  theme_bw() +
  # The legend title argument was corrupted into unparsable text; it is restored
  # to element_blank(), as in every other theme() call of this script.
  theme(legend.position='top', legend.title=element_blank()) +
  scale_color_viridis(discrete=TRUE) +
  scale_fill_viridis(discrete=TRUE) +
  facet_wrap(vars(start_dt_s)) +
  expand_limits(x=0) +
  labs(
    y="Power consumption during the powercap-constrained 3-hour time window (proportion of the maximum dynamic power range).",
    x="Powercap value (proportion of the maximum dynamic power range)."
  )
# No plot is passed: ggsave() writes the most recently built plot.
ggsave(sprintf("%s/power-consumption-mean-p1-p99-against-powercap-predictor.pdf", output_dir), width=16, height=9)
# Box plots of mean_power / max_dynamic_power per predictor metrics and method,
# one panel per selected powercap ratio; the horizontal line in each panel is
# drawn at that panel's powercap ratio.
# The ratios are written as literals because they are matched exactly with %in%.
powercap_ratios_values_to_show = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
width_scale = 0.3
data_nz %>%
  filter(powercap_dynamic_value_ratio %in% powercap_ratios_values_to_show) %>%
  mutate(powercap_label = sprintf("pcap=%g", powercap_dynamic_value_ratio)) %>%
  ggplot() +
  geom_hline(aes(yintercept = powercap_dynamic_value_ratio), linewidth = width_scale) +
  geom_boxplot(
    aes(x = predictor_metrics, y = mean_power / max_dynamic_power, fill = predictor_method),
    linewidth = width_scale,
    outlier.size = width_scale
  ) +
  expand_limits(x = 0) +
  facet_wrap(vars(powercap_label), nrow = 1) +
  scale_y_continuous(breaks = seq(0, 0.7, by = 0.1)) +
  scale_fill_grey(start = 0.8, end = 1) +
  labs(
    x = "Job power estimator metrics",
    y = "Mean power"
  ) +
  theme_bw() +
  theme(
    legend.position = c(0.2, 0.9),
    legend.direction = 'horizontal',
    legend.title = element_blank(),
    legend.background = element_rect(color = 'black'),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
# Same figure written twice (PDF and PNG), at 90% of an 8x4 canvas.
scale = 0.9
ggsave(sprintf("%s/sched-mean-power-distribution.pdf", output_dir), width = 8 * scale, height = 4 * scale)
ggsave(sprintf("%s/sched-mean-power-distribution.png", output_dir), width = 8 * scale, height = 4 * scale)
# Box plots of the mean turnaround time difference against the EASY baseline,
# per predictor metrics and method, one panel per selected powercap ratio.
outlier_workload_start_dt_s = 18474670 # sched metrics are strongly better than EASY there
width_scale = 0.3
data_nz %>%
  filter(
    start_dt_s != outlier_workload_start_dt_s,
    powercap_dynamic_value_ratio %in% powercap_ratios_values_to_show
  ) %>%
  ggplot() +
  geom_boxplot(
    aes(x = predictor_metrics, y = mean_turnaround_time_minus_zero, fill = predictor_method),
    linewidth = width_scale,
    outlier.size = width_scale
  ) +
  facet_wrap(vars(powercap_dynamic_value_ratio), nrow = 1) +
  scale_fill_grey(start = 0.8, end = 1) +
  labs(
    x = "Job power estimator metrics",
    y = "Mean turnaround time increase (s)"
  ) +
  theme_bw() +
  theme(
    legend.position = c(0.16, 0.12),
    legend.direction = 'horizontal',
    legend.background = element_rect(color = 'black'),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
scale = 0.9
ggsave(sprintf("%s/sched-mtt-distribution.pdf", output_dir), width = 8 * scale, height = 4 * scale)
# Compare mean power between predictors for every (workload, powercap) tuple.
# predictor_name is spread into one mean_power column per predictor; cells left
# empty by the pivot become 0 so that the per-group sums below are defined.
options(dplyr.width = Inf)
data_p_mean = data %>%
  pivot_wider(names_from = predictor_name, values_from = mean_power) %>%
  replace_na(list(max = 0, upper_bound = 0, zero = 0, real_max = 0, mean = 0, real_mean = 0))
# For each tuple, flag whether each predictor's summed mean power is <= the
# summed mean power of the 'zero' predictor, and whether all of them are.
against_zero = data_p_mean %>%
  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
  summarise(
    upper_bound_below_zero = sum(upper_bound) <= sum(zero),
    real_mean_below_zero = sum(real_mean) <= sum(zero),
    mean_below_zero = sum(mean) <= sum(zero),
    real_max_below_zero = sum(real_max) <= sum(zero),
    max_below_zero = sum(max) <= sum(zero),
    all_below_zero = upper_bound_below_zero & real_mean_below_zero & mean_below_zero &
      real_max_below_zero & max_below_zero
  )
instances_where_some_mean_power_is_NOT_below_EASY = filter(against_zero, !all_below_zero)
print(sprintf(
  "number of occurrences where EASY consumes less power than any other EASY+powercap predictors: %d/%d",
  nrow(instances_where_some_mean_power_is_NOT_below_EASY),
  nrow(against_zero)
))
print(instances_where_some_mean_power_is_NOT_below_EASY)
# For each (workload, powercap) tuple, flag whether each predictor's summed mean
# power is >= the one of 'upper_bound', and whether this holds for all of them.
against_upper_bound = data_p_mean %>%
  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
  summarize(
    zero_above_upper_bound = sum(zero) >= sum(upper_bound),
    real_mean_above_upper_bound = sum(real_mean) >= sum(upper_bound),
    mean_above_upper_bound = sum(mean) >= sum(upper_bound),
    real_max_above_upper_bound = sum(real_max) >= sum(upper_bound),
    max_above_upper_bound = sum(max) >= sum(upper_bound)
  ) %>% mutate(
    all_above_upper_bound = zero_above_upper_bound & real_mean_above_upper_bound & mean_above_upper_bound & real_max_above_upper_bound & max_above_upper_bound
  )
instances_where_some_mean_power_is_NOT_above_upper_bound = against_upper_bound %>% filter(!all_above_upper_bound)
# The denominator is this table's own row count, not the one of another summary table.
print(sprintf("number of occurrences where upper_bound consumes more power than any other EASY+powercap predictors: %d/%d", nrow(instances_where_some_mean_power_is_NOT_above_upper_bound), nrow(against_upper_bound)))
print(instances_where_some_mean_power_is_NOT_above_upper_bound)
# For each (workload, powercap) tuple, flag whether real_mean, real_max and max
# have a summed mean power <= the one of the 'mean' predictor.
against_mean = data_p_mean %>%
  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
  summarise(
    real_mean_below_mean = sum(real_mean) <= sum(mean),
    real_max_below_mean = sum(real_max) <= sum(mean),
    max_below_mean = sum(max) <= sum(mean),
    all_max_below_mean = real_max_below_mean & max_below_mean,
    all_below_mean = real_mean_below_mean & real_max_below_mean & max_below_mean
  )
instances_where_some_max_power_is_NOT_below_mean = filter(against_mean, !all_max_below_mean)
print(sprintf(
  "number of occurrences where mean consumes more power than max/real_max: %d/%d",
  nrow(instances_where_some_max_power_is_NOT_below_mean),
  nrow(against_mean)
))
# Same per-tuple comparison, applied to the mean turnaround time difference
# against EASY, with the outlier workload removed. Cells left empty by the
# pivot become 0 so that the per-group sums are defined.
data_p_mtt = data_nz %>%
  filter(start_dt_s != outlier_workload_start_dt_s) %>%
  pivot_wider(names_from = predictor_name, values_from = mean_turnaround_time_minus_zero) %>%
  replace_na(list(max = 0, upper_bound = 0, zero = 0, real_max = 0, mean = 0, real_mean = 0))
# Flag, per tuple, whether each predictor's summed value is <= the one of 'upper_bound'.
against_upper_bound = data_p_mtt %>%
  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
  summarise(
    real_mean_below_upper_bound = sum(real_mean) <= sum(upper_bound),
    mean_below_upper_bound = sum(mean) <= sum(upper_bound),
    real_max_below_upper_bound = sum(real_max) <= sum(upper_bound),
    max_below_upper_bound = sum(max) <= sum(upper_bound),
    all_below_upper_bound = real_mean_below_upper_bound & mean_below_upper_bound &
      real_max_below_upper_bound & max_below_upper_bound
  )
# For each (workload, powercap) tuple, flag whether each other predictor's summed
# mean turnaround time difference is >= the one of the 'mean' predictor.
against_mean = data_p_mtt %>%
  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
  summarize(
    real_mean_above_mean = sum(real_mean) >= sum(mean),
    # Compares upper_bound with mean; comparing mean with itself is always TRUE.
    upper_bound_above_mean = sum(upper_bound) >= sum(mean),
    real_max_above_mean = sum(real_max) >= sum(mean),
    max_above_mean = sum(max) >= sum(mean)
  ) %>% mutate(
    all_above_mean = real_mean_above_mean & upper_bound_above_mean & real_max_above_mean & max_above_mean
  )
# Count, per predictor, the simulations with and without a positive surplus_energy
# (i.e. whether the powercap was broken or not).
data %>%
  group_by(predictor_name, breaks_powercap = surplus_energy > 0) %>%
  summarise(occ = n())
# Distribution of the mean waiting time difference against EASY for each predictor,
# one panel per workload. The outlier workload is excluded through the shared
# outlier_workload_start_dt_s constant rather than a repeated literal.
data_nz %>% filter(start_dt_s != outlier_workload_start_dt_s) %>% ggplot(aes(x=predictor_name, y=mean_waiting_time_minus_zero)) +
  geom_violin() +
  geom_jitter(alpha=0.1) +
  geom_boxplot(width=1/8, outlier.shape=NA) +
  facet_wrap(vars(start_dt_s)) +
  theme_bw()
# Energy difference from the powercap (GJ) against mean utilization per node,
# with one ellipse per predictor. The outlier workload and the node count come
# from the shared outlier_workload_start_dt_s and nb_nodes constants rather
# than repeated literals.
data_nz %>% filter(start_dt_s != outlier_workload_start_dt_s) %>% ggplot(aes(y=energy_from_powercap / 1e9, x=mean_utilization / nb_nodes, color=predictor_name)) +
  geom_point() +
  stat_ellipse() +
  theme_bw() +
  #facet_wrap(vars(powercap_dynamic_value_ratio)) +
  scale_color_viridis(discrete=TRUE)
# Distribution of energy_from_powercap (GJ) for each predictor:
# violin, jittered points and a narrow box plot sharing the same mapping.
ggplot(data, aes(x = predictor_name, y = energy_from_powercap / 1e9)) +
  geom_violin() +
  geom_jitter(alpha = 0.1) +
  geom_boxplot(width = 0.025, outlier.shape = NA) +
  labs(
    y = "Distribution of the energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value.",
    x = "Power predictor"
  ) +
  theme_bw()
# Distribution of surplus_energy (GJ) for each predictor:
# violin, jittered points and a narrow box plot sharing the same mapping.
ggplot(data, aes(x = predictor_name, y = surplus_energy / 1e9)) +
  geom_violin() +
  geom_jitter(alpha = 0.1) +
  geom_boxplot(width = 0.025, outlier.shape = NA) +
  labs(
    y = "Distribution of the surplus energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value, only keeping positive values.",
    x = "Power predictor"
  ) +
  theme_bw()
# Distribution of unused_energy (GJ) for each predictor:
# violin, jittered points and a narrow box plot sharing the same mapping.
ggplot(data, aes(x = predictor_name, y = unused_energy / 1e9)) +
  geom_violin() +
  geom_jitter(alpha = 0.1) +
  geom_boxplot(width = 0.025, outlier.shape = NA) +
  labs(
    y = "Distribution of the unused energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value, only keeping negative values.",
    x = "Power predictor"
  ) +
  theme_bw()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment