wip: visualization scripts for paper

2f1d7f7f · Millian Poquet · a6941eeb · 2f1d7f7f
Commit 2f1d7f7f authored 1 year ago by Millian Poquet
--- a/scripts-r/viz-campaign.R
+++ b/scripts-r/viz-campaign.R
+#!/usr/bin/env Rscript
+library(tidyverse)
+library(GGally)
+library(viridis)
+# Input file (aggregated campaign results) and directory receiving the figures.
+agg_file = '/tmp/agg-result.csv'
+output_dir = '/tmp'
+# lubridate is only attached by library(tidyverse) since tidyverse 2.0.0;
+# namespacing the call keeps the script working with older installations too.
+epoch_m100 = lubridate::ymd_hms('2022-01-01 00:00:00')
+`%notin%` = Negate(`%in%`)
+
+# Load the aggregated results and derive the factors used by the figures:
+# - predictor_name: ordered levels, which drives legend and axis order
+# - predictor_metrics: 'max' / 'mean' family, anything else falls in 'ub'
+# - predictor_method: 'predicted' for mean/max, 'real' for everything else
+# mutate() evaluates its arguments in order, so the two derived columns see the
+# re-levelled predictor_name (%in% compares factor labels, not codes).
+data = read_csv(agg_file) %>% mutate(
+  start_dt_s = as.factor(start_dt_s),
+  job_power_estimation_field = as.factor(job_power_estimation_field),
+  predictor_name = factor(
+    predictor_name,
+    levels=c('upper_bound', 'max', 'real_max', 'real_mean', 'mean', 'zero')
+  ),
+  predictor_metrics = factor(
+    case_when(
+      predictor_name %in% c('real_max', 'max') ~ 'max',
+      predictor_name %in% c('real_mean', 'mean') ~ 'mean',
+      TRUE ~ 'ub'
+    ),
+    levels=c('ub', 'max', 'mean')
+  ),
+  predictor_method = factor(
+    if_else(predictor_name %in% c('mean', 'max'), 'predicted', 'real'),
+    levels=c('predicted', 'real')
+  )
+)
+
+# compute scheduling metrics against their matching EASY baseline
+# data_nz: every run whose predictor is not 'zero'.
+# data_z: the 'zero' predictor runs at the largest powercap ratio found in the
+# data (presumably the least constrained setting, used as the baseline).
+data_nz = data %>% filter(predictor_name != 'zero')
+data_z = data %>% filter(
+  predictor_name == 'zero',
+  powercap_dynamic_value_ratio == max(data$powercap_dynamic_value_ratio)
+)
+# Keep only the join key and the baseline metrics, renamed with a zero_ prefix
+# so that they do not collide with the per-run columns.
+data_z_joinable = data_z %>% transmute(
+  start_dt_s = start_dt_s,
+  zero_mean_utilization = mean_utilization,
+  zero_max_utilization = max_utilization,
+  zero_mean_turnaround_time = mean_turnaround_time,
+  zero_mean_waiting_time = mean_waiting_time,
+)
+
+# The key is spelled out: relying on "all shared columns" would silently change
+# the join if data_z_joinable ever gained another column also present in data_nz.
+data_nz = inner_join(data_nz, data_z_joinable, by='start_dt_s') %>% mutate(
+  mean_turnaround_time_minus_zero = mean_turnaround_time - zero_mean_turnaround_time,
+  mean_waiting_time_minus_zero = mean_waiting_time - zero_mean_waiting_time
+)
+
+# Building blocks shared by the five "metric against powercap" figures below.
+# ggplot2 accepts a list on the right-hand side of `+` and adds its elements in order.
+powercap_axis_label = "Powercap value (proportion of the maximum dynamic power range). Shown with horizontal jitter."
+# one jittered point per simulation, plus a linear trend per predictor
+powercap_points = list(
+  geom_jitter(width=1/100, height=0),
+  geom_smooth(method = "lm", se = FALSE)
+)
+# theme, legend layout, x axis and color scale common to all five figures
+powercap_look = list(
+  theme_bw(),
+  theme(legend.position='top', legend.title=element_blank()),
+  guides(color = guide_legend(nrow = 1)),
+  scale_x_continuous(breaks=seq(0,0.7,0.1), labels = scales::percent),
+  scale_color_viridis(discrete=TRUE),
+  expand_limits(x=0),
+  labs(x=powercap_axis_label)
+)
+
+# energy diff from powercap, depending on powercap ratio and predictor
+ggplot(data_nz, aes(x=powercap_dynamic_value_ratio, y=energy_from_powercap / 1e9, color=predictor_name)) +
+  powercap_points +
+  powercap_look +
+  labs(y="Energy difference from the powercap during the constrained period for each simulation (GJ)")
+ggsave(sprintf("%s/energy-diff-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+# energy surplus from powercap, depending on powercap ratio and predictor
+ggplot(data_nz, aes(x=powercap_dynamic_value_ratio, y=surplus_energy / 1e9, color=predictor_name)) +
+  powercap_points +
+  powercap_look +
+  labs(y="Energy surplus from the powercap during the constrained period for each simulation (GJ)")
+ggsave(sprintf("%s/energy-surplus-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+# utilization, with the y = x diagonal drawn as a reference
+ggplot(data_nz, aes(x=powercap_dynamic_value_ratio, y=mean_utilization / 980, color=predictor_name)) +
+  powercap_points +
+  geom_abline(slope=1) +
+  powercap_look +
+  scale_y_continuous(breaks=seq(0,1,0.2), labels = scales::percent) +
+  labs(y="Utilization (proportion of nodes)")
+ggsave(sprintf("%s/utilization-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+# mean turnaround time metrics, one facet per workload start, y = 0 is the baseline
+ggplot(data_nz, aes(x=powercap_dynamic_value_ratio, y=mean_turnaround_time_minus_zero, color=predictor_name)) +
+  powercap_points +
+  geom_hline(yintercept=0) +
+  powercap_look +
+  scale_y_continuous() +
+  facet_wrap(vars(start_dt_s), scales='free_y') +
+  labs(y="Mean turnaround time difference against EASY without any powercap for each simulation")
+ggsave(sprintf("%s/mean-turnaround-time-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+# mean waiting time, same layout as the turnaround time figure
+ggplot(data_nz, aes(x=powercap_dynamic_value_ratio, y=mean_waiting_time_minus_zero, color=predictor_name)) +
+  powercap_points +
+  geom_hline(yintercept=0) +
+  powercap_look +
+  scale_y_continuous() +
+  facet_wrap(vars(start_dt_s), scales='free_y') +
+  labs(y="Mean waiting time difference against EASY without any powercap for each simulation")
+ggsave(sprintf("%s/mean-waiting-time-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+
+# power (mean+p1+p99) for all instances, facetted by workload, colored by predictor
+# Platform constants used to normalize the power values below.
+max_observed_total_power = 955080
+max_power_per_node = 2100.0
+min_power_per_node = 240.0
+nb_nodes = 980
+# dynamic range = observed maximum total power minus the summed per-node minimum
+max_dynamic_power = max_observed_total_power - min_power_per_node * nb_nodes
+data_nz %>% ggplot(aes(x=powercap_dynamic_value_ratio)) +
+  #geom_jitter(width=1/100, height=0) +
+  #geom_smooth(method = "lm", se = FALSE) +
+  # shaded p1..p99 band, solid mean line, dotted p1/p99 borders (kept out of the legend)
+  geom_ribbon(aes(ymin=power_p1/max_dynamic_power, ymax=power_p99/max_dynamic_power, fill=predictor_name), alpha=0.1) +
+  geom_line(aes(y=mean_power/max_dynamic_power, color=predictor_name)) +
+  geom_line(aes(y=power_p1/max_dynamic_power, color=predictor_name), linetype='dotted', show.legend = FALSE) +
+  geom_line(aes(y=power_p99/max_dynamic_power, color=predictor_name), linetype='dotted', show.legend = FALSE) +
+  # y = x reference line: both axes are proportions of the dynamic power range
+  geom_abline(slope=1) +
+  theme_bw() +
+  # legend on top without a title, consistent with the other figures of this script
+  theme(legend.position='top', legend.title=element_blank()) +
+  scale_color_viridis(discrete=TRUE) +
+  scale_fill_viridis(discrete=TRUE) +
+  facet_wrap(vars(start_dt_s)) +
+  expand_limits(x=0) +
+  labs(
+    y="Power consumption during the powercap-constrained 3-hour time window (proportion of the maximum dynamic power range).",
+    x="Powercap value (proportion of the maximum dynamic power range)."
+  )
+ggsave(sprintf("%s/power-consumption-mean-p1-p99-against-powercap-predictor.pdf", output_dir), width=16, height=9)
+
+# distribution of the mean power used for each predictor
+# Only runs whose powercap ratio is one of these values are drawn (one facet each).
+powercap_ratios_values_to_show = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
+width_scale = 0.3  # line width and outlier size of the boxplots
+scale = 0.9        # global size factor applied to the saved figure
+data_nz %>%
+  filter(powercap_dynamic_value_ratio %in% powercap_ratios_values_to_show) %>%
+  mutate(powercap_label = sprintf("pcap=%g", powercap_dynamic_value_ratio)) %>%
+  ggplot() +
+  # horizontal line at the powercap value of each facet
+  geom_hline(aes(yintercept=powercap_dynamic_value_ratio), linewidth=width_scale) +
+  geom_boxplot(
+    aes(x=predictor_metrics, y=mean_power/max_dynamic_power, fill=predictor_method),
+    linewidth=width_scale,
+    outlier.size=width_scale
+  ) +
+  expand_limits(x=0) +
+  facet_wrap(vars(powercap_label), nrow=1) +
+  scale_y_continuous(breaks=seq(0,0.7,0.1)) +
+  scale_fill_grey(start=0.8, end=1) +
+  labs(
+    x="Job power estimator metrics",
+    y="Mean power"
+  ) +
+  theme_bw() +
+  theme(
+    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
+
+    legend.title=element_blank(),
+    legend.background=element_rect(color='black'),
+    legend.direction='horizontal',
+    legend.position=c(0.2, 0.9),
+  )
+# the same figure is written as both a vector and a raster image
+ggsave(file.path(output_dir, "sched-mean-power-distribution.pdf"), width=8*scale, height=4*scale)
+ggsave(file.path(output_dir, "sched-mean-power-distribution.png"), width=8*scale, height=4*scale)
+
+# distribution of mean turnaround time diff (EASY) for each predictor
+outlier_workload_start_dt_s = 18474670 # sched metrics are strongly better than EASY there
+width_scale = 0.3  # line width and outlier size of the boxplots
+scale = 0.9        # global size factor applied to the saved figure
+data_nz %>%
+  # drop the outlier workload and keep only the powercap ratios selected for display
+  filter(
+    start_dt_s != outlier_workload_start_dt_s,
+    powercap_dynamic_value_ratio %in% powercap_ratios_values_to_show
+  ) %>%
+  ggplot() +
+  geom_boxplot(
+    aes(x=predictor_metrics, y=mean_turnaround_time_minus_zero, fill=predictor_method),
+    linewidth=width_scale,
+    outlier.size=width_scale
+  ) +
+  facet_wrap(vars(powercap_dynamic_value_ratio), nrow=1) +
+  scale_fill_grey(start=0.8, end=1) +
+  labs(
+    x="Job power estimator metrics",
+    y="Mean turnaround time increase (s)"
+  ) +
+  theme_bw() +
+  theme(
+    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
+
+    legend.title=element_blank(),
+    legend.background=element_rect(color='black'),
+    legend.direction='horizontal',
+    legend.position=c(0.16, 0.12),
+  )
+ggsave(file.path(output_dir, "sched-mtt-distribution.pdf"), width=8*scale, height=4*scale)
+
+# enable comparison of mean power values between predictors for all (workload, powercap) tuples
+options(dplyr.width = Inf)
+# One column per predictor holding its mean_power. Cells that pivot_wider leaves
+# empty are set to 0 so that the per-group sum() below is not turned into NA.
+data_p_mean = data %>% pivot_wider(names_from = predictor_name, values_from = mean_power) %>%
+  replace_na(list(max=0,upper_bound=0,zero=0,real_max=0,mean=0,real_mean=0))
+
+# For each (workload, powercap) tuple: is every predictor's mean power <= the one of 'zero'?
+against_zero = data_p_mean %>%
+  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
+  summarize(
+    upper_bound_below_zero = sum(upper_bound) <= sum(zero),
+    real_mean_below_zero = sum(real_mean) <= sum(zero),
+    mean_below_zero = sum(mean) <= sum(zero),
+    real_max_below_zero = sum(real_max) <= sum(zero),
+    max_below_zero = sum(max) <= sum(zero),
+  ) %>% mutate(
+    all_below_zero = upper_bound_below_zero & real_mean_below_zero & mean_below_zero & real_max_below_zero & max_below_zero
+  )
+
+instances_where_some_mean_power_is_NOT_below_EASY = against_zero %>% filter(!all_below_zero)
+print(sprintf("number of occurrences where EASY consumes less power than any other EASY+powercap predictors: %d/%d", nrow(instances_where_some_mean_power_is_NOT_below_EASY), nrow(against_zero)))
+print(instances_where_some_mean_power_is_NOT_below_EASY)
+
+# For each tuple: is every other predictor's mean power >= the one of 'upper_bound'?
+against_upper_bound = data_p_mean %>%
+  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
+  summarize(
+    zero_above_upper_bound = sum(zero) >= sum(upper_bound),
+    real_mean_above_upper_bound = sum(real_mean) >= sum(upper_bound),
+    mean_above_upper_bound = sum(mean) >= sum(upper_bound),
+    real_max_above_upper_bound = sum(real_max) >= sum(upper_bound),
+    max_above_upper_bound = sum(max) >= sum(upper_bound),
+  ) %>% mutate(
+    all_above_upper_bound = zero_above_upper_bound & real_mean_above_upper_bound & mean_above_upper_bound & real_max_above_upper_bound & max_above_upper_bound
+  )
+instances_where_some_mean_power_is_NOT_above_upper_bound = against_upper_bound %>% filter(!all_above_upper_bound)
+# the denominator is the number of tuples of this very table (against_upper_bound), not of against_zero
+print(sprintf("number of occurrences where upper_bound consumes more power than any other EASY+powercap predictors: %d/%d", nrow(instances_where_some_mean_power_is_NOT_above_upper_bound), nrow(against_upper_bound)))
+print(instances_where_some_mean_power_is_NOT_above_upper_bound)
+
+# For each tuple: is the mean power of real_mean / real_max / max <= the one of 'mean'?
+against_mean = data_p_mean %>%
+  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
+  summarize(
+    real_mean_below_mean = sum(real_mean) <= sum(mean),
+    real_max_below_mean = sum(real_max) <= sum(mean),
+    max_below_mean = sum(max) <= sum(mean),
+  ) %>% mutate(
+    all_max_below_mean = real_max_below_mean & max_below_mean,
+    all_below_mean = real_mean_below_mean & real_max_below_mean & max_below_mean
+  )
+
+instances_where_some_max_power_is_NOT_below_mean = against_mean %>% filter(!all_max_below_mean)
+print(sprintf("number of occurrences where mean consumes more power than max/real_max: %d/%d", nrow(instances_where_some_max_power_is_NOT_below_mean), nrow(against_mean)))
+
+
+
+# Same kind of comparison as for mean power, but on the mean turnaround time
+# difference against the baseline, with the outlier workload left out.
+# One column per predictor; cells left empty by pivot_wider are set to 0 so
+# that the per-group sum() is not turned into NA.
+data_p_mtt = data_nz %>% filter(start_dt_s != outlier_workload_start_dt_s) %>%
+  pivot_wider(names_from = predictor_name, values_from = mean_turnaround_time_minus_zero) %>%
+  replace_na(list(max=0,upper_bound=0,zero=0,real_max=0,mean=0,real_mean=0))
+
+# For each (workload, powercap) tuple: is every predictor's value <= the one of 'upper_bound'?
+against_upper_bound = data_p_mtt %>%
+  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
+  summarize(
+    real_mean_below_upper_bound = sum(real_mean) <= sum(upper_bound),
+    mean_below_upper_bound = sum(mean) <= sum(upper_bound),
+    real_max_below_upper_bound = sum(real_max) <= sum(upper_bound),
+    max_below_upper_bound = sum(max) <= sum(upper_bound),
+  ) %>% mutate(
+    all_below_upper_bound = real_mean_below_upper_bound & mean_below_upper_bound & real_max_below_upper_bound & max_below_upper_bound
+  )
+
+# For each tuple: is every other predictor's value >= the one of 'mean'?
+against_mean = data_p_mtt %>%
+  group_by(start_dt_s, powercap_dynamic_value_ratio) %>%
+  summarize(
+    real_mean_above_mean = sum(real_mean) >= sum(mean),
+    # compare upper_bound (not mean) against mean, otherwise the flag is always TRUE
+    upper_bound_above_mean = sum(upper_bound) >= sum(mean),
+    real_max_above_mean = sum(real_max) >= sum(mean),
+    max_above_mean = sum(max) >= sum(mean),
+  ) %>% mutate(
+    all_above_mean = real_mean_above_mean & upper_bound_above_mean & real_max_above_mean & max_above_mean
+  )
+
+
+# overview of whether each predictor breaks the powercap
+# A run counts as breaking the powercap when its surplus energy is strictly positive;
+# the result has one row per (predictor, breaks_powercap) pair with its number of runs.
+data %>%
+  group_by(predictor_name, breaks_powercap = surplus_energy > 0) %>%
+  summarize(occ=n())
+
+
+
+# Exploratory plots (not saved with ggsave). Both leave out the outlier workload,
+# using the named constants defined earlier in this script instead of repeating
+# their literal values.
+
+# distribution of the waiting time difference per predictor, one facet per workload
+data_nz %>% filter(start_dt_s != outlier_workload_start_dt_s) %>% ggplot(aes(x=predictor_name, y=mean_waiting_time_minus_zero)) +
+  geom_violin() +
+  geom_jitter(alpha=0.1) +
+  geom_boxplot(width=1/8, outlier.shape=NA) +
+  facet_wrap(vars(start_dt_s)) +
+  theme_bw()
+
+# energy difference from the powercap against utilization, one ellipse per predictor
+data_nz %>% filter(start_dt_s != outlier_workload_start_dt_s) %>% ggplot(aes(y=energy_from_powercap / 1e9, x=mean_utilization / nb_nodes, color=predictor_name)) +
+  geom_point() +
+  stat_ellipse() +
+  theme_bw() +
+  #facet_wrap(vars(powercap_dynamic_value_ratio)) +
+  scale_color_viridis(discrete=TRUE)
+
+# Per-predictor distributions of three energy metrics (in GJ) over all runs,
+# each drawn as violin + jittered points + narrow boxplot without outlier marks.
+# The aesthetics are declared once in ggplot() and inherited by the three geoms.
+
+# energy difference from the powercap
+ggplot(data, aes(x=predictor_name, y=energy_from_powercap / 1e9)) +
+  geom_violin() +
+  geom_jitter(alpha=0.1) +
+  geom_boxplot(width=0.025, outlier.shape=NA) +
+  theme_bw() +
+  labs(
+    x="Power predictor",
+    y="Distribution of the energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value."
+  )
+
+# surplus energy
+ggplot(data, aes(x=predictor_name, y=surplus_energy / 1e9)) +
+  geom_violin() +
+  geom_jitter(alpha=0.1) +
+  geom_boxplot(width=0.025, outlier.shape=NA) +
+  theme_bw() +
+  labs(
+    x="Power predictor",
+    y="Distribution of the surplus energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value, only keeping positive values."
+  )
+
+# unused energy
+ggplot(data, aes(x=predictor_name, y=unused_energy / 1e9)) +
+  geom_violin() +
+  geom_jitter(alpha=0.1) +
+  geom_boxplot(width=0.025, outlier.shape=NA) +
+  theme_bw() +
+  labs(
+    x="Power predictor",
+    y="Distribution of the unused energy consumed during the constrained period for each simulation (GJ).\nComputed as the integral of the dynamic power minus the dynamic powercap value, only keeping negative values."
+  )
+
+
+