From 5a15139dadde8d923703ece93745fa250b1a0c53 Mon Sep 17 00:00:00 2001 From: Millian Poquet <millian.poquet@irit.fr> Date: Fri, 17 May 2024 11:50:18 +0200 Subject: [PATCH] prediction notebook: shrink hists (py->R) --- notebooks/prediction-results-analysis.Rmd | 101 ++++++++-------------- 1 file changed, 37 insertions(+), 64 deletions(-) diff --git a/notebooks/prediction-results-analysis.Rmd b/notebooks/prediction-results-analysis.Rmd index e20a77b..6398e33 100644 --- a/notebooks/prediction-results-analysis.Rmd +++ b/notebooks/prediction-results-analysis.Rmd @@ -28,7 +28,7 @@ result_filenames = os.listdir(RESULTS_PATH) df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames]) df_all_results = df_all_results.dropna(subset=PRED_COLS) -df_all_results +df_all_results.to_csv('/tmp/allresults-mean.csv', index=False) from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error @@ -97,6 +97,7 @@ g.set_ylabel("Prediction Method") g.set_xlabel("Mean Absolute Percentage Error (MAPE) ") plt.tight_layout(pad=0) plt.savefig("./fig3a-pred-mape-mean-power.svg") +plt.savefig("./fig3a-pred-mape-mean-power.pdf") ``` ## Processing the max power prediction results @@ -122,6 +123,7 @@ result_filenames = os.listdir(RESULTS_PATH) df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames]) df_all_results = df_all_results.dropna(subset=PRED_COLS) +df_all_results.to_csv('/tmp/allresults-max.csv', index=False) #df_all_results @@ -193,73 +195,44 @@ g.set_ylabel("Prediction Method") g.set_xlabel("Mean Absolute Percentage Error (MAPE)") plt.tight_layout(pad=0) plt.savefig("./fig3b-pred-mape-max-power.svg") +plt.savefig("./fig3b-pred-mape-max-power.pdf") ``` ## Getting the actual mean and max power distributions -### Mean: Figure 2 (a) ```{python} -import matplotlib.pyplot as plt -import seaborn as sns - -TINY_SIZE = 2 -SMALL_SIZE = 5 -MEDIUM_SIZE = 20 -BIGGER_SIZE = 50 -FIG_WIDTH = 40 -FIG_HEIGHT = 10 - -plt.clf() - -plt.rc('figure', figsize=(8, 6)) -plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes -plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize -plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title -plt.rc('figure', figsize=(6,4)) - -g = sns.histplot(x="total_power_mean_watts", data=df_all_results, bins=25, fill=False) -#g.ax.set_yscale('log') -g.set_xlabel("Total Power (watts)") -g.set_ylabel("Number of Jobs") -plt.xticks(ticks=[0,250,500,750,1000,1250,1500], rotation=30) -plt.tight_layout(pad=0) -plt.savefig("./fig2a-distrib-job-power-mean.svg") +# clear all Python memory +import sys +sys.modules[__name__].__dict__.clear() +import gc +gc.collect() ``` -### Max : Figure 2 (b) -```{python} -import matplotlib.pyplot as plt -import seaborn as sns - -TINY_SIZE = 2 -SMALL_SIZE = 5 -MEDIUM_SIZE = 20 -BIGGER_SIZE = 50 -FIG_WIDTH = 40 -FIG_HEIGHT = 10 - -plt.clf() - -plt.rc('figure', figsize=(8, 6)) -plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes -plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title -plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels -plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels -plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels -plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize -plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title -plt.rc('figure', figsize=(6,4)) - -#g = sns.displot(x="total_power_max_watts", data=df_all_results) -g = sns.histplot(x="total_power_max_watts", data=df_all_results, bins=25, fill=False) - -#g.ax.set_yscale('log') -g.set_xlabel("Total Power (watts)") -g.set_ylabel("Number of Jobs") -plt.xticks(ticks=[0,250,500,750,1000,1250,1500,1750,2000], rotation=30) -plt.tight_layout(pad=0) -plt.savefig("./fig2b-distrib-job-power-max.svg") +```{R} +library(tidyverse) + +data_mean = read_csv('/tmp/allresults-mean.csv') +data_mean %>% ggplot(aes(x=total_power_mean_watts)) + + geom_histogram() + + scale_y_continuous(labels = scales::label_number()) + + theme_bw(base_size=20) + + labs( + x='Total power (W)', + y='Number of jobs' + ) +ggsave('./fig2a-distrib-job-power-mean.pdf', width=6, height=3) +ggsave('./fig2a-distrib-job-power-mean.svg', width=6, height=3) +rm(data_mean) + +data_max = read_csv('/tmp/allresults-max.csv') +data_max %>% ggplot(aes(x=total_power_max_watts)) + + geom_histogram() + + scale_y_continuous(labels = scales::label_number()) + + theme_bw(base_size=20) + + labs( + x='Total power (W)', + y='Number of jobs' + ) +ggsave('./fig2b-distrib-job-power-max.pdf', width=6, height=3) +ggsave('./fig2b-distrib-job-power-max.svg', width=6, height=3) +rm(data_max) ``` -- GitLab