prediction notebook: shrink hists (py->R)

5a15139d · Millian Poquet · 8404cc04 · 5a15139d
Commit 5a15139d authored 1 year ago by Millian Poquet
--- a/notebooks/prediction-results-analysis.Rmd
+++ b/notebooks/prediction-results-analysis.Rmd
@@ -28,7 +28,7 @@ result_filenames = os.listdir(RESULTS_PATH)
 df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames])

 df_all_results = df_all_results.dropna(subset=PRED_COLS)
-df_all_results
+df_all_results.to_csv('/tmp/allresults-mean.csv', index=False)


 from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
@@ -97,6 +97,7 @@ g.set_ylabel("Prediction Method")
 g.set_xlabel("Mean Absolute Percentage Error (MAPE)     ")
 plt.tight_layout(pad=0)
 plt.savefig("./fig3a-pred-mape-mean-power.svg")
+plt.savefig("./fig3a-pred-mape-mean-power.pdf")
 ```

 ## Processing the max power prediction results
@@ -122,6 +123,7 @@ result_filenames = os.listdir(RESULTS_PATH)
 df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames])

 df_all_results = df_all_results.dropna(subset=PRED_COLS)
+df_all_results.to_csv('/tmp/allresults-max.csv', index=False)
 #df_all_results


@@ -193,73 +195,44 @@ g.set_ylabel("Prediction Method")
 g.set_xlabel("Mean Absolute Percentage Error (MAPE)")
 plt.tight_layout(pad=0)
 plt.savefig("./fig3b-pred-mape-max-power.svg")
+plt.savefig("./fig3b-pred-mape-max-power.pdf")
 ```

 ## Getting the actual mean and max power distributions
-### Mean: Figure 2 (a)
 ```{python}
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-TINY_SIZE = 2
-SMALL_SIZE = 5
-MEDIUM_SIZE = 20
-BIGGER_SIZE = 50
-FIG_WIDTH = 40
-FIG_HEIGHT = 10
-
-plt.clf()
-
-plt.rc('figure', figsize=(8, 6))
-plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
-plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
-plt.rc('axes', labelsize=MEDIUM_SIZE)     # fontsize of the x and y labels
-plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
-plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
-plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
-plt.rc('figure', titlesize=MEDIUM_SIZE)  # fontsize of the figure title
-plt.rc('figure', figsize=(6,4))
-
-g = sns.histplot(x="total_power_mean_watts", data=df_all_results, bins=25, fill=False)
-#g.ax.set_yscale('log')
-g.set_xlabel("Total Power (watts)")
-g.set_ylabel("Number of Jobs")
-plt.xticks(ticks=[0,250,500,750,1000,1250,1500], rotation=30)
-plt.tight_layout(pad=0)
-plt.savefig("./fig2a-distrib-job-power-mean.svg")
+# Clear all Python memory: empty this module's namespace, then run the garbage collector.
+import sys
+sys.modules[__name__].__dict__.clear()  # removes every name bound in this module, including `sys` and the result DataFrames
+import gc  # imported only now: anything imported before the clear is removed with the rest
+gc.collect()  # NOTE(review): presumably meant to free the large DataFrames before the R chunk runs -- confirm
 ```

-### Max : Figure 2 (b)
-```{python}
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-TINY_SIZE = 2
-SMALL_SIZE = 5
-MEDIUM_SIZE = 20
-BIGGER_SIZE = 50
-FIG_WIDTH = 40
-FIG_HEIGHT = 10
-
-plt.clf()
-
-plt.rc('figure', figsize=(8, 6))
-plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
-plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
-plt.rc('axes', labelsize=MEDIUM_SIZE)     # fontsize of the x and y labels
-plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
-plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
-plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
-plt.rc('figure', titlesize=MEDIUM_SIZE)  # fontsize of the figure title
-plt.rc('figure', figsize=(6,4))
-
-#g = sns.displot(x="total_power_max_watts", data=df_all_results)
-g = sns.histplot(x="total_power_max_watts", data=df_all_results, bins=25, fill=False)
-
-#g.ax.set_yscale('log')
-g.set_xlabel("Total Power (watts)")
-g.set_ylabel("Number of Jobs")
-plt.xticks(ticks=[0,250,500,750,1000,1250,1500,1750,2000], rotation=30)
-plt.tight_layout(pad=0)
-plt.savefig("./fig2b-distrib-job-power-max.svg")
+```{R}
+library(tidyverse)  # supplies read_csv(), %>% and the ggplot2 functions used below
+
+data_mean = read_csv('/tmp/allresults-mean.csv')  # CSV written by the mean-power Python chunk via to_csv()
+data_mean %>% ggplot(aes(x=total_power_mean_watts)) +
+  geom_histogram() +  # no bins/binwidth given, so ggplot2's default binning applies
+  scale_y_continuous(labels = scales::label_number()) +  # NOTE(review): presumably to avoid scientific notation on the job counts -- confirm
+  theme_bw(base_size=20) +  # black-and-white theme with base font size 20
+  labs(
+    x='Total power (W)',
+    y='Number of jobs'
+  )
+ggsave('./fig2a-distrib-job-power-mean.pdf', width=6, height=3)  # no plot argument: saves the most recent ggplot
+ggsave('./fig2a-distrib-job-power-mean.svg', width=6, height=3)  # same figure again, as SVG
+rm(data_mean)  # drop the mean-power data frame before loading the max-power one
+
+data_max = read_csv('/tmp/allresults-max.csv')  # CSV written by the max-power Python chunk via to_csv()
+data_max %>% ggplot(aes(x=total_power_max_watts)) +
+  geom_histogram() +  # no bins/binwidth given, so ggplot2's default binning applies
+  scale_y_continuous(labels = scales::label_number()) +  # same y-axis labelling as the mean-power figure
+  theme_bw(base_size=20) +  # black-and-white theme with base font size 20
+  labs(
+    x='Total power (W)',
+    y='Number of jobs'
+  )
+ggsave('./fig2b-distrib-job-power-max.pdf', width=6, height=3)  # no plot argument: saves the most recent ggplot
+ggsave('./fig2b-distrib-job-power-max.svg', width=6, height=3)  # same figure again, as SVG
+rm(data_max)  # drop the max-power data frame once both files are written
 ```