-
Millian Poquet authoredMillian Poquet authored
prediction-results-analysis.Rmd 8.93 KiB
---
title: "Job power prediction result analysis"
author: "Danilo Carastan-Santos"
date: "2024-05-15"
output:
  rmdformats::readthedown
---
Processing the mean power prediction results
Outputs of script `run_prediction_per_user_allmethods_mean.py`.
import pandas as pd
import seaborn as sns
import os
# Directory of result CSV files (outputs of
# run_prediction_per_user_allmethods_mean.py).
RESULTS_PATH = "../user-power-predictions/data/total_power_mean_predictions_users_allmethods_mean/"

# Prediction columns; a row is kept only if none of them is missing.
PRED_COLS = [
    "hist_pred_total_power_mean",
    "LinearRegression_total_power_mean_watts",
    "RandomForestRegressor_total_power_mean_watts",
    "LinearSVR_total_power_mean_watts",
    "SGDRegressor_total_power_mean_watts",
]

# os.listdir() returns entries in arbitrary order: sort them so that the
# concatenated table (and the CSV exported below) is the same on every run.
result_filenames = sorted(os.listdir(RESULTS_PATH))

# os.path.join() does not rely on RESULTS_PATH ending with a slash.
df_all_results = pd.concat(
    [
        pd.read_csv(os.path.join(RESULTS_PATH, filename), low_memory=False)
        for filename in result_filenames
    ]
)

# Drop every row that lacks at least one of the predictions.
df_all_results = df_all_results.dropna(subset=PRED_COLS)

# Exported so that the R code at the end of this document can read it back.
df_all_results.to_csv('/tmp/allresults-mean.csv', index=False)
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
# Output statistic name -> prediction column scored against the measured
# "total_power_mean_watts" column.
mape_sources = {
    "hist_mape": "hist_pred_total_power_mean",
    "LinearRegression_mape": "LinearRegression_total_power_mean_watts",
    "RandomForestRegressor_mape": "RandomForestRegressor_total_power_mean_watts",
    "LinearSVR_mape": "LinearSVR_total_power_mean_watts",
    "SGDRegressor_mape": "SGDRegressor_total_power_mean_watts",
}

df_results_user_group = df_all_results.groupby("user_id")
# Distinct users, in order of first appearance in the results table.
lst_users = df_all_results["user_id"].drop_duplicates().to_list()

# One record per user: the user id followed by the MAPE of each method.
lst_stats_per_user = []
for user in lst_users:
    results_user = df_results_user_group.get_group(user)
    measured = results_user["total_power_mean_watts"]
    res = {"user_id": user}
    for stat_name, pred_col in mape_sources.items():
        res[stat_name] = mean_absolute_percentage_error(measured, results_user[pred_col])
    lst_stats_per_user.append(res)

df_stats_per_user = pd.DataFrame(lst_stats_per_user)
df_stats_per_user
# Names of the per-method MAPE columns of df_stats_per_user.
COLS = [
    method + "_mape"
    for method in ("hist", "LinearRegression", "RandomForestRegressor", "LinearSVR", "SGDRegressor")
]

# Summary statistics of the per-user MAPE, one column per method.
df_stats_per_user[COLS].describe()

# Long format: one row per (user_id, variable, value), as used by the boxplot.
df_stats_per_user_pivot = df_stats_per_user.melt(id_vars="user_id")
df_stats_per_user_pivot
Figure 3 (a)
import matplotlib.pyplot as plt
# Size presets; only MEDIUM_SIZE is used by the settings below.
TINY_SIZE, SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 2, 5, 20, 50
FIG_WIDTH, FIG_HEIGHT = 40, 10

# Default text size.
plt.rc('font', size=20)
# Axes titles, axis labels, tick labels, legend and figure title all share
# the same font size.
for rc_group, rc_key in (
    ('axes', 'titlesize'),
    ('axes', 'labelsize'),
    ('xtick', 'labelsize'),
    ('ytick', 'labelsize'),
    ('legend', 'fontsize'),
    ('figure', 'titlesize'),
):
    plt.rc(rc_group, **{rc_key: MEDIUM_SIZE})
plt.rc('figure', figsize=(8, 4))

# Horizontal boxplot: one box per prediction method, outliers not drawn.
method_labels = ["History", "LinearRegression", "RandomForest", "LinearSVR", "SGDRegressor"]
g = sns.boxplot(data=df_stats_per_user_pivot, x="value", y="variable", showfliers=False)
# Replace the raw column names on the y axis with readable method names.
plt.yticks(ticks=list(range(len(method_labels))), labels=method_labels, rotation=0)
g.set_xlabel("Mean Absolute Percentage Error (MAPE) ")
g.set_ylabel("Prediction Method")
plt.tight_layout(pad=0)

# Save the figure in both vector formats.
for extension in ("svg", "pdf"):
    plt.savefig("./fig3a-pred-mape-mean-power." + extension)
Processing the max power prediction results
Outputs of script `run_prediction_per_user_allmethods_max.py`.
import pandas as pd
import seaborn as sns
import os
# Directory of result CSV files (outputs of
# run_prediction_per_user_allmethods_max.py).
# NOTE(review): the directory name contains both "mean" and "max" -- confirm
# it matches the output directory actually written by the max script.
RESULTS_PATH = "../user-power-predictions/data/total_power_mean_predictions_users_allmethods_max/"

# Prediction columns; a row is kept only if none of them is missing.
PRED_COLS = [
    "hist_pred_total_power_max",
    "LinearRegression_total_power_max_watts",
    "RandomForestRegressor_total_power_max_watts",
    "LinearSVR_total_power_max_watts",
    "SGDRegressor_total_power_max_watts",
]

# os.listdir() returns entries in arbitrary order: sort them so that the
# concatenated table (and the CSV exported below) is the same on every run.
result_filenames = sorted(os.listdir(RESULTS_PATH))

# os.path.join() does not rely on RESULTS_PATH ending with a slash.
df_all_results = pd.concat(
    [
        pd.read_csv(os.path.join(RESULTS_PATH, filename), low_memory=False)
        for filename in result_filenames
    ]
)

# Drop every row that lacks at least one of the predictions.
df_all_results = df_all_results.dropna(subset=PRED_COLS)

# Exported so that the R code at the end of this document can read it back.
df_all_results.to_csv('/tmp/allresults-max.csv', index=False)
#df_all_results
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
# Output statistic name -> prediction column scored against the measured
# "total_power_max_watts" column.
mape_sources = {
    "hist_mape": "hist_pred_total_power_max",
    "LinearRegression_mape": "LinearRegression_total_power_max_watts",
    "RandomForestRegressor_mape": "RandomForestRegressor_total_power_max_watts",
    "LinearSVR_mape": "LinearSVR_total_power_max_watts",
    "SGDRegressor_mape": "SGDRegressor_total_power_max_watts",
}

df_results_user_group = df_all_results.groupby("user_id")
# Distinct users, in order of first appearance in the results table.
lst_users = df_all_results["user_id"].drop_duplicates().to_list()

# One record per user: the user id followed by the MAPE of each method.
lst_stats_per_user = []
for user in lst_users:
    results_user = df_results_user_group.get_group(user)
    measured = results_user["total_power_max_watts"]
    res = {"user_id": user}
    for stat_name, pred_col in mape_sources.items():
        res[stat_name] = mean_absolute_percentage_error(measured, results_user[pred_col])
    lst_stats_per_user.append(res)

df_stats_per_user = pd.DataFrame(lst_stats_per_user)
#df_stats_per_user
# Names of the per-method MAPE columns of df_stats_per_user.
COLS = [
    method + "_mape"
    for method in ("hist", "LinearRegression", "RandomForestRegressor", "LinearSVR", "SGDRegressor")
]

# Summary statistics of the per-user MAPE, one column per method.
df_stats_per_user[COLS].describe()

# Long format: one row per (user_id, variable, value), as used by the boxplot.
df_stats_per_user_pivot = df_stats_per_user.melt(id_vars="user_id")
df_stats_per_user_pivot
Figure 3 (b)
import matplotlib.pyplot as plt
# Size presets; only MEDIUM_SIZE is used by the settings below.
TINY_SIZE, SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 2, 5, 20, 50
FIG_WIDTH, FIG_HEIGHT = 40, 10

# Default text size.
plt.rc('font', size=20)
# Axes titles, axis labels, tick labels, legend and figure title all share
# the same font size.
for rc_group, rc_key in (
    ('axes', 'titlesize'),
    ('axes', 'labelsize'),
    ('xtick', 'labelsize'),
    ('ytick', 'labelsize'),
    ('legend', 'fontsize'),
    ('figure', 'titlesize'),
):
    plt.rc(rc_group, **{rc_key: MEDIUM_SIZE})
plt.rc('figure', figsize=(8, 4))

# Horizontal boxplot: one box per prediction method, outliers not drawn.
method_labels = ["History", "LinearRegression", "RandomForest", "LinearSVR", "SGDRegressor"]
g = sns.boxplot(data=df_stats_per_user_pivot, x="value", y="variable", showfliers=False)
# Replace the raw column names on the y axis with readable method names.
plt.yticks(ticks=list(range(len(method_labels))), labels=method_labels, rotation=0)
g.set_xlabel("Mean Absolute Percentage Error (MAPE)")
g.set_ylabel("Prediction Method")
plt.tight_layout(pad=0)

# Save the figure in both vector formats.
for extension in ("svg", "pdf"):
    plt.savefig("./fig3b-pred-mape-max-power." + extension)
Getting the actual mean and max power distributions
# Clear all Python memory: drop every name the Python code above left in the
# session before the R code below runs.
import sys
# Empty the namespace dict of the current module. Every entry is removed,
# including `sys` itself and the module's dunder entries.
sys.modules[__name__].__dict__.clear()
# gc is imported only now: a name imported before the clear() above would
# have been wiped together with the rest of the namespace.
import gc
# Run a garbage collection pass now that the names have been dropped.
gc.collect()
library(tidyverse)
# Histogram of the measured mean power, built from the table exported by the
# Python code above.
data_mean <- read_csv('/tmp/allresults-mean.csv')

ggplot(data = data_mean, mapping = aes(x = total_power_mean_watts)) +
  geom_histogram() +
  scale_y_continuous(labels = scales::label_number()) +
  theme_bw(base_size = 20) +
  labs(x = 'Total power (W)', y = 'Number of jobs')

# No plot is passed: ggsave() falls back to the last plot created above.
ggsave(filename = './fig2a-distrib-job-power-mean.pdf', width = 6, height = 3)
ggsave(filename = './fig2a-distrib-job-power-mean.svg', width = 6, height = 3)

# Remove the table from the workspace once the figure is written.
rm(data_mean)
# Histogram of the measured max power, built from the table exported by the
# Python code above.
data_max <- read_csv('/tmp/allresults-max.csv')

ggplot(data = data_max, mapping = aes(x = total_power_max_watts)) +
  geom_histogram() +
  scale_y_continuous(labels = scales::label_number()) +
  theme_bw(base_size = 20) +
  labs(x = 'Total power (W)', y = 'Number of jobs')

# No plot is passed: ggsave() falls back to the last plot created above.
ggsave(filename = './fig2b-distrib-job-power-max.pdf', width = 6, height = 3)
ggsave(filename = './fig2b-distrib-job-power-max.svg', width = 6, height = 3)

# Remove the table from the workspace once the figure is written.
rm(data_max)