diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..52f327cdb1f62f14570335d28f35fc9a336de4b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +artifact-overview.pdf diff --git a/artifact-overview.typ b/artifact-overview.typ index b011c6ccf0b2a8784026c577097c109c947535c9..f3aa1e0085732fec668c69353538da364fe53a61 100644 --- a/artifact-overview.typ +++ b/artifact-overview.typ @@ -377,125 +377,118 @@ The experimental workflow consists of three parts, (i) preprocessing of the orig ==== Step 1 -#tododanilo[in the script source-code: change output filename of step 1 -from a_0_filter12_singlenode.csv and from a_0_filter12_multinode.csv -to 22-0X_filter12_singlenode.csv and 22-0X_filter12_multinode.csv] - -#fullbox(footer:[Memory: 128 Go. Time (sequential): 18:00:00])[ +#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 18:00:00]])[ ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-01_jobs.parquet \ - -p m100-data/22-01_power_total.parquet + -j ./m100-data/22-01_jobs.parquet \ + -p ./m100-data/22-01_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-02_jobs.parquet \ - -p m100-data/22-02_power_total.parquet + -j ./m100-data/22-02_jobs.parquet \ + -p ./m100-data/22-02_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-03_jobs.parquet \ - -p m100-data/22-03_power_total.parquet + -j ./m100-data/22-03_jobs.parquet \ + -p ./m100-data/22-03_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-04_jobs.parquet \ - -p m100-data/22-04_power_total.parquet + -j ./m100-data/22-04_jobs.parquet \ + -p ./m100-data/22-04_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-05_jobs.parquet \ - -p m100-data/22-05_power_total.parquet + -j ./m100-data/22-05_jobs.parquet \ + -p ./m100-data/22-05_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-06_jobs.parquet \ - -p m100-data/22-06_power_total.parquet + -j ./m100-data/22-06_jobs.parquet \ + -p ./m100-data/22-06_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-07_jobs.parquet \ - -p m100-data/22-07_power_total.parquet + -j ./m100-data/22-07_jobs.parquet \ + -p ./m100-data/22-07_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-08_jobs.parquet \ - -p m100-data/22-08_power_total.parquet + -j ./m100-data/22-08_jobs.parquet \ + -p ./m100-data/22-08_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \ - -j ../m100-data/22-09_jobs.parquet \ - -p m100-data/22-09_power_total.parquet + -j ./m100-data/22-09_jobs.parquet \ + -p ./m100-data/22-09_power_total.parquet ``` ] === Step 2 -#tododanilo[in the script source-code: change output filename of step 2 -from a_0_filter123_singlenode.csv and from a_0_filter123_multinode.csv -to 22-0X_filter123_singlenode.csv and 22-0X_filter123_multinode.csv] -#fullbox(footer:[Memory: 128 Go. Time (sequential): 66:00:00])[ +#fullbox(footer:[#emph-overhead[Memory: 128 Go. 
Time (sequential): 66:00:00]])[ ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-01_filter12_singlenode.csv \ -jm ./m100-data/22-01_filter12_multinode.csv - -p m100-data/22-01_power_total.parquet + -p ./m100-data/22-01_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-02_filter12_singlenode.csv \ -jm ./m100-data/22-02_filter12_multinode.csv - -p m100-data/22-02_power_total.parquet + -p ./m100-data/22-02_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-03_filter12_singlenode.csv \ -jm ./m100-data/22-03_filter12_multinode.csv - -p m100-data/22-03_power_total.parquet + -p ./m100-data/22-03_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-04_filter12_singlenode.csv \ -jm ./m100-data/22-04_filter12_multinode.csv - -p m100-data/22-04_power_total.parquet + -p ./m100-data/22-04_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-05_filter12_singlenode.csv \ -jm ./m100-data/22-05_filter12_multinode.csv - -p m100-data/22-05_power_total.parquet + -p ./m100-data/22-05_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-06_filter12_singlenode.csv \ -jm ./m100-data/22-06_filter12_multinode.csv - -p m100-data/22-06_power_total.parquet + -p ./m100-data/22-06_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-07_filter12_singlenode.csv \ -jm ./m100-data/22-07_filter12_multinode.csv - -p m100-data/22-07_power_total.parquet + -p ./m100-data/22-07_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-08_filter12_singlenode.csv \ -jm ./m100-data/22-08_filter12_multinode.csv - -p m100-data/22-08_power_total.parquet + -p ./m100-data/22-08_power_total.parquet ``` ```python ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ -js ./m100-data/22-09_filter12_singlenode.csv \ -jm ./m100-data/22-09_filter12_multinode.csv - -p m100-data/22-09_power_total.parquet + -p ./m100-data/22-09_power_total.parquet ``` ] @@ -508,52 +501,50 @@ find . 
-name '*filter123*' | tar -zcvf exadata_job_energy_profiles.tar.gz --file === Compute power metrics and add job information -#tododanilo[Script source-code: change -d (dir path) and pass the path to the necessary files] - #fullbox(footer: [Disk: 32 Go.])[ ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-01 + -d ./m100-data/22-01 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-02 + -d ./m100-data/22-02 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-03 + -d ./m100-data/22-03 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-04 + -d ./m100-data/22-04 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-05 + -d ./m100-data/22-05 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-06 + -d ./m100-data/22-06 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-07 + -d ./m100-data/22-07 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-08 + -d ./m100-data/22-08 ``` ``` python ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \ - -d ./data/year_month=22-09 + -d ./m100-data/22-09 ``` ] @@ -561,8 +552,6 @@ find . -name '*filter123*' | tar -zcvf exadata_job_energy_profiles.tar.gz --file This will output the `filter123_all_jobs_aggmetrics.csv.gz` needed for the prediction script -#tododanilo[check if /m100-data/ path is correct and also the path of the output] - #fullbox(footer: [Disk: 82 Mo.])[ ``` python @@ -572,7 +561,7 @@ This will output the `filter123_all_jobs_aggmetrics.csv.gz` needed for the predi == Predicting Job mean and maximum power consumption -#fullbox(footer:[Memory: 128 Go. Time (sequential): 72:00:00])[ +#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 72:00:00]])[ ``` mkdir ./m100-data/total_power_mean_predictions_users_allmethods_mean mkdir ./m100-data/total_power_mean_predictions_users_allmethods_max @@ -617,7 +606,10 @@ Output from the previous section - `m100-data/power_pred_users_allmethods_mean.tar.gz`, the jobs mean power predictions. - `m100-data/power_pred_users_allmethods_max.tar.gz`, the jobs maximum power predictions. -#tododanilo[Add notebook that make plots] +=== Reproducing the paper's plots + +Please refer to this #link("./notebooks/m100_process_prediction_results.ipynb")[Notebook] for +the scripts to reproduce the paper's plots, notably Figures 2 and 3. 
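+
+If you prefer to run the notebook non-interactively, a command along these lines should regenerate the figures (this assumes Jupyter and its `nbconvert` tool are installed in the same Python environment used for the prediction scripts):
+
+```
+jupyter nbconvert --to notebook --execute --inplace \
+    ./notebooks/m100_process_prediction_results.ipynb
+```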
== Job scheduling with power prediction <sec-sched> diff --git a/notebooks/m100_process_prediction_results.ipynb b/notebooks/m100_process_prediction_results.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..6540dc68f5e06f3d2eedba89fb2783403b6b1029 --- /dev/null +++ b/notebooks/m100_process_prediction_results.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Processing the mean power prediction results (script `run_prediction_per_user_allmethods_mean.py`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "import os\n", + "\n", + "RESULTS_PATH = \"../m100-data/total_power_mean_predictions_users_allmethods_mean/\"\n", + "PRED_COLS = [\"hist_pred_total_power_mean\",\n", + " \"LinearRegression_total_power_mean_watts\",\n", + " \"RandomForestRegressor_total_power_mean_watts\", \n", + " \"LinearSVR_total_power_mean_watts\", \n", + " \"SGDRegressor_total_power_mean_watts\"]\n", + "\n", + "\n", + "result_filenames = os.listdir(RESULTS_PATH)\n", + "\n", + "df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames])\n", + "\n", + "df_all_results = df_all_results.dropna(subset=PRED_COLS)\n", + "df_all_results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error\n", + "\n", + "lst_users = df_all_results[\"user_id\"].drop_duplicates().to_list()\n", + "#print(lst_users)\n", + "\n", + "df_results_user_group = df_all_results.groupby(\"user_id\")\n", + "\n", + "lst_stats_per_user = []\n", + "\n", + "for user in lst_users:\n", + " results_user = df_results_user_group.get_group(user)\n", + " hist_mape = mean_absolute_percentage_error(results_user[\"total_power_mean_watts\"], results_user[\"hist_pred_total_power_mean\"])\n", + " LR_mape = mean_absolute_percentage_error(results_user[\"total_power_mean_watts\"], results_user[\"LinearRegression_total_power_mean_watts\"])\n", + " RF_mape = mean_absolute_percentage_error(results_user[\"total_power_mean_watts\"], results_user[\"RandomForestRegressor_total_power_mean_watts\"])\n", + " LSVR_mape = mean_absolute_percentage_error(results_user[\"total_power_mean_watts\"], results_user[\"LinearSVR_total_power_mean_watts\"])\n", + " SGD_mape = mean_absolute_percentage_error(results_user[\"total_power_mean_watts\"], results_user[\"SGDRegressor_total_power_mean_watts\"])\n", + " res = {\"user_id\": user, \n", + " \"hist_mape\": hist_mape, \n", + " \"LinearRegression_mape\": LR_mape, \n", + " \"RandomForestRegressor_mape\": RF_mape, \n", + " \"LinearSVR_mape\": LSVR_mape,\n", + " \"SGDRegressor_mape\": SGD_mape}\n", + " lst_stats_per_user.append(res)\n", + " #break\n", + "\n", + "df_stats_per_user = pd.DataFrame(lst_stats_per_user)\n", + "df_stats_per_user\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COLS = [\"hist_mape\",\"LinearRegression_mape\",\"RandomForestRegressor_mape\",\"LinearSVR_mape\",\"SGDRegressor_mape\"]\n", + "\n", + "df_stats_per_user[COLS].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COLS = 
[\"hist_mape\",\"LinearRegression_mape\",\"RandomForestRegressor_mape\",\"LinearSVR_mape\",\"SGDRegressor_mape\"]\n", + "\n", + "df_stats_per_user_pivot = pd.melt(df_stats_per_user, id_vars=\"user_id\")\n", + "df_stats_per_user_pivot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Figure 3 A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "TINY_SIZE = 2\n", + "SMALL_SIZE = 5\n", + "MEDIUM_SIZE = 20\n", + "BIGGER_SIZE = 50\n", + "FIG_WIDTH = 40\n", + "FIG_HEIGHT = 10\n", + "\n", + "\n", + "#plt.rc('font', size=16) # controls default text sizes\n", + "plt.rc('font', size=20) # controls default text sizes\n", + "plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title\n", + "plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + "plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize\n", + "plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title\n", + "\n", + "#g = sns.boxplot(x=\"variable\", y=\"value\", data=df_stats_per_user_pivot, showfliers=False)\n", + "#plt.xticks(ticks=[0,1,2,3,4],labels=[\"History\", \"LinearRegression\", \"RandomForest\", \"LinearSVR\", \"SGDRegressor\"],rotation=30)\n", + "g = sns.boxplot(y=\"variable\", x=\"value\", data=df_stats_per_user_pivot, showfliers=False)\n", + "plt.yticks(ticks=[0,1,2,3,4],labels=[\"History\", \"LinearRegression\", \"RandomForest\", \"LinearSVR\", \"SGDRegressor\"],rotation=0)\n", + "\n", + "g.set_ylabel(\"Prediction Method\")\n", + "g.set_xlabel(\"Mean Absolute Percentage Error (MAPE) \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Processing the max power prediction results (script `run_prediction_per_user_allmethods_max.py`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "import os\n", + "\n", + "RESULTS_PATH = \"./m100-data/total_power_mean_predictions_users_allmethods_max/\"\n", + "\n", + "PRED_COLS = [\"hist_pred_total_power_max\",\n", + " \"LinearRegression_total_power_max_watts\",\n", + " \"RandomForestRegressor_total_power_max_watts\", \n", + " \"LinearSVR_total_power_max_watts\", \n", + " \"SGDRegressor_total_power_max_watts\"]\n", + "\n", + "\n", + "result_filenames = os.listdir(RESULTS_PATH)\n", + "\n", + "df_all_results = pd.concat([pd.read_csv(RESULTS_PATH+filename, low_memory=False) for filename in result_filenames])\n", + "\n", + "df_all_results = df_all_results.dropna(subset=PRED_COLS)\n", + "df_all_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error\n", + "\n", + "lst_users = df_all_results[\"user_id\"].drop_duplicates().to_list()\n", + "#print(lst_users)\n", + "\n", + "df_results_user_group = df_all_results.groupby(\"user_id\")\n", + "\n", + "lst_stats_per_user = []\n", + "\n", + "for user in lst_users:\n", + " results_user = df_results_user_group.get_group(user)\n", + " hist_mape = mean_absolute_percentage_error(results_user[\"total_power_max_watts\"], results_user[\"hist_pred_total_power_max\"])\n", + " LR_mape = 
mean_absolute_percentage_error(results_user[\"total_power_max_watts\"], results_user[\"LinearRegression_total_power_max_watts\"])\n", + " RF_mape = mean_absolute_percentage_error(results_user[\"total_power_max_watts\"], results_user[\"RandomForestRegressor_total_power_max_watts\"])\n", + " LSVR_mape = mean_absolute_percentage_error(results_user[\"total_power_max_watts\"], results_user[\"LinearSVR_total_power_max_watts\"])\n", + " SGD_mape = mean_absolute_percentage_error(results_user[\"total_power_max_watts\"], results_user[\"SGDRegressor_total_power_max_watts\"])\n", + " res = {\"user_id\": user, \n", + " \"hist_mape\": hist_mape, \n", + " \"LinearRegression_mape\": LR_mape, \n", + " \"RandomForestRegressor_mape\": RF_mape, \n", + " \"LinearSVR_mape\": LSVR_mape,\n", + " \"SGDRegressor_mape\": SGD_mape}\n", + " lst_stats_per_user.append(res)\n", + " #break\n", + "\n", + "df_stats_per_user = pd.DataFrame(lst_stats_per_user)\n", + "df_stats_per_user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COLS = [\"hist_mape\",\"LinearRegression_mape\",\"RandomForestRegressor_mape\",\"LinearSVR_mape\",\"SGDRegressor_mape\"]\n", + "\n", + "df_stats_per_user[COLS].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COLS = [\"hist_mape\",\"LinearRegression_mape\",\"RandomForestRegressor_mape\",\"LinearSVR_mape\",\"SGDRegressor_mape\"]\n", + "\n", + "df_stats_per_user_pivot = pd.melt(df_stats_per_user, id_vars=\"user_id\")\n", + "df_stats_per_user_pivot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Figure 3 B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "TINY_SIZE = 2\n", + "SMALL_SIZE = 5\n", + "MEDIUM_SIZE = 20\n", + "BIGGER_SIZE = 50\n", + "FIG_WIDTH = 40\n", + "FIG_HEIGHT = 10\n", + "\n", + "\n", + "plt.rc('font', size=20) # controls default text sizes\n", + "plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title\n", + "plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + "plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize\n", + "plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title\n", + "\n", + "#g = sns.boxplot(x=\"variable\", y=\"value\", data=df_stats_per_user_pivot, showfliers=False)\n", + "#plt.xticks(ticks=[0,1,2,3,4],labels=[\"History\", \"LinearRegression\", \"RandomForest\", \"LinearSVR\", \"SGDRegressor\"],rotation=30)\n", + "#g.set_xlabel(\"Prediction Method\")\n", + "#g.set_ylabel(\"Mean Absolute Percentage Error (MAPE) \")\n", + "\n", + "g = sns.boxplot(y=\"variable\", x=\"value\", data=df_stats_per_user_pivot, showfliers=False)\n", + "plt.yticks(ticks=[0,1,2,3,4],labels=[\"History\", \"LinearRegression\", \"RandomForest\", \"LinearSVR\", \"SGDRegressor\"],rotation=0)\n", + "g.set_ylabel(\"Prediction Method\")\n", + "g.set_xlabel(\"Mean Absolute Percentage Error (MAPE)\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting the actual mean and max power distributions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean (Figure 2 A)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "TINY_SIZE = 2\n", + "SMALL_SIZE = 5\n", + "MEDIUM_SIZE = 20\n", + "BIGGER_SIZE = 50\n", + "FIG_WIDTH = 40\n", + "FIG_HEIGHT = 10\n", + "\n", + "plt.clf()\n", + "\n", + "plt.rc('figure', figsize=(8, 6))\n", + "plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes\n", + "plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title\n", + "plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + "plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize\n", + "plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title\n", + "\n", + "g = sns.histplot(x=\"total_power_mean_watts\", data=df_all_results, bins=25, fill=False)\n", + "#g.ax.set_yscale('log')\n", + "g.set_xlabel(\"Total Power (watts)\")\n", + "g.set_ylabel(\"Number of Jobs\")\n", + "plt.xticks(ticks=[0,250,500,750,1000,1250,1500], rotation=30)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Max (Figure 2 B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "TINY_SIZE = 2\n", + "SMALL_SIZE = 5\n", + "MEDIUM_SIZE = 20\n", + "BIGGER_SIZE = 50\n", + "FIG_WIDTH = 40\n", + "FIG_HEIGHT = 10\n", + "\n", + "plt.clf()\n", + "\n", + "plt.rc('figure', figsize=(8, 6))\n", + "plt.rc('font', size=MEDIUM_SIZE) # controls default text sizes\n", + "plt.rc('axes', titlesize=MEDIUM_SIZE) # fontsize of the axes title\n", + "plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + "plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + "plt.rc('legend', fontsize=MEDIUM_SIZE) # legend fontsize\n", + "plt.rc('figure', titlesize=MEDIUM_SIZE) # fontsize of the figure title\n", + "\n", + "#g = sns.displot(x=\"total_power_max_watts\", data=df_all_results)\n", + "g = sns.histplot(x=\"total_power_max_watts\", data=df_all_results, bins=25, fill=False)\n", + "\n", + "#g.ax.set_yscale('log')\n", + "g.set_xlabel(\"Total Power (watts)\")\n", + "g.set_ylabel(\"Number of Jobs\")\n", + "plt.xticks(ticks=[0,250,500,750,1000,1250,1500,1750,2000], rotation=30)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py b/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py index 0ae9b2edf1f84d1b98a0d8dd38ccd716a99dede9..feb36a50ad576056c8eef36922baee4c62e81905 100644 --- a/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py +++ b/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py @@ -8,10 +8,10 @@ from scipy.stats import iqr Read input data """ def read_data(rootdir): - df_jobs_single = 
pd.read_csv(rootdir+"/plugin=job_table/metric=job_info_marconi100/a_0_filter123_singlenode.csv") - df_jobs_multi = pd.read_csv(rootdir+"/plugin=job_table/metric=job_info_marconi100/a_0_filter123_multinode.csv") - df_power_single = pd.read_csv(rootdir+"/plugin=ipmi_pub/metric=total_power/a_0_filter123_singlenode.csv") - df_power_multi = pd.read_csv(rootdir+"/plugin=ipmi_pub/metric=total_power/a_0_filter123_multinode.csv") + df_jobs_single = pd.read_csv(rootdir+"_filter123_singlenode.csv") + df_jobs_multi = pd.read_csv(rootdir+"_filter123_multinode.csv") + df_power_single = pd.read_csv(rootdir+"_filter123_singlenode.csv") + df_power_multi = pd.read_csv(rootdir+"_filter123_multinode.csv") df_jobs = pd.concat([df_jobs_single, df_jobs_multi]).reset_index(drop=True) df_power = pd.concat([df_power_single, df_power_multi]).reset_index(drop=True) df_power['node'] = pd.to_numeric(df_power['node']) @@ -37,7 +37,7 @@ def calculate_agg_metrics(df_jobs, df_power): Save results """ def save_results(df_jobs_aggmetrics, rootdir): - df_jobs_aggmetrics.to_csv(rootdir+"/plugin=job_table/metric=job_info_marconi100/a_0_filter123_aggmetrics.csv") + df_jobs_aggmetrics.to_csv(rootdir+"_filter123_aggmetrics.csv") """ Run workflow diff --git a/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py b/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py index 3d3344681c3e4bed4bf25d7e7e087b14acb1855b..453d8171a4a879c359469648b2ade5bbef4718cc 100644 --- a/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py +++ b/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py @@ -9,7 +9,7 @@ Read job files spread in the months folders def read_jobifles(rootdir): #DATASET_PATH = "/home/dancarastan/Documentos/exadata_job_energy_profiles/" - jobfiles_list = glob.glob(rootdir+"*"+"/plugin=job_table"+"/metric=job_info_marconi100"+"/a_0_filter123_aggmetrics.csv") + jobfiles_list = glob.glob(rootdir+"*"+"_filter123_aggmetrics.csv") #print(len(jobfiles_list)) df_jobs = pd.concat([pd.read_csv(jobfile) for jobfile in jobfiles_list]).reset_index(drop=True) diff --git a/scripts-py/expe_energumen/m100_pred_preprocess_1.py b/scripts-py/expe_energumen/m100_pred_preprocess_1.py index 182f632b4ae0af5aa12f23a0c70ae004cba27b97..8ddbf11915f7ccd118af7ce94e3d83fb8ba70179 100644 --- a/scripts-py/expe_energumen/m100_pred_preprocess_1.py +++ b/scripts-py/expe_energumen/m100_pred_preprocess_1.py @@ -105,11 +105,10 @@ def filter2_multi(df_jobs, df_power): """ Save intermediate results to csv """ -def save_results(df_jobs_single, df_jobs_multi, jobfile, metricfile): - jobfile_out = jobfile.rstrip("a_0.parquet") - metric = metricfile.split("/")[-2] - df_jobs_single.to_csv(jobfile_out+metric+"_filter12_singlenode.csv", index=False) - df_jobs_multi.to_csv(jobfile_out+metric+"_filter12_multinode.csv", index=False) +def save_results(df_jobs_single, df_jobs_multi, jobfile, metricfile): + jobfile_out = jobfile.rstrip("jobs.parquet") + df_jobs_single.to_csv(jobfile_out+"_filter12_singlenode.csv", index=False) + df_jobs_multi.to_csv(jobfile_out+"_filter12_multinode.csv", index=False) """ Run workflow diff --git a/scripts-py/expe_energumen/m100_pred_preprocess_2.py b/scripts-py/expe_energumen/m100_pred_preprocess_2.py index 33b3d2e142dbdc326c91e42c69179baa6ff43de6..8bdd94772871c08b620c8c9637cf6ecb517e1879 100644 --- a/scripts-py/expe_energumen/m100_pred_preprocess_2.py +++ b/scripts-py/expe_energumen/m100_pred_preprocess_2.py @@ -183,11 +183,10 @@ def filter3_1_multi(df_jobs_multi, df_total_power): Save results to csv """ def save_results(df_exclusive_jobs_single, 
df_exclusive_jobs_multi, df_total_power_exclusive_single, df_total_power_exclusive_multi, jobfile_single, metricfile): - metric = metricfile.split("/")[-2] - jobfile_out = jobfile_single.rstrip(metric+"_filter12_singlenode.csv") - metricfile_out = metricfile.rstrip("a_0.parquet") - df_exclusive_jobs_single.to_csv(jobfile_out+metric+"_filter123_singlenode.csv", index=False) - df_exclusive_jobs_multi.to_csv(jobfile_out+metric+"_filter123_multinode.csv", index=False) + jobfile_out = jobfile_single.removesuffix("_filter12_singlenode.csv")  # strip the exact suffix; rstrip() removes a character set and would also eat the month digit + metricfile_out = metricfile.removesuffix("power_total.parquet") + df_exclusive_jobs_single.to_csv(jobfile_out+"_filter123_singlenode.csv", index=False) + df_exclusive_jobs_multi.to_csv(jobfile_out+"_filter123_multinode.csv", index=False) df_total_power_exclusive_single.to_csv(metricfile_out+"a_0_filter123_singlenode.csv", index=False) df_total_power_exclusive_multi.to_csv(metricfile_out+"a_0_filter123_multinode.csv", index=False)