diff --git a/artifact-overview.typ b/artifact-overview.typ
index f3aa1e0085732fec668c69353538da364fe53a61..388c1de6ba8203d72aec28496f0a28962a31a48c 100644
--- a/artifact-overview.typ
+++ b/artifact-overview.typ
@@ -350,7 +350,7 @@ The notebook also generates a power model of the Marconi100 nodes, which is requ
 Required input files.
 - `m100-data/22-agg_power_total.csv` (output of @sec-agg-power-traces-per-node).
 
-#fullbox(footer:[Disk: 1.7 Mo. Time (laptop): 00:00:10])[
+#fullbox(footer:[Disk: 1.7 Mo. Time (laptop): 00:00:10.])[
 ```sh
 nix develop .#r-notebook --command \
   Rscript notebooks/run-rmarkdown-notebook.R \
@@ -370,228 +370,99 @@ Required input files.
 
 == Job power prediction <sec-job-power-pred>
 
-The experimental workflow consists of three parts, (i) preprocessing of the original data, and 
+The experimental workflow consists of two parts: (i) preprocessing of the original data, and
 (ii) prediction of the mean and maximum power consumption.
+Please note that reproducing this section involves *heavy computation* and *large amounts of data*.
+We have *not* made the intermediate files available on #link(zenodo-url)[Zenodo] as they are too large.
 
 === Pre-processing
 
 ==== Step 1
 
-#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 18:00:00]])[
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-01_jobs.parquet \
- -p ./m100-data/22-01_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-02_jobs.parquet \
- -p ./m100-data/22-02_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-03_jobs.parquet \
- -p ./m100-data/22-03_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-04_jobs.parquet \
- -p ./m100-data/22-04_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-05_jobs.parquet \
- -p ./m100-data/22-05_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-06_jobs.parquet \
- -p ./m100-data/22-06_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-07_jobs.parquet \
- -p ./m100-data/22-07_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-08_jobs.parquet \
- -p ./m100-data/22-08_power_total.parquet
- ```
-```python
- ./scripts-py/expe_energumen/m100_pred_preprocess_1.py \
- -j ./m100-data/22-09_jobs.parquet \
- -p ./m100-data/22-09_power_total.parquet
- ```
+#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 18:00:00.]])[
+```sh
+for month in 22-01 22-02 22-03 22-04 22-05 22-06 22-07 22-08 22-09; do
+  nix develop .#py-scripts --command m100-pred-preprocess1 \
+    -j ./m100-data/${month}_jobs.parquet \
+    -m ./m100-data/${month}_power_total.parquet
+done
+```
 ]
 
-=== Step 2
-
-#fullbox(footer:[#emph-overhead[Memory: 128 Go. 
Time (sequential): 66:00:00]])[ -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-01_filter12_singlenode.csv \ - -jm ./m100-data/22-01_filter12_multinode.csv - -p ./m100-data/22-01_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-02_filter12_singlenode.csv \ - -jm ./m100-data/22-02_filter12_multinode.csv - -p ../m100-data/22-02_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-03_filter12_singlenode.csv \ - -jm ./m100-data/22-03_filter12_multinode.csv - -p ./m100-data/22-03_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-04_filter12_singlenode.csv \ - -jm ./m100-data/22-04_filter12_multinode.csv - -p ./m100-data/22-04_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-05_filter12_singlenode.csv \ - -jm ./m100-data/22-05_filter12_multinode.csv - -p ./m100-data/22-05_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-06_filter12_singlenode.csv \ - -jm ./m100-data/22-06_filter12_multinode.csv - -p ./m100-data/22-06_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-07_filter12_singlenode.csv \ - -jm ./m100-data/22-07_filter12_multinode.csv - -p ./m100-data/22-07_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-08_filter12_singlenode.csv \ - -jm ./m100-data/22-08_filter12_multinode.csv - -p ./m100-data/22-08_power_total.parquet - ``` - -```python - ./scripts-py/expe_energumen/m100_pred_preprocess_2.py \ - -js ./m100-data/22-09_filter12_singlenode.csv \ - -jm ./m100-data/22-09_filter12_multinode.csv - -p ./m100-data/22-09_power_total.parquet - ``` +==== Step 2 +#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 66:00:00.]])[ +```sh +for month in 22-01 22-02 22-03 22-04 22-05 22-06 22-07 22-08 22-09; do + nix develop .#py-scripts --command m100-pred-preprocess2 \ + -js ./m100-data/${month}_filter12_singlenode.csv \ + -jm ./m100-data/${month}_filter12_multinode.csv \ + -m ./m100-data/${month}_power_total.parquet +done +``` ] -=== Aggregate step 2 output into a single file +=== Aggregate step 2 output into a single file -#fullbox(footer: [Disk: 32 Go.])[ -find . -name '*filter123*' | tar -zcvf exadata_job_energy_profiles.tar.gz --files-from - +#fullbox(footer: [#emph-overhead[Disk: 32 Go.]])[ +```sh +find . 
-name '*filter123*' | \
+  tar -zcvf exadata_job_energy_profiles.tar.gz --files-from -
+```
 ]
 
 === Compute power metrics and add job information
 
-#fullbox(footer: [Disk: 32 Go.])[
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-01
+#fullbox(footer: [#emph-overhead[Disk: 32 Go.]])[
+```sh
+for month in 22-01 22-02 22-03 22-04 22-05 22-06 22-07 22-08 22-09; do
+  nix develop .#py-scripts --command m100-pred-jobs-extract-power-metrics \
+    -d ./m100-data/${month}
+done
 ```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-02
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-03
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-04
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-05
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-06
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-07
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-08
-```
-
-``` python
-  ./scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py \
-  -d ./m100-data/22-09
-```
 ]
 
 === Merge files into a single CSV file
 
-This will output the `filter123_all_jobs_aggmetrics.csv.gz` needed for the prediction script
+This will output the `filter123_all_jobs_aggmetrics.csv.gz` file needed by the prediction scripts.
 
 #fullbox(footer: [Disk: 82 Mo.])[
-``` python
-  ./scripts-py/expe_energumen/m100_pred_merge_jobfiles.py -d ./m100-data/
+```sh
+nix develop .#py-scripts --command m100-pred-merge-jobfiles -d ./m100-data/
 ```
 ]
 
-== Predicting Job mean and maximum power consumption 
+=== Predicting job mean and maximum power consumption
 
-#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 72:00:00]])[
-```
-mkdir ./m100-data/total_power_mean_predictions_users_allmethods_mean
-mkdir ./m100-data/total_power_mean_predictions_users_allmethods_max
+#fullbox(footer:[#emph-overhead[Memory: 128 Go. Time (sequential): 72:00:00.]])[
+  ```sh
+  mkdir ./m100-data/total_power_mean_predictions_users_allmethods_mean
+  mkdir ./m100-data/total_power_mean_predictions_users_allmethods_max
 
-python ./scripts-py/expe_energumen/run_prediction_per_user_allmethods_mean.py \
-  -j ./m100-data/filter123_all_jobs_aggmetrics.csv.gz \
-  -o ./m100-data/total_power_mean_predictions_users_allmethods_mean
+  nix develop .#py-scripts --command \
+    run-prediction-per-user-allmethods-mean \
+    -j ./m100-data/filter123_all_jobs_aggmetrics.csv.gz \
+    -o ./m100-data/total_power_mean_predictions_users_allmethods_mean
 
-python ./scripts-py/expe_energumen/run_prediction_per_user_allmethods_max.py \
-  -j ./m100-data/filter123_all_jobs_aggmetrics.csv.gz \
-  -o ./m100-data/total_power_mean_predictions_users_allmethods_max
-```
+  nix develop .#py-scripts --command \
+    run-prediction-per-user-allmethods-max \
+    -j ./m100-data/filter123_all_jobs_aggmetrics.csv.gz \
+    -o ./m100-data/total_power_mean_predictions_users_allmethods_max
+  ```
 ]
 
 === Compressing prediction output into single files
-
-#fullbox(footer:[Disk: 82 Mo.])[
-```
-tar -cvzf ./m100-data/power_pred_users_allmethods_max.tar.gz \
-  ./m100-data/total_power_mean_predictions_users_allmethods_mean
-tar -cvzf ./m100-data/power_pred_users_allmethods_mean.tar.gz \
-  ./m100-data/total_power_mean_predictions_users_allmethods_max
-```
-]
-
-The expected output data of has been stored on #link(zenodo-url)[Zenodo].
-//#tododanilo[how to reproduce this experiment?]
+The expected output data has been stored on #link(zenodo-url)[Zenodo].
 
+#fullbox(footer:[Disk: 82 Mo.])[
+  ```sh
+  tar -cvzf ./m100-data/power_pred_users_allmethods_max.tar.gz \
+    ./m100-data/total_power_mean_predictions_users_allmethods_mean
+  tar -cvzf ./m100-data/power_pred_users_allmethods_mean.tar.gz \
+    ./m100-data/total_power_mean_predictions_users_allmethods_max
+  ```
 
-#fullbox(footer: [Disk: 82 Mo.])[
 #filehashes((
   "fdcc47998a7e998abde325162833b23e", "power_pred_users_allmethods_max.tar.gz",
   "954f782a75c9a5b21c53a95c0218e220", "power_pred_users_allmethods_mean.tar.gz",
diff --git a/flake.nix b/flake.nix
index ce41a85461231cb756a0b98362372a320bf4e755..bec15ed36dda353cad3d26c9c1abdd1c1ff88db9 100644
--- a/flake.nix
+++ b/flake.nix
@@ -70,6 +70,8 @@
       propagatedBuildInputs = with pyPkgs; [
         packages.fastparquet-2402
         pyPkgs.sortedcontainers
+        pyPkgs.scipy
+        pyPkgs.scikit-learn
       ];
     };
     fastparquet-2402 = pyPkgs.fastparquet.overrideAttrs(final: prev: rec {
diff --git a/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py b/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py
index feb36a50ad576056c8eef36922baee4c62e81905..9e3025ac58cc5cde4cd803b157bea4d4b68e2d2a 100644
--- a/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py
+++ b/scripts-py/expe_energumen/m100_pred_jobs_extract_power_metrics.py
@@ -64,16 +64,13 @@ def read_cli():
     return(p.parse_args())
 
 
-if __name__ == '__main__':
-
+def main():
     if sys.version_info<(3,5,0):
         sys.stderr.write("You need python 3.5 or later to run this script\n")
         sys.exit(1)
-
-    try:
-        args = read_cli()
-    except:
-        print('Try $python process_marconi_jobs.py --help')
-        sys.exit(1)
-    run_workflow(args.rootdir)
\ No newline at end of file
+    args = read_cli()
+    run_workflow(args.rootdir)
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py b/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py
index 453d8171a4a879c359469648b2ade5bbef4718cc..bad85e3f2ee1889974cc2f7a96da6c2936ddc3e8 100644
--- a/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py
+++ 
b/scripts-py/expe_energumen/m100_pred_merge_jobfiles.py @@ -43,16 +43,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python process_marconi_jobs.py --help') - sys.exit(1) - run_workflow(args.rootdir) \ No newline at end of file + args = read_cli() + run_workflow(args.rootdir) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/m100_pred_preprocess_1.py b/scripts-py/expe_energumen/m100_pred_preprocess_1.py index 8ddbf11915f7ccd118af7ce94e3d83fb8ba70179..7eb48462642c32a8cd2b95f0fd08bd3eb62134ae 100644 --- a/scripts-py/expe_energumen/m100_pred_preprocess_1.py +++ b/scripts-py/expe_energumen/m100_pred_preprocess_1.py @@ -135,16 +135,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python process_marconi_jobs.py --help') - sys.exit(1) - run_workflow(args.metricfile, args.jobfile) \ No newline at end of file + args = read_cli() + run_workflow(args.metricfile, args.jobfile) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/m100_pred_preprocess_2.py b/scripts-py/expe_energumen/m100_pred_preprocess_2.py index 8bdd94772871c08b620c8c9637cf6ecb517e1879..f10f994872a7b4626261c13839d42b7e6f4e820a 100644 --- a/scripts-py/expe_energumen/m100_pred_preprocess_2.py +++ b/scripts-py/expe_energumen/m100_pred_preprocess_2.py @@ -230,16 +230,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python process_marconi_jobs.py --help') - sys.exit(1) - run_workflow(args.metricfile, args.jobfilesingle, args.jobfilemulti) \ No newline at end of file + args = read_cli() + run_workflow(args.metricfile, args.jobfilesingle, args.jobfilemulti) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/predict_jobs_power_allmethods_max.py b/scripts-py/expe_energumen/predict_jobs_power_allmethods_max.py index 7555644c217f9313ede1aa8da35dc3871fc6a651..1b48ffcad58a329acc340c14e82e4914f43306fe 100644 --- a/scripts-py/expe_energumen/predict_jobs_power_allmethods_max.py +++ b/scripts-py/expe_energumen/predict_jobs_power_allmethods_max.py @@ -475,16 +475,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python process_marconi_jobs.py --help') - sys.exit(1) - run_workflow(args.jobfile, args.outputfile, args.user) \ No newline at end of file + args = read_cli() + run_workflow(args.jobfile, args.outputfile, args.user) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/predict_jobs_power_allmethods_mean.py b/scripts-py/expe_energumen/predict_jobs_power_allmethods_mean.py index 35c00532646b9d6b9daa4c9b81328c67528dfa1c..269e3116930ac8ce90914cbb30dc4d532d83f069 100644 --- a/scripts-py/expe_energumen/predict_jobs_power_allmethods_mean.py +++ b/scripts-py/expe_energumen/predict_jobs_power_allmethods_mean.py @@ -479,16 +479,13 @@ def read_cli(): 
return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python process_marconi_jobs.py --help') - sys.exit(1) - run_workflow(args.jobfile, args.outputfile, args.user) \ No newline at end of file + args = read_cli() + run_workflow(args.jobfile, args.outputfile, args.user) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/run_prediction_per_user_allmethods_max.py b/scripts-py/expe_energumen/run_prediction_per_user_allmethods_max.py index 6864e3e2bddc4a9ccf471e5549f95799890371d1..0074f737b0b643dca11054b0bb4ff637d1fafaa9 100644 --- a/scripts-py/expe_energumen/run_prediction_per_user_allmethods_max.py +++ b/scripts-py/expe_energumen/run_prediction_per_user_allmethods_max.py @@ -6,15 +6,6 @@ import os import functools from multiprocessing import Pool - -""" -User power prediction subroutine -""" -def run_prediction_user(user_id, jobfile=None, outdir=None): - outfile = outdir+'/filter123_user_'+str(user_id)+'_total_power_mean_pred.csv.gz' - command = ['python', 'predict_jobs_power.py', "-j", jobfile , '-o', outfile, '-u', str(user_id)] - subprocess.run(command) - """ Run Workflow """ @@ -28,7 +19,7 @@ def run_workflow(jobfile, outdir): # Skip user if prediction was already performed if os.path.isfile(outfile) == True: continue - command = ['python', 'predict_jobs_power_allmethods_max.py', "-j", jobfile , '-o', outfile, '-u', str(user_id)] + command = ['predict-jobs-power-allmethods-max', "-j", jobfile , '-o', outfile, '-u', str(user_id)] subprocess.run(command) """ @@ -45,16 +36,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python run_prediction_per_user --help') - sys.exit(1) - run_workflow(args.jobfile, args.outdir) \ No newline at end of file + args = read_cli() + run_workflow(args.jobfile, args.outdir) + +if __name__ == '__main__': + main() diff --git a/scripts-py/expe_energumen/run_prediction_per_user_allmethods_mean.py b/scripts-py/expe_energumen/run_prediction_per_user_allmethods_mean.py index 7571f6c46f583f5bd67bac2a9d02d47caae8998c..7d90605a2e2b36d96b95afffc0708c6a1dbd81c7 100644 --- a/scripts-py/expe_energumen/run_prediction_per_user_allmethods_mean.py +++ b/scripts-py/expe_energumen/run_prediction_per_user_allmethods_mean.py @@ -6,15 +6,6 @@ import os import functools from multiprocessing import Pool - -""" -User power prediction subroutine -""" -def run_prediction_user(user_id, jobfile=None, outdir=None): - outfile = outdir+'/filter123_user_'+str(user_id)+'_total_power_mean_pred.csv.gz' - command = ['python', 'predict_jobs_power.py', "-j", jobfile , '-o', outfile, '-u', str(user_id)] - subprocess.run(command) - """ Run Workflow """ @@ -28,7 +19,7 @@ def run_workflow(jobfile, outdir): # Skip user if prediction was already performed if os.path.isfile(outfile) == True: continue - command = ['python', 'predict_jobs_power_allmethods_mean.py', "-j", jobfile , '-o', outfile, '-u', str(user_id)] + command = ['predict-jobs-power-allmethods-mean', "-j", jobfile , '-o', outfile, '-u', str(user_id)] subprocess.run(command) """ @@ -45,16 +36,13 @@ def read_cli(): return(p.parse_args()) -if __name__ == '__main__': - +def main(): if sys.version_info<(3,5,0): sys.stderr.write("You need python 3.5 or 
later to run this script\n") sys.exit(1) - - try: - args = read_cli() - except: - print('Try $python run_prediction_per_user --help') - sys.exit(1) - run_workflow(args.jobfile, args.outdir) \ No newline at end of file + args = read_cli() + run_workflow(args.jobfile, args.outdir) + +if __name__ == '__main__': + main() diff --git a/scripts-py/pyproject.toml b/scripts-py/pyproject.toml index 486a81b1d512dc121614a1b500ca30e3f1f3b764..8bf3f99b5a9d417b8fa99f153641dfe64a8ef753 100644 --- a/scripts-py/pyproject.toml +++ b/scripts-py/pyproject.toml @@ -11,6 +11,14 @@ version = "0.1.0" [project.scripts] m100-data-downloader = "expe_energumen.m100_data_downloader:main" m100-agg-power-months = "expe_energumen.m100_agg_month_power_values:several_months" +m100-pred-preprocess1 = "expe_energumen.m100_pred_preprocess_1:main" +m100-pred-preprocess2 = "expe_energumen.m100_pred_preprocess_2:main" +m100-pred-jobs-extract-power-metrics = "expe_energumen.m100_pred_jobs_extract_power_metrics:main" +m100-pred-merge-jobfiles = "expe_energumen.m100_pred_merge_jobfiles:main" +run-prediction-per-user-allmethods-mean = "expe_energumen.run_prediction_per_user_allmethods_mean:main" +run-prediction-per-user-allmethods-max = "expe_energumen.run_prediction_per_user_allmethods_max:main" +predict-jobs-power-allmethods-mean = "expe_energumen.predict_jobs_power_allmethods_mean:main" +predict-jobs-power-allmethods-max = "expe_energumen.predict_jobs_power_allmethods_max:main" m100-agg-power-predictions = "expe_energumen.m100_agg_power_predictions:agg_all_files" m100-agg-jobs-info = "expe_energumen.m100_agg_jobs_info:several_months" m100-join-usable-jobs-info = "expe_energumen.m100_join_usable_jobs_info:join"
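For reference, each entry added to `[project.scripts]` above maps a command name to a module-level `main()` function, which is why the scripts were refactored to end in a `main()` wrapper. The block below is a minimal, hypothetical smoke test of the new entry points; it assumes the `py-scripts` dev shell exposes them on `PATH` and that `read_cli()` is argparse-based, so `--help` prints each command's options.

```sh
# Hypothetical smoke test (not part of the workflow): check that the new
# console scripts resolve inside the py-scripts dev shell and print usage.
nix develop .#py-scripts --command m100-pred-preprocess1 --help
nix develop .#py-scripts --command m100-pred-merge-jobfiles --help
nix develop .#py-scripts --command run-prediction-per-user-allmethods-mean --help
```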