From 26b7487db5a1a4359350fd9c421ba5fd79662603 Mon Sep 17 00:00:00 2001
From: dlandre2 <dlandre2@univ-fcomte.fr>
Date: Fri, 26 Jul 2024 13:09:54 +0200
Subject: [PATCH] maj artifact

---
 .idea/.gitignore                              |   3 +
 .idea/inspectionProfiles/Project_Default.xml  |  31 +++++
 .../inspectionProfiles/profiles_settings.xml  |   6 +
 .idea/misc.xml                                |   7 ++
 .idea/modules.xml                             |   8 ++
 ...and-and-it-system-usage-in-datacenters.iml |   8 ++
 .idea/vcs.xml                                 |   6 +
 README.md                                     |  29 ++++-
 characterizationUseCase.py                    |   4 +-
 hpc_case/openTimeSeriesJson.py                | 112 +++++++++---------
 hpc_case/periodograms.py                      |   1 -
 .../WikipediaKruskalConoverCluster.py         |   3 -
 wikipedia_2013_case/WikipediaPeriodogram.py   |  38 +++---
 wikipedia_2013_case/WikipediaSeasonalPeaks.py |   1 -
 14 files changed, 166 insertions(+), 91 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/inspectionProfiles/Project_Default.xml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..f967b33
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,31 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E127" />
+          <option value="E122" />
+          <option value="E266" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N802" />
+          <option value="N806" />
+          <option value="N803" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="list.__getitem__" />
+          <option value="float.__getitem__" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..5a7032d
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.10 (python)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (python)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..eed9ce0
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" filepath="$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml b/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml
new file mode 100644
index 0000000..5aebf04
--- /dev/null
+++ b/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (python)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/README.md b/README.md
index 84927f6..d65e2a0 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
-**Seasonal study of user demand and IT system usage in datacenters artifact**
+#### **Seasonal study of user demand and IT system usage in datacenters artifact**
 
+**Brief presentation:**
 
+This directory contains the data and Python files used to obtain the results of the study on the characterization of the seasonality of different time series from 6 HPC workloads and from the Wikipedia workload. The directory consists of the following elements:
 
 - The "hpc_case" folder contains the files and data needed to obtain the seasonality characterization results for the ANL, Ciemat Euler, Metacentrum, PIK IPLEX, RICC and UniLu Gaia workloads;
 
@@ -10,18 +12,33 @@
 
 
 
-In the "hpc_case" folder:
+**Packages needed:**
+
+- pandas
+- prophet (to use the Prophet forecasting method)
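+- datetime (Python standard library, no installation needed)
+- json (Python standard library, no installation needed)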
+- matplotlib
+- numpy
+- sktime (to use the TBATS forecasting method)
+- scipy (to use the periodogram, the Kruskal-Wallis test and the gaussian_kde method)
+- scikit_posthocs (to use the Conover-Iman test)
+- statsmodels (to use the ADF test, the KPSS test and the lowess, MSTL and STL methods)
+
+
+
+**In the "hpc_case" folder:**
 
 - The "time_series_json_hpc_data" folder contains workload data;
 - The "openTimeSeriesJson.py" file is used to load workload data;
 
-- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results;
-- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results;
-- Run the "seasonal_peaks.py" file to generate seasonal peaks figures.
+- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results. You can disable periodogram execution for one or more time series by commenting out the corresponding lines in the executionList object;
+- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results. You can disable cluster computation for one or more time series by commenting out the corresponding lines in the executionList object;
+- Run the "seasonal_peaks.py" file to generate seasonal peaks figures. You can disable the computation of seasonal peaks for one or more time series by commenting out the corresponding lines in the executionList object.
 
 
 
-In the "wikipedia_2013_case" folder:
+**In the "wikipedia_2013_case" folder:**
 
 - The "Wikipedia2013.json" file corresponds to wikipedia data for 2013;
 
diff --git a/characterizationUseCase.py b/characterizationUseCase.py
index e966be2..cb4b7fb 100644
--- a/characterizationUseCase.py
+++ b/characterizationUseCase.py
@@ -11,8 +11,8 @@ plt.rcParams['figure.constrained_layout.use'] = True
 
 
 # Loading data
-nom_fichier = "/home/dlandre/Doctorat/phd1/Workload_Prevision/timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
     AllocatedCoresAnlIntrepid = json.load(file)
 
 y = []
diff --git a/hpc_case/openTimeSeriesJson.py b/hpc_case/openTimeSeriesJson.py
index 5487d03..fb6e3ee 100644
--- a/hpc_case/openTimeSeriesJson.py
+++ b/hpc_case/openTimeSeriesJson.py
@@ -2,120 +2,120 @@ import json
 
 ### Maximum number of allocated cores by the system
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemAnlIntrepid.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemAnlIntrepid = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemCiematEuler.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemCiematEuler = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrum.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemMetacentrum = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrumFiltered.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemMetacentrumFiltered = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemPIKIPLEX.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemPIKIPLEX = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemRICC.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemRICC = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemUniLuGaia.json"
+with open(filename, 'r') as file:
     AllocatedCoresSystemUniLuGaia = json.load(file)
 
 ### Maximum number of requested cores by the users
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresAnlIntrepid.json"
+with open(filename, 'r') as file:
     AllocatedCoresAnlIntrepid = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresCiematEuler.json"
+with open(filename, 'r') as file:
     AllocatedCoresCiematEuler = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrum.json"
+with open(filename, 'r') as file:
     AllocatedCoresMetacentrum = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrumFiltered.json"
+with open(filename, 'r') as file:
     AllocatedCoresMetacentrumFiltered = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresPIKIPLEX.json"
+with open(filename, 'r') as file:
     AllocatedCoresPIKIPLEX = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresRICC.json"
+with open(filename, 'r') as file:
     AllocatedCoresRICC = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresUniLuGaia.json"
+with open(filename, 'r') as file:
     AllocatedCoresUniLuGaia = json.load(file)
 
 ### Workload mass
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
     MassJobsAnlIntrepid = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsCiematEuler.json"
+with open(filename, 'r') as file:
     MassJobsCiematEuler = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrum.json"
+with open(filename, 'r') as file:
     MassJobsMetacentrum = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrumFiltered.json"
+with open(filename, 'r') as file:
     MassJobsMetacentrumFiltered = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsPIKIPLEX.json"
+with open(filename, 'r') as file:
     MassJobsPIKIPLEX = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsRICC.json"
+with open(filename, 'r') as file:
     MassJobsRICC = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/MassJobsUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsUniLuGaia.json"
+with open(filename, 'r') as file:
     MassJobsUniLuGaia = json.load(file)
 
 ### Number of jobs
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
     NumberJobsAnlIntrepid = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsCiematEuler.json"
+with open(filename, 'r') as file:
     NumberJobsCiematEuler = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrum.json"
+with open(filename, 'r') as file:
     NumberJobsMetacentrum = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrumFiltered.json"
+with open(filename, 'r') as file:
     NumberJobsMetacentrumFiltered = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsPIKIPLEX.json"
+with open(filename, 'r') as file:
     NumberJobsPIKIPLEX = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsRICC.json"
+with open(filename, 'r') as file:
     NumberJobsRICC = json.load(file)
 
-nom_fichier = "timeSeriesJsonHPC/NumberJobsUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsUniLuGaia.json"
+with open(filename, 'r') as file:
     NumberJobsUniLuGaia = json.load(file)
\ No newline at end of file
diff --git a/hpc_case/periodograms.py b/hpc_case/periodograms.py
index 7cd3c40..06e94cc 100644
--- a/hpc_case/periodograms.py
+++ b/hpc_case/periodograms.py
@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
 from scipy.signal import periodogram
 from statsmodels.tsa.stattools import adfuller, kpss
 from statsmodels.nonparametric.smoothers_lowess import lowess
-from math import *
 
 plt.rc('font', size=35)
 plt.rcParams['figure.constrained_layout.use'] = True
diff --git a/wikipedia_2013_case/WikipediaKruskalConoverCluster.py b/wikipedia_2013_case/WikipediaKruskalConoverCluster.py
index bb538ce..0176f4d 100644
--- a/wikipedia_2013_case/WikipediaKruskalConoverCluster.py
+++ b/wikipedia_2013_case/WikipediaKruskalConoverCluster.py
@@ -1,11 +1,8 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import scipy.stats as st
-
 from statsmodels.nonparametric.smoothers_lowess import lowess
-
 import json
-
 import scikit_posthocs as sp
 
 plt.rc('font', size=20)
diff --git a/wikipedia_2013_case/WikipediaPeriodogram.py b/wikipedia_2013_case/WikipediaPeriodogram.py
index 7ee7940..5734288 100644
--- a/wikipedia_2013_case/WikipediaPeriodogram.py
+++ b/wikipedia_2013_case/WikipediaPeriodogram.py
@@ -1,23 +1,17 @@
 import json
-import time
-
 import matplotlib.pyplot as plt
 import pandas as pd
-
 from scipy.signal import periodogram
-
 from statsmodels.tsa.stattools import adfuller, kpss
-
 from statsmodels.tsa.seasonal import MSTL, STL
-
 from statsmodels.nonparametric.smoothers_lowess import lowess
 
 plt.rc('font', size=35)
 plt.rcParams['figure.constrained_layout.use'] = True
 
 # Loading data
-nom_fichier = "Wikipedia2013.json"
-with open(nom_fichier, 'r') as file:
+filename = "Wikipedia2013.json"
+with open(filename, 'r') as file:
     Wikipedia = json.load(file)
 
 
@@ -36,10 +30,10 @@ plt.show()
 X = [i for i in range(len(Requests))]
 
 # Removing the trend component from the time series
-t0 = time.time()
+# t0 = time.time()
 low = lowess(Requests, X, frac=0.1)
-dt = time.time() - t0
-print("low time:", dt) # approximately 6e-1 s
+# dt = time.time() - t0
+# print("low time:", dt) # approximately 6e-1 s
 low = [v[1] for v in low]
 
 # Display of time series and trend component
@@ -48,18 +42,18 @@ plt.plot(low)
 plt.show()
 
 # Display STL results with weekly period
-t0 = time.time()
+# t0 = time.time()
 res = STL(pd.Series(v for v in Requests), period=168).fit()
-dt = time.time() - t0
-print("STL time:", dt) # approximately 1e-1 s
+# dt = time.time() - t0
+# print("STL time:", dt) # approximately 1e-1 s
 res.plot()
 plt.show()
 
 # Display MSTL results with daily, half-weekly and weekly periods
-t0 = time.time()
+# t0 = time.time()
 res = MSTL(Requests, periods=[24, 84, 168]).fit()
-dt = time.time() - t0
-print("MSTL time:", dt) # approximately 2.5 s
+# dt = time.time() - t0
+# print("MSTL time:", dt) # approximately 2.5 s
 res.plot()
 plt.tight_layout()
 plt.show()
@@ -73,10 +67,10 @@ plt.plot(Requests)
 plt.show()
 
 # Periodogram results
-t0 = time.time()
+# t0 = time.time()
 freqencies, spectrum = periodogram(Requests)
-dt = time.time() - t0
-print("Periodogram time:", dt) # approximately 1e-3 s
+# dt = time.time() - t0
+# print("Periodogram time:", dt) # approximately 1e-3 s
 plt.plot(freqencies, spectrum, color='blue')
 plt.grid(True, linestyle='-', which='major', alpha=0.5, axis='both')
 plt.xlabel('Frequency', fontsize=45)
@@ -128,10 +122,10 @@ plt.show()
 X = [i for i in range(len(Bytes))]
 
 # Removing the trend component from the time series
-t0 = time.time()
+# t0 = time.time()
 low = lowess(Bytes, X, frac=0.1)
 low = [v[1] for v in low]
-dt = time.time() - t0
+# dt = time.time() - t0
 
 # Display of time series and trend component
 plt.plot(Bytes)
diff --git a/wikipedia_2013_case/WikipediaSeasonalPeaks.py b/wikipedia_2013_case/WikipediaSeasonalPeaks.py
index fe1a237..516a398 100644
--- a/wikipedia_2013_case/WikipediaSeasonalPeaks.py
+++ b/wikipedia_2013_case/WikipediaSeasonalPeaks.py
@@ -1,6 +1,5 @@
 import json
 import datetime
-
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 
-- 
GitLab