maj artifact

26b7487d · dlandre2 · 133c8115 · 26b7487d · 26b7487d · 26b7487d
Commit 26b7487d authored 1 year ago by dlandre2
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E127" />
+          <option value="E122" />
+          <option value="E266" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N802" />
+          <option value="N806" />
+          <option value="N803" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="list.__getitem__" />
+          <option value="float.__getitem__" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.10 (python)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (python)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" filepath="$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml
+++ b/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (python)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/README.md
+++ b/README.md
-**Seasonal study of user demand and IT system usage in datacenters artifact**
+#### **Seasonal study of user demand and IT system usage in datacenters artifact**

+**Brief presentation:**

+This directory contains the data and Python files used to obtain the results of the study on the characterization of the seasonality of different time series from 6 HPC workloads and from the Wikipedia workload. The directory consists of the following elements:

 - The "hpc_case" folder contains the files and data needed to obtain the seasonality characterization results for the ANL, Ciemat Euler, Metacentrum, PIK IPLEX, RICC and UniLu Gaia workloads;

@@ -10,18 +12,33 @@



-In the "hpc_case" folder:
+**Packages needed:**
+
+- pandas
+- prophet (to use the Prophet forecasting method)
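+- datetime (Python standard library, no installation needed)
+- json (Python standard library, no installation needed)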
+- matplotlib
+- numpy
+- sktime (to use the TBATS forecasting method)
+- scipy (to use the periodogram, the Kruskal-Wallis test and the gaussian_kde method)
+- scikit_posthocs (to use the Conover-Iman test)
+- statsmodels (to use the ADF test, the KPSS test and the lowess, MSTL and STL methods)
+
+
+
+**In the "hpc_case" folder:**

 - The "time_series_json_hpc_data" folder contains workload data;
 - The "openTimeSeriesJson.py" file is used to load workload data;

- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results;
- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results;
- Run the "seasonal_peaks.py" file to generate seasonal peaks figures.
+- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results. You can disable periodogram execution for one or more time series by commenting out the corresponding lines in the executionList object;
+- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results. You can disable cluster computation for one or more time series by commenting out the corresponding lines in the executionList object;
+- Run the "seasonal_peaks.py" file to generate seasonal peaks figures. You can disable the computation of seasonal peaks for one or more time series by commenting out the corresponding lines in the executionList object.



-In the "wikipedia_2013_case" folder:
+**In the "wikipedia_2013_case" folder:**

 - The "Wikipedia2013.json" file corresponds to wikipedia data for 2013;


--- a/characterizationUseCase.py
+++ b/characterizationUseCase.py
@@ -11,8 +11,8 @@ plt.rcParams['figure.constrained_layout.use'] = True


 # Loading data
-nom_fichier = "/home/dlandre/Doctorat/phd1/Workload_Prevision/timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
    AllocatedCoresAnlIntrepid = json.load(file)

 y = []

--- a/hpc_case/openTimeSeriesJson.py
+++ b/hpc_case/openTimeSeriesJson.py
@@ -2,120 +2,120 @@ import json

 ### Maximum number of allocated cores by the system

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemAnlIntrepid.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemAnlIntrepid = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemCiematEuler.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemCiematEuler = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrum.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemMetacentrum = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrumFiltered.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemMetacentrumFiltered = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemPIKIPLEX.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemPIKIPLEX = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemRICC.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemRICC = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemUniLuGaia.json"
+with open(filename, 'r') as file:
    AllocatedCoresSystemUniLuGaia = json.load(file)

 ### Maximum number of requested cores by the users

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresAnlIntrepid.json"
+with open(filename, 'r') as file:
    AllocatedCoresAnlIntrepid = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresCiematEuler.json"
+with open(filename, 'r') as file:
    AllocatedCoresCiematEuler = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrum.json"
+with open(filename, 'r') as file:
    AllocatedCoresMetacentrum = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrumFiltered.json"
+with open(filename, 'r') as file:
    AllocatedCoresMetacentrumFiltered = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresPIKIPLEX.json"
+with open(filename, 'r') as file:
    AllocatedCoresPIKIPLEX = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresRICC.json"
+with open(filename, 'r') as file:
    AllocatedCoresRICC = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/AllocatedCoresUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresUniLuGaia.json"
+with open(filename, 'r') as file:
    AllocatedCoresUniLuGaia = json.load(file)

 ### Workload mass

-nom_fichier = "timeSeriesJsonHPC/MassJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
    MassJobsAnlIntrepid = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsCiematEuler.json"
+with open(filename, 'r') as file:
    MassJobsCiematEuler = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrum.json"
+with open(filename, 'r') as file:
    MassJobsMetacentrum = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrumFiltered.json"
+with open(filename, 'r') as file:
    MassJobsMetacentrumFiltered = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsPIKIPLEX.json"
+with open(filename, 'r') as file:
    MassJobsPIKIPLEX = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsRICC.json"
+with open(filename, 'r') as file:
    MassJobsRICC = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/MassJobsUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/MassJobsUniLuGaia.json"
+with open(filename, 'r') as file:
    MassJobsUniLuGaia = json.load(file)

 ### Number of jobs

-nom_fichier = "timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
+with open(filename, 'r') as file:
    NumberJobsAnlIntrepid = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsCiematEuler.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsCiematEuler.json"
+with open(filename, 'r') as file:
    NumberJobsCiematEuler = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrum.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrum.json"
+with open(filename, 'r') as file:
    NumberJobsMetacentrum = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrumFiltered.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrumFiltered.json"
+with open(filename, 'r') as file:
    NumberJobsMetacentrumFiltered = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsPIKIPLEX.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsPIKIPLEX.json"
+with open(filename, 'r') as file:
    NumberJobsPIKIPLEX = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsRICC.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsRICC.json"
+with open(filename, 'r') as file:
    NumberJobsRICC = json.load(file)

-nom_fichier = "timeSeriesJsonHPC/NumberJobsUniLuGaia.json"
-with open(nom_fichier, 'r') as file:
+filename = "hpc_case/time_series_json_hpc_data/NumberJobsUniLuGaia.json"
+with open(filename, 'r') as file:
    NumberJobsUniLuGaia = json.load(file)
\ No newline at end of file
--- a/hpc_case/periodograms.py
+++ b/hpc_case/periodograms.py
@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
 from scipy.signal import periodogram
 from statsmodels.tsa.stattools import adfuller, kpss
 from statsmodels.nonparametric.smoothers_lowess import lowess
-from math import *

 plt.rc('font', size=35)
 plt.rcParams['figure.constrained_layout.use'] = True

--- a/wikipedia_2013_case/WikipediaKruskalConoverCluster.py
+++ b/wikipedia_2013_case/WikipediaKruskalConoverCluster.py
 import numpy as np
 import matplotlib.pyplot as plt
 import scipy.stats as st
-
 from statsmodels.nonparametric.smoothers_lowess import lowess
-
 import json
-
 import scikit_posthocs as sp

 plt.rc('font', size=20)

--- a/wikipedia_2013_case/WikipediaPeriodogram.py
+++ b/wikipedia_2013_case/WikipediaPeriodogram.py
 import json
-import time
-
 import matplotlib.pyplot as plt
 import pandas as pd
-
 from scipy.signal import periodogram
-
 from statsmodels.tsa.stattools import adfuller, kpss
-
 from statsmodels.tsa.seasonal import MSTL, STL
-
 from statsmodels.nonparametric.smoothers_lowess import lowess

 plt.rc('font', size=35)
 plt.rcParams['figure.constrained_layout.use'] = True

 # Loading data
-nom_fichier = "Wikipedia2013.json"
-with open(nom_fichier, 'r') as file:
+filename = "Wikipedia2013.json"
+with open(filename, 'r') as file:
    Wikipedia = json.load(file)


@@ -36,10 +30,10 @@ plt.show()
 X = [i for i in range(len(Requests))]

 # Removing the trend component from the time series
-t0 = time.time()
+# t0 = time.time()
 low = lowess(Requests, X, frac=0.1)
-dt = time.time() - t0
-print("low time:", dt) # approximately 6e-1 s
+# dt = time.time() - t0
+# print("low time:", dt) # approximately 6e-1 s
 low = [v[1] for v in low]

 # Display of time series and trend component
@@ -48,18 +42,18 @@ plt.plot(low)
 plt.show()

 # Display STL results with weekly period
-t0 = time.time()
+# t0 = time.time()
 res = STL(pd.Series(v for v in Requests), period=168).fit()
-dt = time.time() - t0
-print("STL time:", dt) # approximately 1e-1 s
+# dt = time.time() - t0
+# print("STL time:", dt) # approximately 1e-1 s
 res.plot()
 plt.show()

 # Display MSTL results with daily, half-weekly and weekly periods
-t0 = time.time()
+# t0 = time.time()
 res = MSTL(Requests, periods=[24, 84, 168]).fit()
-dt = time.time() - t0
-print("MSTL time:", dt) # approximately 2.5 s
+# dt = time.time() - t0
+# print("MSTL time:", dt) # approximately 2.5 s
 res.plot()
 plt.tight_layout()
 plt.show()
@@ -73,10 +67,10 @@ plt.plot(Requests)
 plt.show()

 # Periodogram results
-t0 = time.time()
+# t0 = time.time()
 freqencies, spectrum = periodogram(Requests)
-dt = time.time() - t0
-print("Periodogram time:", dt) # approximately 1e-3 s
+# dt = time.time() - t0
+# print("Periodogram time:", dt) # approximately 1e-3 s
 plt.plot(freqencies, spectrum, color='blue')
 plt.grid(True, linestyle='-', which='major', alpha=0.5, axis='both')
 plt.xlabel('Frequency', fontsize=45)
@@ -128,10 +122,10 @@ plt.show()
 X = [i for i in range(len(Bytes))]

 # Removing the trend component from the time series
-t0 = time.time()
+# t0 = time.time()
 low = lowess(Bytes, X, frac=0.1)
 low = [v[1] for v in low]
-dt = time.time() - t0
+# dt = time.time() - t0

 # Display of time series and trend component
 plt.plot(Bytes)

--- a/wikipedia_2013_case/WikipediaSeasonalPeaks.py
+++ b/wikipedia_2013_case/WikipediaSeasonalPeaks.py
 import json
 import datetime
-
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates