Skip to content
Snippets Groups Projects
Commit 26b7487d authored by dlandre2's avatar dlandre2
Browse files

maj artifact

parent 133c8115
No related branches found
No related tags found
No related merge requests found
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E127" />
<option value="E122" />
<option value="E266" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N802" />
<option value="N806" />
<option value="N803" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="list.__getitem__" />
<option value="float.__getitem__" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.10 (python)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (python)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" filepath="$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.10 (python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
**Seasonal study of user demand and IT system usage in datacenters artifact** #### **Seasonal study of user demand and IT system usage in datacenters artifact**
**Brief presentation:**
This directory contains the data and Python files used to obtain the results of the study characterizing the seasonality of different time series from six HPC workloads and from the Wikipedia workload. The directory consists of the following elements:
- The "hpc_case" folder contains the files and data needed to obtain the seasonality characterization results for the ANL, Ciemat Euler, Metacentrum, PIK IPLEX, RICC and UniLu Gaia workloads; - The "hpc_case" folder contains the files and data needed to obtain the seasonality characterization results for the ANL, Ciemat Euler, Metacentrum, PIK IPLEX, RICC and UniLu Gaia workloads;
...@@ -10,18 +12,33 @@ ...@@ -10,18 +12,33 @@
In the "hpc_case" folder: **Packages needed:**
- pandas
- prophet (to use the Prophet forecasting method)
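- datetime (Python standard library, no installation required)
- json (Python standard library, no installation required)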
- matplotlib
- numpy
- sktime (to use the TBATS forecasting method)
- scipy (to use the periodogram, the Kruskal-Wallis test and the gaussian_kde method)
- scikit_posthocs (installed as "scikit-posthocs"; to use the Conover-Iman test)
- statsmodels (to use the ADF test, the KPSS test and the lowess, MSTL and STL methods)
**In the "hpc_case" folder:**
- The "time_series_json_hpc_data" folder contains workload data; - The "time_series_json_hpc_data" folder contains workload data;
- The "openTimeSeriesJson.py" file is used to load workload data; - The "openTimeSeriesJson.py" file is used to load workload data;
- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results; - Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results. You can disable periodogram execution for one or more time series by commenting out the corresponding lines in the executionList object;
- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results; - Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results. You can disable cluster computation for one or more time series by commenting out the corresponding lines in the executionList object;
- Run the "seasonal_peaks.py" file to generate seasonal peaks figures. - Run the "seasonal_peaks.py" file to generate seasonal peaks figures. You can disable the computation of seasonal peaks for one or more time series by commenting out the corresponding lines in the executionList object.
In the "wikipedia_2013_case" folder: **In the "wikipedia_2013_case" folder:**
- The "Wikipedia2013.json" file corresponds to Wikipedia data for 2013; - The "Wikipedia2013.json" file corresponds to Wikipedia data for 2013;
......
...@@ -11,8 +11,8 @@ plt.rcParams['figure.constrained_layout.use'] = True ...@@ -11,8 +11,8 @@ plt.rcParams['figure.constrained_layout.use'] = True
# Loading data # Loading data
nom_fichier = "/home/dlandre/Doctorat/phd1/Workload_Prevision/timeSeriesJsonHPC/NumberJobsAnlIntrepid.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresAnlIntrepid = json.load(file) AllocatedCoresAnlIntrepid = json.load(file)
y = [] y = []
......
...@@ -2,120 +2,120 @@ import json ...@@ -2,120 +2,120 @@ import json
### Maximum number of allocated cores by the system ### Maximum number of allocated cores by the system
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemAnlIntrepid.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemAnlIntrepid.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemAnlIntrepid = json.load(file) AllocatedCoresSystemAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemCiematEuler.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemCiematEuler.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemCiematEuler = json.load(file) AllocatedCoresSystemCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrum.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrum.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemMetacentrum = json.load(file) AllocatedCoresSystemMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrumFiltered.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemMetacentrumFiltered = json.load(file) AllocatedCoresSystemMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemPIKIPLEX.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemPIKIPLEX.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemPIKIPLEX = json.load(file) AllocatedCoresSystemPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemRICC.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemRICC.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemRICC = json.load(file) AllocatedCoresSystemRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemUniLuGaia.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemUniLuGaia.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresSystemUniLuGaia = json.load(file) AllocatedCoresSystemUniLuGaia = json.load(file)
### Maximum number of requested cores by the users ### Maximum number of requested cores by the users
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresAnlIntrepid.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresAnlIntrepid.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresAnlIntrepid = json.load(file) AllocatedCoresAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresCiematEuler.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresCiematEuler.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresCiematEuler = json.load(file) AllocatedCoresCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrum.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrum.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresMetacentrum = json.load(file) AllocatedCoresMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrumFiltered.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresMetacentrumFiltered = json.load(file) AllocatedCoresMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresPIKIPLEX.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresPIKIPLEX.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresPIKIPLEX = json.load(file) AllocatedCoresPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresRICC.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresRICC.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresRICC = json.load(file) AllocatedCoresRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresUniLuGaia.json" filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresUniLuGaia.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
AllocatedCoresUniLuGaia = json.load(file) AllocatedCoresUniLuGaia = json.load(file)
### Workload mass ### Workload mass
nom_fichier = "timeSeriesJsonHPC/MassJobsAnlIntrepid.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsAnlIntrepid = json.load(file) MassJobsAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsCiematEuler.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsCiematEuler.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsCiematEuler = json.load(file) MassJobsCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrum.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrum.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsMetacentrum = json.load(file) MassJobsMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrumFiltered.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsMetacentrumFiltered = json.load(file) MassJobsMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsPIKIPLEX.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsPIKIPLEX.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsPIKIPLEX = json.load(file) MassJobsPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsRICC.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsRICC.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsRICC = json.load(file) MassJobsRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsUniLuGaia.json" filename = "hpc_case/time_series_json_hpc_data/MassJobsUniLuGaia.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
MassJobsUniLuGaia = json.load(file) MassJobsUniLuGaia = json.load(file)
### Number of jobs ### Number of jobs
nom_fichier = "timeSeriesJsonHPC/NumberJobsAnlIntrepid.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsAnlIntrepid = json.load(file) NumberJobsAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsCiematEuler.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsCiematEuler.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsCiematEuler = json.load(file) NumberJobsCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrum.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrum.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsMetacentrum = json.load(file) NumberJobsMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrumFiltered.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsMetacentrumFiltered = json.load(file) NumberJobsMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsPIKIPLEX.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsPIKIPLEX.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsPIKIPLEX = json.load(file) NumberJobsPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsRICC.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsRICC.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsRICC = json.load(file) NumberJobsRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsUniLuGaia.json" filename = "hpc_case/time_series_json_hpc_data/NumberJobsUniLuGaia.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
NumberJobsUniLuGaia = json.load(file) NumberJobsUniLuGaia = json.load(file)
\ No newline at end of file
...@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt ...@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
from scipy.signal import periodogram from scipy.signal import periodogram
from statsmodels.tsa.stattools import adfuller, kpss from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.nonparametric.smoothers_lowess import lowess from statsmodels.nonparametric.smoothers_lowess import lowess
from math import *
plt.rc('font', size=35) plt.rc('font', size=35)
plt.rcParams['figure.constrained_layout.use'] = True plt.rcParams['figure.constrained_layout.use'] = True
......
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import scipy.stats as st import scipy.stats as st
from statsmodels.nonparametric.smoothers_lowess import lowess from statsmodels.nonparametric.smoothers_lowess import lowess
import json import json
import scikit_posthocs as sp import scikit_posthocs as sp
plt.rc('font', size=20) plt.rc('font', size=20)
......
import json import json
import time
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
from scipy.signal import periodogram from scipy.signal import periodogram
from statsmodels.tsa.stattools import adfuller, kpss from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.seasonal import MSTL, STL from statsmodels.tsa.seasonal import MSTL, STL
from statsmodels.nonparametric.smoothers_lowess import lowess from statsmodels.nonparametric.smoothers_lowess import lowess
plt.rc('font', size=35) plt.rc('font', size=35)
plt.rcParams['figure.constrained_layout.use'] = True plt.rcParams['figure.constrained_layout.use'] = True
# Loading data # Loading data
nom_fichier = "Wikipedia2013.json" filename = "Wikipedia2013.json"
with open(nom_fichier, 'r') as file: with open(filename, 'r') as file:
Wikipedia = json.load(file) Wikipedia = json.load(file)
...@@ -36,10 +30,10 @@ plt.show() ...@@ -36,10 +30,10 @@ plt.show()
X = [i for i in range(len(Requests))] X = [i for i in range(len(Requests))]
# Removing the trend component from the time series # Removing the trend component from the time series
t0 = time.time() # t0 = time.time()
low = lowess(Requests, X, frac=0.1) low = lowess(Requests, X, frac=0.1)
dt = time.time() - t0 # dt = time.time() - t0
print("low time:", dt) # approximately 6e-1 s # print("low time:", dt) # approximately 6e-1 s
low = [v[1] for v in low] low = [v[1] for v in low]
# Display of time series and trend component # Display of time series and trend component
...@@ -48,18 +42,18 @@ plt.plot(low) ...@@ -48,18 +42,18 @@ plt.plot(low)
plt.show() plt.show()
# Display STL results with weekly period # Display STL results with weekly period
t0 = time.time() # t0 = time.time()
res = STL(pd.Series(v for v in Requests), period=168).fit() res = STL(pd.Series(v for v in Requests), period=168).fit()
dt = time.time() - t0 # dt = time.time() - t0
print("STL time:", dt) # approximately 1e-1 s # print("STL time:", dt) # approximately 1e-1 s
res.plot() res.plot()
plt.show() plt.show()
# Display MSTL results with daily, half-weekly and weekly periods # Display MSTL results with daily, half-weekly and weekly periods
t0 = time.time() # t0 = time.time()
res = MSTL(Requests, periods=[24, 84, 168]).fit() res = MSTL(Requests, periods=[24, 84, 168]).fit()
dt = time.time() - t0 # dt = time.time() - t0
print("MSTL time:", dt) # approximately 2.5 s # print("MSTL time:", dt) # approximately 2.5 s
res.plot() res.plot()
plt.tight_layout() plt.tight_layout()
plt.show() plt.show()
...@@ -73,10 +67,10 @@ plt.plot(Requests) ...@@ -73,10 +67,10 @@ plt.plot(Requests)
plt.show() plt.show()
# Periodogram results # Periodogram results
t0 = time.time() # t0 = time.time()
freqencies, spectrum = periodogram(Requests) freqencies, spectrum = periodogram(Requests)
dt = time.time() - t0 # dt = time.time() - t0
print("Periodogram time:", dt) # approximately 1e-3 s # print("Periodogram time:", dt) # approximately 1e-3 s
plt.plot(freqencies, spectrum, color='blue') plt.plot(freqencies, spectrum, color='blue')
plt.grid(True, linestyle='-', which='major', alpha=0.5, axis='both') plt.grid(True, linestyle='-', which='major', alpha=0.5, axis='both')
plt.xlabel('Frequency', fontsize=45) plt.xlabel('Frequency', fontsize=45)
...@@ -128,10 +122,10 @@ plt.show() ...@@ -128,10 +122,10 @@ plt.show()
X = [i for i in range(len(Bytes))] X = [i for i in range(len(Bytes))]
# Removing the trend component from the time series # Removing the trend component from the time series
t0 = time.time() # t0 = time.time()
low = lowess(Bytes, X, frac=0.1) low = lowess(Bytes, X, frac=0.1)
low = [v[1] for v in low] low = [v[1] for v in low]
dt = time.time() - t0 # dt = time.time() - t0
# Display of time series and trend component # Display of time series and trend component
plt.plot(Bytes) plt.plot(Bytes)
......
import json import json
import datetime import datetime
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.dates as mdates import matplotlib.dates as mdates
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment