Skip to content
Snippets Groups Projects
Commit 26b7487d authored by dlandre2's avatar dlandre2
Browse files

maj artifact

parent 133c8115
Branches
No related tags found
No related merge requests found
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E127" />
<option value="E122" />
<option value="E266" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N802" />
<option value="N806" />
<option value="N803" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="list.__getitem__" />
<option value="float.__getitem__" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.10 (python)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (python)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" filepath="$PROJECT_DIR$/.idea/seasonal-study-of-user-demand-and-it-system-usage-in-datacenters.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.10 (python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
**Seasonal study of user demand and IT system usage in datacenters artifact**
#### **Seasonal study of user demand and IT system usage in datacenters artifact**
**Brief presentation:**
This directory contains the data and Python files used to obtain the results of the study on the characterization of the seasonality of different time series from 6 HPC workloads and from the Wikipedia workload. The directory consists of the following elements:
- The "hpc_case" folder contains the files and data needed to obtain the seasonality characterization results for the ANL, Ciemat Euler, Metacentrum, PIK IPLEX, RICC and UniLu Gaia workloads;
......@@ -10,18 +12,33 @@
In the "hpc_case" folder:
**Packages needed:**
- pandas
- prophet (to use the Prophet forecasting method)
- datetime (Python standard library)
- json (Python standard library)
- matplotlib
- numpy
- sktime (to use the TBATS forecasting method)
- scipy (to use the periodogram, the Kruskal-Wallis test and the gaussian_kde method)
- scikit_posthocs (to use the Conover-Iman test)
- statsmodels (to use the ADF test, the KPSS test and the lowess, MSTL and STL methods)
**In the "hpc_case" folder:**
- The "time_series_json_hpc_data" folder contains workload data;
- The "openTimeSeriesJson.py" file is used to load workload data;
- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results;
- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results;
- Run the "seasonal_peaks.py" file to generate seasonal peaks figures.
- Run the "periodograms.py" file to generate periodogram figures of the different workloads and time series and obtain the related results. You can disable periodogram execution for one or more time series by commenting out the corresponding lines in the executionList object;
- Run the "conover_iman_and_kruskal_groups.py" file to generate cluster figures of the different workloads and time series and obtain the related results. You can disable cluster computation for one or more time series by commenting out the corresponding lines in the executionList object;
- Run the "seasonal_peaks.py" file to generate seasonal peaks figures. You can disable the computation of seasonal peaks for one or more time series by commenting out the corresponding lines in the executionList object.
In the "wikipedia_2013_case" folder:
**In the "wikipedia_2013_case" folder:**
- The "Wikipedia2013.json" file corresponds to Wikipedia data for 2013;
......
......@@ -11,8 +11,8 @@ plt.rcParams['figure.constrained_layout.use'] = True
# Loading data
nom_fichier = "/home/dlandre/Doctorat/phd1/Workload_Prevision/timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
with open(filename, 'r') as file:
AllocatedCoresAnlIntrepid = json.load(file)
y = []
......
......@@ -2,120 +2,120 @@ import json
### Maximum number of allocated cores by the system
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemAnlIntrepid.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemAnlIntrepid.json"
with open(filename, 'r') as file:
AllocatedCoresSystemAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemCiematEuler.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemCiematEuler.json"
with open(filename, 'r') as file:
AllocatedCoresSystemCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrum.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrum.json"
with open(filename, 'r') as file:
AllocatedCoresSystemMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemMetacentrumFiltered.json"
with open(filename, 'r') as file:
AllocatedCoresSystemMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemPIKIPLEX.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemPIKIPLEX.json"
with open(filename, 'r') as file:
AllocatedCoresSystemPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemRICC.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemRICC.json"
with open(filename, 'r') as file:
AllocatedCoresSystemRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresSystemUniLuGaia.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresSystemUniLuGaia.json"
with open(filename, 'r') as file:
AllocatedCoresSystemUniLuGaia = json.load(file)
### Maximum number of requested cores by the users
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresAnlIntrepid.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresAnlIntrepid.json"
with open(filename, 'r') as file:
AllocatedCoresAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresCiematEuler.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresCiematEuler.json"
with open(filename, 'r') as file:
AllocatedCoresCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrum.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrum.json"
with open(filename, 'r') as file:
AllocatedCoresMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresMetacentrumFiltered.json"
with open(filename, 'r') as file:
AllocatedCoresMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresPIKIPLEX.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresPIKIPLEX.json"
with open(filename, 'r') as file:
AllocatedCoresPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresRICC.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresRICC.json"
with open(filename, 'r') as file:
AllocatedCoresRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/AllocatedCoresUniLuGaia.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/AllocatedCoresUniLuGaia.json"
with open(filename, 'r') as file:
AllocatedCoresUniLuGaia = json.load(file)
### Workload mass
nom_fichier = "timeSeriesJsonHPC/MassJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsAnlIntrepid.json"
with open(filename, 'r') as file:
MassJobsAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsCiematEuler.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsCiematEuler.json"
with open(filename, 'r') as file:
MassJobsCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrum.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrum.json"
with open(filename, 'r') as file:
MassJobsMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsMetacentrumFiltered.json"
with open(filename, 'r') as file:
MassJobsMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsPIKIPLEX.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsPIKIPLEX.json"
with open(filename, 'r') as file:
MassJobsPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsRICC.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsRICC.json"
with open(filename, 'r') as file:
MassJobsRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/MassJobsUniLuGaia.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/MassJobsUniLuGaia.json"
with open(filename, 'r') as file:
MassJobsUniLuGaia = json.load(file)
### Number of jobs
nom_fichier = "timeSeriesJsonHPC/NumberJobsAnlIntrepid.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsAnlIntrepid.json"
with open(filename, 'r') as file:
NumberJobsAnlIntrepid = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsCiematEuler.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsCiematEuler.json"
with open(filename, 'r') as file:
NumberJobsCiematEuler = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrum.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrum.json"
with open(filename, 'r') as file:
NumberJobsMetacentrum = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsMetacentrumFiltered.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsMetacentrumFiltered.json"
with open(filename, 'r') as file:
NumberJobsMetacentrumFiltered = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsPIKIPLEX.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsPIKIPLEX.json"
with open(filename, 'r') as file:
NumberJobsPIKIPLEX = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsRICC.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsRICC.json"
with open(filename, 'r') as file:
NumberJobsRICC = json.load(file)
nom_fichier = "timeSeriesJsonHPC/NumberJobsUniLuGaia.json"
with open(nom_fichier, 'r') as file:
filename = "hpc_case/time_series_json_hpc_data/NumberJobsUniLuGaia.json"
with open(filename, 'r') as file:
NumberJobsUniLuGaia = json.load(file)
\ No newline at end of file
......@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
from scipy.signal import periodogram
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.nonparametric.smoothers_lowess import lowess
from math import *
plt.rc('font', size=35)
plt.rcParams['figure.constrained_layout.use'] = True
......
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from statsmodels.nonparametric.smoothers_lowess import lowess
import json
import scikit_posthocs as sp
plt.rc('font', size=20)
......
import json
import time
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import periodogram
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.seasonal import MSTL, STL
from statsmodels.nonparametric.smoothers_lowess import lowess
plt.rc('font', size=35)
plt.rcParams['figure.constrained_layout.use'] = True
# Loading data
nom_fichier = "Wikipedia2013.json"
with open(nom_fichier, 'r') as file:
filename = "Wikipedia2013.json"
with open(filename, 'r') as file:
Wikipedia = json.load(file)
......@@ -36,10 +30,10 @@ plt.show()
X = [i for i in range(len(Requests))]
# Removing the trend component from the time series
t0 = time.time()
# t0 = time.time()
low = lowess(Requests, X, frac=0.1)
dt = time.time() - t0
print("low time:", dt) # approximately 6e-1 s
# dt = time.time() - t0
# print("low time:", dt) # approximately 6e-1 s
low = [v[1] for v in low]
# Display of time series and trend component
......@@ -48,18 +42,18 @@ plt.plot(low)
plt.show()
# Display STL results with weekly period
t0 = time.time()
# t0 = time.time()
res = STL(pd.Series(v for v in Requests), period=168).fit()
dt = time.time() - t0
print("STL time:", dt) # approximately 1e-1 s
# dt = time.time() - t0
# print("STL time:", dt) # approximately 1e-1 s
res.plot()
plt.show()
# Display MSTL results with daily, half-weekly and weekly periods
t0 = time.time()
# t0 = time.time()
res = MSTL(Requests, periods=[24, 84, 168]).fit()
dt = time.time() - t0
print("MSTL time:", dt) # approximately 2.5 s
# dt = time.time() - t0
# print("MSTL time:", dt) # approximately 2.5 s
res.plot()
plt.tight_layout()
plt.show()
......@@ -73,10 +67,10 @@ plt.plot(Requests)
plt.show()
# Periodogram results
t0 = time.time()
# t0 = time.time()
freqencies, spectrum = periodogram(Requests)
dt = time.time() - t0
print("Periodogram time:", dt) # approximately 1e-3 s
# dt = time.time() - t0
# print("Periodogram time:", dt) # approximately 1e-3 s
plt.plot(freqencies, spectrum, color='blue')
plt.grid(True, linestyle='-', which='major', alpha=0.5, axis='both')
plt.xlabel('Frequency', fontsize=45)
......@@ -128,10 +122,10 @@ plt.show()
X = [i for i in range(len(Bytes))]
# Removing the trend component from the time series
t0 = time.time()
# t0 = time.time()
low = lowess(Bytes, X, frac=0.1)
low = [v[1] for v in low]
dt = time.time() - t0
# dt = time.time() - t0
# Display of time series and trend component
plt.plot(Bytes)
......
import json
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment