Skip to content
Snippets Groups Projects
Commit e8c842c3 authored by Georges Da Costa's avatar Georges Da Costa
Browse files

Adds the capability to remove the watermark from experiments using the *remove_watermark* command on the CLI

parent 17ff51e8
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import numpy as np
import json
import os
def init_bundle(bundlename):
'Reads an experiment file'
......@@ -15,6 +16,12 @@ def init_bundle(bundlename):
experiments['basename'] = bundlename
return experiments, zip_fid
def save_bundle(bundle_name, bundle_data, target_directory):
    """Write the bundle table as a space-separated file.

    The target directory is created when it does not exist yet. The file
    is named ``bundle_name`` inside ``target_directory`` and the index of
    ``bundle_data`` is not written.
    """
    os.makedirs(target_directory, exist_ok=True)
    destination = '/'.join((target_directory, bundle_name))
    bundle_data.to_csv(destination, sep=' ', index=False)
def merge_timeseries_blocks(references, additions, prefix = 'add_', key='#timestamp'):
return [
[
......
import os
import pandas as pd
def read_host_csv(prefix, hostname, startTime, basename, fullname, archive_fid=None):
......@@ -20,3 +21,21 @@ def read_bundle_csv(prefix, bundle, archive_fid=None):
for index, row in bundle.iterrows()]
return csv_data
def write_host_csv(prefix, hostname, startTime, basename, fullname, data, target_directory):
    """Write the monitoring table of one host as a space-separated file.

    The file is stored in ``<target_directory>/<basename>_<prefix>/`` under
    the name ``<hostname>_<fullname>_<startTime>``; the directory is created
    when missing and the index of ``data`` is not written.
    """
    directory = '%s/%s_%s' % (target_directory, basename, prefix)
    destination = '%s/%s_%s_%s' % (directory, hostname, fullname, startTime)
    os.makedirs(directory, exist_ok=True)
    data.to_csv(destination, sep=' ', index=False)
def write_run_csv(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory):
    """Write one file per host of a run.

    ``hostlist`` is a ';'-separated list of host names and ``data`` holds one
    table per host, in the same order. The ``hostname`` argument is not used
    here: every file is named after the corresponding entry of ``hostlist``.
    """
    hosts = hostlist.split(';')
    for position, current_host in enumerate(hosts):
        host_table = data[position]
        write_host_csv(prefix, current_host, startTime, basename, fullname,
                       host_table, target_directory)
def write_bundle_csv(prefix, bundle, data, target_directory):
    """Write the per-host files of every run listed in a bundle.

    ``bundle`` is a table with one row per run (columns hostname, startTime,
    basename, fullname and hostlist are read) and ``data`` holds one entry
    per run, in row order.
    """
    # Pair rows with data by position, not by index label: data is a plain
    # sequence in row order, so a bundle whose index is not 0..n-1 would
    # otherwise pick the wrong entry or raise.
    for position, (_, row) in enumerate(bundle.iterrows()):
        write_run_csv(prefix, row.hostname, row.startTime, row.basename,
                      row.fullname, row.hostlist, data[position], target_directory)
import os
import json
import pandas as pd
......@@ -6,24 +7,21 @@ def read_run_list(prefix, hostname, startTime, basename, fullname, hostlist=None
fullpath= '%s_%s/%s_%s_%s' % (basename, prefix, hostname, fullname, startTime)
result = []
try:
if archive_fid is None:
with open(fullpath) as file_id:
raw_data = json.loads(file_id.read())
else:
with archive_fid.open(fullpath) as file_id:
raw_data = json.loads(file_id.read())
if archive_fid is None:
with open(fullpath) as file_id:
raw_data = json.loads(file_id.read())
else:
with archive_fid.open(fullpath) as file_id:
raw_data = json.loads(file_id.read())
data = {host:(timestamp, values) for (host, timestamp, values) in raw_data}
for host in hostlist.split(';'):
name, _ = host.split('.', maxsplit=1)
df = pd.DataFrame(list(data[name])).transpose()
df.columns = ["#timestamp", prefix]
result.append(df)
except:
pass
data = {host:(timestamp, values) for (host, timestamp, values) in raw_data}
for host in hostlist.split(';'):
name, _ = host.split('.', maxsplit=1)
df = pd.DataFrame(list(data[name])).transpose()
df.columns = ["#timestamp", prefix]
result.append(df)
return result
def read_bundle_list(prefix, bundle, archive_fid=None):
......@@ -32,3 +30,28 @@ def read_bundle_list(prefix, bundle, archive_fid=None):
for index, row in bundle.iterrows()]
return list_data
def write_run_list(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory):
    """Write the list-style (JSON) monitoring file of one run.

    The file is stored in ``<target_directory>/<basename>_<prefix>/`` under
    the name ``<hostname>_<fullname>_<startTime>`` and contains one
    ``[host, timestamps, values]`` record per entry of the ';'-separated
    ``hostlist``. ``data`` holds one table per host, in the same order, with
    a '#timestamp' column and a column named after ``prefix``.
    """
    directory = '%s/%s_%s' % (target_directory, basename, prefix)
    fullpath = '%s/%s_%s_%s' % (directory, hostname, fullname, startTime)
    os.makedirs(directory, exist_ok=True)

    res = []
    for index, host in enumerate(hostlist.split(';')):
        # The reader looks records up by the part of the host name that
        # precedes the first dot, so store that short name to keep the
        # written file readable.
        short_name = host.split('.', maxsplit=1)[0]
        table = data[index]
        res.append([short_name,
                    _as_json_list(table['#timestamp']),
                    _as_json_list(table[prefix])])
    with open(fullpath, 'w') as file_id:
        json.dump(res, file_id)


def _as_json_list(values):
    'Converts a column to a list of plain Python values that json can serialise'
    # list() on a pandas column keeps numpy integers, which json rejects;
    # tolist() converts them to native Python numbers.
    if hasattr(values, 'tolist'):
        return values.tolist()
    return list(values)
def write_bundle_list(prefix, bundle, data, target_directory):
    """Write the list-style monitoring files (e.g. power) of a bundle.

    ``bundle`` is a table with one row per run (columns hostname, startTime,
    basename, fullname and hostlist are read) and ``data`` holds one entry
    per run, in row order.
    """
    # Pair rows with data by position, not by index label: data is a plain
    # sequence in row order, so a bundle whose index is not 0..n-1 would
    # otherwise pick the wrong entry or raise.
    for position, (_, row) in enumerate(bundle.iterrows()):
        write_run_list(prefix, row.hostname, row.startTime, row.basename,
                       row.fullname, row.hostlist, data[position], target_directory)
import expetator.bundle as bundle
import expetator.monitoring_csv as monitoring_csv
import expetator.monitoring_list as monitoring_list
import expetator.watermark as watermark
import sys
def remove_watermark(target_file, target_dir):
    """Copy an experiment bundle to target_dir with its watermark removed.

    The bundle described by ``target_file`` is loaded, the watermark blocks
    are removed from its 'mojitos' and 'power' monitoring data, the watermark
    signature is stripped from the run names, and the cleaned bundle and
    monitoring files are written under ``target_dir``.

    Each kind of monitoring data is handled on a best-effort basis: when it
    cannot be read or cleaned, it is simply not written.
    """
    bundle_data, zip_fid = bundle.init_bundle(target_file)

    # Catch Exception rather than everything, so that Ctrl-C and sys.exit()
    # are not silently swallowed while keeping the best-effort behaviour.
    try:
        moj = monitoring_csv.read_bundle_csv('mojitos', bundle_data, zip_fid)
        moj_cleaned = watermark.remove_watermark_blocks(moj, frequency=20)
    except Exception:
        moj_cleaned = None

    try:
        power = monitoring_list.read_bundle_list('power', bundle_data, zip_fid)
        power_cleaned = watermark.remove_watermark_blocks(power, frequency=20)
    except Exception:
        power_cleaned = None

    # Names are rewritten only after the monitoring files were read, since
    # the readers locate the source files through the bundle rows.
    watermark.remove_wt_name(bundle_data)

    # NOTE(review): target_file is reused as the output file name inside
    # target_dir -- presumably a bare file name; confirm for nested paths.
    bundle.save_bundle(target_file, bundle_data, target_dir)
    if moj_cleaned is not None:
        monitoring_csv.write_bundle_csv('mojitos', bundle_data, moj_cleaned, target_dir)
    if power_cleaned is not None:
        monitoring_list.write_bundle_list('power', bundle_data, power_cleaned, target_dir)
def main():
    """Command-line entry point: remove_watermark main_file target_directory.

    Prints a usage message and exits with a non-zero status unless exactly
    two arguments are given.
    """
    if len(sys.argv) != 3:
        # The program name was never substituted into the message before,
        # and a usage error must not report success to the calling shell.
        program = sys.argv[0] if sys.argv else 'remove_watermark'
        print('Usage : %s main_file target_directory' % program)
        sys.exit(1)
    target_file, target_dir = sys.argv[1], sys.argv[2]
    remove_watermark(target_file, target_dir)


if __name__ == '__main__':
    main()
......@@ -94,7 +94,12 @@ def remove_watermark_blocks(block, frequency=10, duration=30):
]
for experiment in range(len(block))
]
def remove_wt_name(dataframe, target='fullname', signature='wt-30-'):
    """Strip the watermark signature from the names stored in a column.

    The ``target`` column of ``dataframe`` is replaced in place. Only the
    entries that actually start with ``signature`` are shortened; the others
    are kept unchanged, so names without a watermark are not truncated and
    applying the function twice is harmless.
    """
    s_len = len(signature)
    dataframe[target] = [
        elem[s_len:] if elem.startswith(signature) else elem
        for elem in dataframe[target]
    ]
## Tool for virtualisation
def demo_watermark_detection(focus, freq):
......
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="expetator",
version="0.3.11",
version="0.3.12",
author="Georges Da Costa",
author_email="georges.da-costa@irit.fr",
description="A framework for monitoring HPC applications using DVFS",
......@@ -24,5 +24,11 @@ setuptools.setup(
'leverages/*.[ch]', 'leverages/*.sh', 'leverages/*_mak']},
include_package_data=True,
install_requires=['execo'],
entry_points={
'console_scripts': [
'remove_watermark = expetator.remove_watermark:main',
]
}
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment