diff --git a/expetator/bundle.py b/expetator/bundle.py index d44c038e92919b3eb35d1109b6d8899384fa8bfe..3cb2b1605b9c263a471f0e189a5e54eec30bef03 100644 --- a/expetator/bundle.py +++ b/expetator/bundle.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np import json +import os def init_bundle(bundlename): 'Reads an experiment file' @@ -15,6 +16,12 @@ def init_bundle(bundlename): experiments['basename'] = bundlename return experiments, zip_fid +def save_bundle(bundle_name, bundle_data, target_directory): + os.makedirs(target_directory, exist_ok=True) + bundle_data.to_csv(target_directory+'/'+bundle_name,sep=' ', index=False) + + + def merge_timeseries_blocks(references, additions, prefix = 'add_', key='#timestamp'): return [ [ diff --git a/expetator/monitoring_csv.py b/expetator/monitoring_csv.py index 09b72edf9b6953efd06c61cdf9645914b3aba06e..a0f2507868f5b41ba0d2e046c6793714397f48a7 100644 --- a/expetator/monitoring_csv.py +++ b/expetator/monitoring_csv.py @@ -1,3 +1,4 @@ +import os import pandas as pd def read_host_csv(prefix, hostname, startTime, basename, fullname, archive_fid=None): @@ -20,3 +21,21 @@ def read_bundle_csv(prefix, bundle, archive_fid=None): for index, row in bundle.iterrows()] return csv_data + + + +def write_host_csv(prefix, hostname, startTime, basename, fullname, data, target_directory): + fullpath= '%s/%s_%s/%s_%s_%s' % (target_directory, basename, prefix, hostname, fullname, startTime) + os.makedirs('%s/%s_%s' % (target_directory, basename, prefix), exist_ok=True) + + data.to_csv(fullpath,sep=' ', index=False) + + +def write_run_csv(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory): + for index, host in enumerate(hostlist.split(';')): + write_host_csv(prefix, host, startTime, basename, fullname, data[index], target_directory) + + +def write_bundle_csv(prefix, bundle, data, target_directory): + for index, row in bundle.iterrows(): + write_run_csv(prefix, row.hostname, row.startTime, row.basename, row.fullname, row.hostlist, data[index], target_directory) diff --git a/expetator/monitoring_list.py b/expetator/monitoring_list.py index b6885313a368f6d461bfb4ce30b7b4daf778fdd1..4f98d31696649a4ebb285f542d154b35f7d60011 100644 --- a/expetator/monitoring_list.py +++ b/expetator/monitoring_list.py @@ -1,3 +1,4 @@ +import os import json import pandas as pd @@ -6,24 +7,21 @@ def read_run_list(prefix, hostname, startTime, basename, fullname, hostlist=None fullpath= '%s_%s/%s_%s_%s' % (basename, prefix, hostname, fullname, startTime) result = [] - try: - if archive_fid is None: - with open(fullpath) as file_id: - raw_data = json.loads(file_id.read()) - else: - with archive_fid.open(fullpath) as file_id: - raw_data = json.loads(file_id.read()) + if archive_fid is None: + with open(fullpath) as file_id: + raw_data = json.loads(file_id.read()) + else: + with archive_fid.open(fullpath) as file_id: + raw_data = json.loads(file_id.read()) - data = {host:(timestamp, values) for (host, timestamp, values) in raw_data} - - for host in hostlist.split(';'): - name, _ = host.split('.', maxsplit=1) - df = pd.DataFrame(list(data[name])).transpose() - df.columns = ["#timestamp", prefix] - result.append(df) - except: - pass + data = {host:(timestamp, values) for (host, timestamp, values) in raw_data} + for host in hostlist.split(';'): + name, _ = host.split('.', maxsplit=1) + df = pd.DataFrame(list(data[name])).transpose() + df.columns = ["#timestamp", prefix] + result.append(df) + return result def read_bundle_list(prefix, bundle, archive_fid=None): @@ -32,3 +30,28 @@ def read_bundle_list(prefix, bundle, archive_fid=None): for index, row in bundle.iterrows()] return list_data + + + + +def write_run_list(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory): + fullpath= '%s/%s_%s/%s_%s_%s' % (target_directory, basename, prefix, hostname, fullname, startTime) + os.makedirs('%s/%s_%s' % (target_directory, basename, prefix), exist_ok=True) + + hosts = hostlist.split(';') + res = [] + + for index, host in enumerate(hosts): + tmp = [host, list(data[index]['#timestamp']), list(data[index][prefix])] + res.append(tmp) + + with open(fullpath, 'w') as file_id: + json.dump(res, file_id) + + + +def write_bundle_list(prefix, bundle, data, target_directory): + 'Writes the power files associated to a bundle' + for index, row in bundle.iterrows(): + write_run_list(prefix, row.hostname, row.startTime, row.basename, row.fullname, row.hostlist, data[index], target_directory) + diff --git a/expetator/remove_watermark.py b/expetator/remove_watermark.py new file mode 100644 index 0000000000000000000000000000000000000000..de28e5305b842424ee84f05ada97b5b0a5a8fce7 --- /dev/null +++ b/expetator/remove_watermark.py @@ -0,0 +1,41 @@ +import expetator.bundle as bundle +import expetator.monitoring_csv as monitoring_csv +import expetator.monitoring_list as monitoring_list +import expetator.watermark as watermark +import sys + +def remove_watermark(target_file, target_dir): + + bundle_data, zip_fid = bundle.init_bundle(target_file) + + try: + moj = monitoring_csv.read_bundle_csv('mojitos', bundle_data, zip_fid) + moj_cleaned = watermark.remove_watermark_blocks(moj, frequency=20) + except: + moj_cleaned = None + + try: + power = monitoring_list.read_bundle_list('power', bundle_data, zip_fid) + power_cleaned = watermark.remove_watermark_blocks(power, frequency=20) + except: + power_cleaned = None + + watermark.remove_wt_name(bundle_data) + bundle.save_bundle(target_file, bundle_data, target_dir) + + if not moj_cleaned is None: + monitoring_csv.write_bundle_csv('mojitos', bundle_data, moj_cleaned, target_dir) + if not power_cleaned is None: + monitoring_list.write_bundle_list('power', bundle_data, power_cleaned, target_dir) + +def main(): + if len(sys.argv) != 3: + print('Usage : %s main_file target_directory') + sys.exit(0) + + target_file = sys.argv[1] + target_dir = sys.argv[2] + remove_watermark(target_file, target_dir) + +if __name__ == '__main__': + main() diff --git a/expetator/watermark.py b/expetator/watermark.py index d8ab7743a4a7221e8c2b885b0477a954f5dbd44d..e622310c3245c569f0716d3bf2a396f92b928620 100644 --- a/expetator/watermark.py +++ b/expetator/watermark.py @@ -94,7 +94,12 @@ def remove_watermark_blocks(block, frequency=10, duration=30): ] for experiment in range(len(block)) ] - + + +def remove_wt_name(dataframe, target='fullname', signature='wt-30-'): + s_len = len(signature) + dataframe[target] = [elem[s_len:] for elem in dataframe[target]] + ## Tool for virtualisation def demo_watermark_detection(focus, freq): diff --git a/setup.py b/setup.py index 9e0c463c3c52e0cde5ba9464780672479b23eb33..a302eab1b49ef155e6f9f890687042845f685b10 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="expetator", - version="0.3.11", + version="0.3.12", author="Georges Da Costa", author_email="georges.da-costa@irit.fr", description="A framework for monitoring HPC applications using DVFS", @@ -24,5 +24,11 @@ setuptools.setup( 'leverages/*.[ch]', 'leverages/*.sh', 'leverages/*_mak']}, include_package_data=True, install_requires=['execo'], + entry_points={ + 'console_scripts': [ + 'remove_watermark = expetator.remove_watermark:main', + ] + } + )