Adds capability to remove watermark from experiments using *remove_watermack* command on cli

Adds capability to remove watermark from experiments using remove_watermack command on cli
e8c842c3 · Georges Da Costa · 17ff51e8 · e8c842c3 · e8c842c3 · e8c842c3
Commit e8c842c3 authored 3 years ago by Georges Da Costa
--- a/expetator/bundle.py
+++ b/expetator/bundle.py
 import pandas as pd
 import numpy as np
 import json
+import os

 def init_bundle(bundlename):
    'Reads an experiment file'
@@ -15,6 +16,12 @@ def init_bundle(bundlename):
    experiments['basename'] = bundlename
    return experiments, zip_fid

+def save_bundle(bundle_name, bundle_data, target_directory):
+    os.makedirs(target_directory, exist_ok=True)
+    bundle_data.to_csv(target_directory+'/'+bundle_name,sep=' ', index=False)
+
+
+
 def merge_timeseries_blocks(references, additions, prefix = 'add_', key='#timestamp'):
    return [
        [

--- a/expetator/monitoring_csv.py
+++ b/expetator/monitoring_csv.py
+import os
 import pandas as pd

 def read_host_csv(prefix, hostname, startTime, basename, fullname, archive_fid=None):
@@ -20,3 +21,21 @@ def read_bundle_csv(prefix, bundle, archive_fid=None):
                    for index, row in bundle.iterrows()]
    return csv_data

+
+
+
+def write_host_csv(prefix, hostname, startTime, basename, fullname, data, target_directory):
+    fullpath= '%s/%s_%s/%s_%s_%s' % (target_directory, basename, prefix, hostname, fullname, startTime)
+    os.makedirs('%s/%s_%s' % (target_directory, basename, prefix), exist_ok=True)
+
+    data.to_csv(fullpath,sep=' ', index=False)
+
+
+def write_run_csv(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory):
+    for index, host in enumerate(hostlist.split(';')):
+        write_host_csv(prefix, host, startTime, basename, fullname, data[index], target_directory)
+
+
+def write_bundle_csv(prefix, bundle, data, target_directory):
+    for index, row in bundle.iterrows():
+        write_run_csv(prefix, row.hostname, row.startTime, row.basename, row.fullname, row.hostlist, data[index], target_directory) 
--- a/expetator/monitoring_list.py
+++ b/expetator/monitoring_list.py
+import os
 import json
 import pandas as pd

@@ -6,24 +7,21 @@ def read_run_list(prefix, hostname, startTime, basename, fullname, hostlist=None
    fullpath= '%s_%s/%s_%s_%s' % (basename, prefix, hostname, fullname, startTime)
    result = []

-    try:
-        if archive_fid is None:
-            with open(fullpath) as file_id:
-                raw_data = json.loads(file_id.read())
-        else:
-            with archive_fid.open(fullpath) as file_id:
-                raw_data = json.loads(file_id.read())
+    if archive_fid is None:
+        with open(fullpath) as file_id:
+            raw_data = json.loads(file_id.read())
+    else:
+        with archive_fid.open(fullpath) as file_id:
+            raw_data = json.loads(file_id.read())
    
-        data = {host:(timestamp, values) for (host, timestamp, values) in raw_data}
-    
-        for host in hostlist.split(';'):
-            name, _ = host.split('.', maxsplit=1)
-            df = pd.DataFrame(list(data[name])).transpose()
-            df.columns = ["#timestamp", prefix]
-            result.append(df)
-    except:
-        pass
+    data = {host:(timestamp, values) for (host, timestamp, values) in raw_data}
    
+    for host in hostlist.split(';'):
+        name, _ = host.split('.', maxsplit=1)
+        df = pd.DataFrame(list(data[name])).transpose()
+        df.columns = ["#timestamp", prefix]
+        result.append(df)
+
    return result

 def read_bundle_list(prefix, bundle, archive_fid=None):
@@ -32,3 +30,28 @@ def read_bundle_list(prefix, bundle, archive_fid=None):
                    for index, row in bundle.iterrows()]
    return list_data

+
+
+
+
+def write_run_list(prefix, hostname, startTime, basename, fullname, hostlist, data, target_directory):
+    fullpath= '%s/%s_%s/%s_%s_%s' % (target_directory, basename, prefix, hostname, fullname, startTime)
+    os.makedirs('%s/%s_%s' % (target_directory, basename, prefix), exist_ok=True)
+
+    hosts = hostlist.split(';')
+    res = []
+
+    for index, host in enumerate(hosts):
+        tmp = [host, list(data[index]['#timestamp']), list(data[index][prefix])]
+        res.append(tmp)
+
+    with open(fullpath, 'w') as file_id:
+        json.dump(res, file_id)
+    
+
+
+def write_bundle_list(prefix, bundle, data, target_directory):
+    'Writes the power files associated to a bundle'    
+    for index, row in bundle.iterrows():
+        write_run_list(prefix, row.hostname, row.startTime, row.basename, row.fullname, row.hostlist, data[index], target_directory)
+
--- a/expetator/remove_watermark.py
+++ b/expetator/remove_watermark.py
+import expetator.bundle as bundle
+import expetator.monitoring_csv as monitoring_csv
+import expetator.monitoring_list as monitoring_list
+import expetator.watermark as watermark
+import sys
+
+def remove_watermark(target_file, target_dir):
+
+    bundle_data, zip_fid = bundle.init_bundle(target_file)
+
+    try:
+        moj = monitoring_csv.read_bundle_csv('mojitos', bundle_data, zip_fid)
+        moj_cleaned = watermark.remove_watermark_blocks(moj, frequency=20)
+    except:
+        moj_cleaned = None
+
+    try:
+        power = monitoring_list.read_bundle_list('power', bundle_data, zip_fid)
+        power_cleaned = watermark.remove_watermark_blocks(power, frequency=20)
+    except:
+        power_cleaned = None
+
+    watermark.remove_wt_name(bundle_data)
+    bundle.save_bundle(target_file, bundle_data, target_dir)
+
+    if not moj_cleaned is None:
+        monitoring_csv.write_bundle_csv('mojitos', bundle_data, moj_cleaned, target_dir)
+    if not power_cleaned is None:
+        monitoring_list.write_bundle_list('power', bundle_data, power_cleaned, target_dir)
+
+def main():
+    if len(sys.argv) != 3:
+        print('Usage : %s main_file target_directory')
+        sys.exit(0)
+
+    target_file = sys.argv[1]
+    target_dir = sys.argv[2]
+    remove_watermark(target_file, target_dir)
+
+if __name__ == '__main__':
+    main()
--- a/expetator/watermark.py
+++ b/expetator/watermark.py
@@ -94,7 +94,12 @@ def remove_watermark_blocks(block, frequency=10, duration=30):
        ]
        for experiment in range(len(block))
    ]
-        
+
+
+def remove_wt_name(dataframe, target='fullname', signature='wt-30-'):
+    s_len = len(signature)
+    dataframe[target] = [elem[s_len:] for elem in dataframe[target]]
+
 ## Tool for virtualisation

 def demo_watermark_detection(focus, freq):

--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

 setuptools.setup(
    name="expetator",
-    version="0.3.11",
+    version="0.3.12",
    author="Georges Da Costa",
    author_email="georges.da-costa@irit.fr",
    description="A framework for monitoring HPC applications using DVFS",
@@ -24,5 +24,11 @@ setuptools.setup(
                       'leverages/*.[ch]', 'leverages/*.sh', 'leverages/*_mak']},
    include_package_data=True,
    install_requires=['execo'],
+    entry_points={
+        'console_scripts': [
+            'remove_watermark = expetator.remove_watermark:main',
+        ]
+    }
+

 )