From 373b8e6f563a831ff0a05eabff483a0a3b9102d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr> Date: Wed, 10 May 2023 16:16:43 +0200 Subject: [PATCH] typehints and take input and output param as filepaths rather than IO --- batmenTools/swf2batsim_split_by_user.py | 16 ++-- batmenTools/swf_filter.py | 104 ++++++++++++------------ 2 files changed, 62 insertions(+), 58 deletions(-) diff --git a/batmenTools/swf2batsim_split_by_user.py b/batmenTools/swf2batsim_split_by_user.py index 818049d..7b2ea18 100755 --- a/batmenTools/swf2batsim_split_by_user.py +++ b/batmenTools/swf2batsim_split_by_user.py @@ -16,14 +16,14 @@ from math import ceil from batmenTools.swf import SwfField -def generate_workload(input_swf, output_folder, - start_time=None, - job_walltime_factor=2, - given_walltime_only=False, - job_grain=1, - indent=None, - quiet=False, - job_size_function_string='1*nb_res'): +def generate_workload(input_swf:str, output_folder:str, + start_time:int=None, + job_walltime_factor:float=2, + given_walltime_only:bool=False, + job_grain:int=1, + indent:int=None, + quiet:bool=False, + job_size_function_string:str='1*nb_res'): """Generate a Batsim workload from a SWF trace.""" if not quiet: print(f"Input file = {input_swf}") diff --git a/batmenTools/swf_filter.py b/batmenTools/swf_filter.py index fe56ba6..3ee2339 100755 --- a/batmenTools/swf_filter.py +++ b/batmenTools/swf_filter.py @@ -6,16 +6,15 @@ Inspired from https://gitlab.inria.fr/batsim/batsim/-/blob/master/tools/swf_to_b """ import argparse -import json import re from batmenTools.swf import SwfField -def filter_workload(input_swf, output_swf=None, - partitions_to_select=None, - keep_only=None, - quiet=False): +def filter_workload(input_swf:str, output_swf:str=None, + partitions_to_select:list=None, + keep_only:str=None, + quiet:bool=False): """Makes a selection from a SWF trace, optionally outputing it as SWF.""" element = '([-+]?\d+(?:\.\d+)?)' r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*') @@ -27,51 +26,56 @@ def filter_workload(input_swf, output_swf=None, not_line_match_format = 0 users = [] + # Output file + if output_swf is not None: + out_swf = open(output_swf, 'w') + # Let's loop over the lines of the input file i = 0 - for line in input_swf: - i += 1 - if not quiet and i % 10000 == 0: - print(f"\r\033[KProcessing swf line {i}", end="") - - res = r.match(line) - - if res: - # Parsing... - nb_res = int( - float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value))) - run_time = float(res.group(SwfField.RUN_TIME.value)) - submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value))) - walltime = float(res.group(SwfField.REQUESTED_TIME.value)) - user_id = str(res.group(SwfField.USER_ID.value)) - partition_id = int(res.group(SwfField.PARTITION_ID.value)) - - # Select jobs to keep - is_valid_job = (nb_res > 0 and walltime > - run_time and run_time > 0 and submit_time >= 0) - select_partition = ((partitions_to_select is None) or - (partition_id in partitions_to_select)) - use_job = select_partition and ( - (keep_only is None) or eval(keep_only)) - - # Increment counters - if not is_valid_job: - not_valid += 1 - elif not use_job: - not_selected["nb"] += 1 - not_selected["coreh"] += run_time * nb_res - else: - selected["nb"] += 1 - selected["coreh"] += run_time * nb_res - if user_id not in users: - users.append(user_id) + with open(input_swf, 'r') as swf: + for line in swf: + i += 1 + if not quiet and i % 10000 == 0: + print(f"\r\033[KProcessing swf line {i}", end="") + + res = r.match(line) + + if res: + # Parsing... + nb_res = int( + float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value))) + run_time = float(res.group(SwfField.RUN_TIME.value)) + submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value))) + walltime = float(res.group(SwfField.REQUESTED_TIME.value)) + user_id = str(res.group(SwfField.USER_ID.value)) + partition_id = int(res.group(SwfField.PARTITION_ID.value)) + + # Select jobs to keep + is_valid_job = (nb_res > 0 and walltime > + run_time and run_time > 0 and submit_time >= 0) + select_partition = ((partitions_to_select is None) or + (partition_id in partitions_to_select)) + use_job = select_partition and ( + (keep_only is None) or eval(keep_only)) + + # Increment counters + if not is_valid_job: + not_valid += 1 + elif not use_job: + not_selected["nb"] += 1 + not_selected["coreh"] += run_time * nb_res + else: + selected["nb"] += 1 + selected["coreh"] += run_time * nb_res + if user_id not in users: + users.append(user_id) + + # Output in the swf + if output_swf is not None: + out_swf.write(line) - # Output in the swf - if output_swf is not None: - output_swf.write(line) - - else: - not_line_match_format += 1 + else: + not_line_match_format += 1 if not quiet: print("\n-------------------\nEnd parsing.") @@ -98,11 +102,11 @@ def main(): description="Filter jobs from the SWF input according to some criteria " "and display the proportion of jobs kept, optionnally outputing the " "selection in a new SWF file.") - parser.add_argument('input_swf', type=argparse.FileType('r'), + parser.add_argument('input_swf', type=str, help='The input SWF file') parser.add_argument('-o', '--output_swf', - type=argparse.FileType('w'), default=None, - help='The optional output SWF file') + type=str, default=None, + help='The optional output SWF file.') parser.add_argument('-sp', '--partitions_to_select', type=int, nargs='+', default=None, help="List of partitions to only consider in the input " -- GitLab