From 373b8e6f563a831ff0a05eabff483a0a3b9102d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Madon?= <mael.madon@irit.fr>
Date: Wed, 10 May 2023 16:16:43 +0200
Subject: [PATCH] Add type hints and take input and output params as file paths
 rather than IO objects

---
 batmenTools/swf2batsim_split_by_user.py |  16 ++--
 batmenTools/swf_filter.py               | 104 ++++++++++++------------
 2 files changed, 62 insertions(+), 58 deletions(-)

diff --git a/batmenTools/swf2batsim_split_by_user.py b/batmenTools/swf2batsim_split_by_user.py
index 818049d..7b2ea18 100755
--- a/batmenTools/swf2batsim_split_by_user.py
+++ b/batmenTools/swf2batsim_split_by_user.py
@@ -16,14 +16,14 @@ from math import ceil
 from batmenTools.swf import SwfField
 
 
-def generate_workload(input_swf, output_folder,
-                      start_time=None,
-                      job_walltime_factor=2,
-                      given_walltime_only=False,
-                      job_grain=1,
-                      indent=None,
-                      quiet=False,
-                      job_size_function_string='1*nb_res'):
+def generate_workload(input_swf:str, output_folder:str,
+                      start_time:int=None,
+                      job_walltime_factor:float=2,
+                      given_walltime_only:bool=False,
+                      job_grain:int=1,
+                      indent:int=None,
+                      quiet:bool=False,
+                      job_size_function_string:str='1*nb_res'):
     """Generate a Batsim workload from a SWF trace."""
     if not quiet:
         print(f"Input file = {input_swf}")
diff --git a/batmenTools/swf_filter.py b/batmenTools/swf_filter.py
index fe56ba6..3ee2339 100755
--- a/batmenTools/swf_filter.py
+++ b/batmenTools/swf_filter.py
@@ -6,16 +6,15 @@ Inspired from https://gitlab.inria.fr/batsim/batsim/-/blob/master/tools/swf_to_b
 """
 
 import argparse
-import json
 import re
 
 from batmenTools.swf import SwfField
 
 
-def filter_workload(input_swf, output_swf=None,
-                    partitions_to_select=None,
-                    keep_only=None,
-                    quiet=False):
+def filter_workload(input_swf:str, output_swf:str=None,
+                    partitions_to_select:list=None,
+                    keep_only:str=None,
+                    quiet:bool=False):
     """Makes a selection from a SWF trace, optionally outputing it as SWF."""
     element = '([-+]?\d+(?:\.\d+)?)'
     r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*')
@@ -27,51 +26,56 @@ def filter_workload(input_swf, output_swf=None,
     not_line_match_format = 0
     users = []
 
+    # Output file
+    if output_swf is not None:
+        out_swf = open(output_swf, 'w')
+
     # Let's loop over the lines of the input file
     i = 0
-    for line in input_swf:
-        i += 1
-        if not quiet and i % 10000 == 0:
-            print(f"\r\033[KProcessing swf line {i}", end="")
-
-        res = r.match(line)
-
-        if res:
-            # Parsing...
-            nb_res = int(
-                float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value)))
-            run_time = float(res.group(SwfField.RUN_TIME.value))
-            submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value)))
-            walltime = float(res.group(SwfField.REQUESTED_TIME.value))
-            user_id = str(res.group(SwfField.USER_ID.value))
-            partition_id = int(res.group(SwfField.PARTITION_ID.value))
-
-            # Select jobs to keep
-            is_valid_job = (nb_res > 0 and walltime >
-                            run_time and run_time > 0 and submit_time >= 0)
-            select_partition = ((partitions_to_select is None) or
-                                (partition_id in partitions_to_select))
-            use_job = select_partition and (
-                (keep_only is None) or eval(keep_only))
-
-            # Increment counters
-            if not is_valid_job:
-                not_valid += 1
-            elif not use_job:
-                not_selected["nb"] += 1
-                not_selected["coreh"] += run_time * nb_res
-            else:
-                selected["nb"] += 1
-                selected["coreh"] += run_time * nb_res
-                if user_id not in users:
-                    users.append(user_id)
+    with open(input_swf, 'r') as swf:
+        for line in swf:
+            i += 1
+            if not quiet and i % 10000 == 0:
+                print(f"\r\033[KProcessing swf line {i}", end="")
+
+            res = r.match(line)
+
+            if res:
+                # Parsing...
+                nb_res = int(
+                    float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value)))
+                run_time = float(res.group(SwfField.RUN_TIME.value))
+                submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value)))
+                walltime = float(res.group(SwfField.REQUESTED_TIME.value))
+                user_id = str(res.group(SwfField.USER_ID.value))
+                partition_id = int(res.group(SwfField.PARTITION_ID.value))
+
+                # Select jobs to keep
+                is_valid_job = (nb_res > 0 and walltime >
+                                run_time and run_time > 0 and submit_time >= 0)
+                select_partition = ((partitions_to_select is None) or
+                                    (partition_id in partitions_to_select))
+                use_job = select_partition and (
+                    (keep_only is None) or eval(keep_only))
+
+                # Increment counters
+                if not is_valid_job:
+                    not_valid += 1
+                elif not use_job:
+                    not_selected["nb"] += 1
+                    not_selected["coreh"] += run_time * nb_res
+                else:
+                    selected["nb"] += 1
+                    selected["coreh"] += run_time * nb_res
+                    if user_id not in users:
+                        users.append(user_id)
+
+                    # Output in the swf
+                    if output_swf is not None:
+                        out_swf.write(line)
 
-                # Output in the swf
-                if output_swf is not None:
-                    output_swf.write(line)
-
-        else:
-            not_line_match_format += 1
+            else:
+                not_line_match_format += 1
 
     if not quiet:
         print("\n-------------------\nEnd parsing.")
@@ -98,11 +102,11 @@ def main():
         description="Filter jobs from the SWF input according to some criteria "
         "and display the proportion of jobs kept, optionnally outputing the "
         "selection in a new SWF file.")
-    parser.add_argument('input_swf', type=argparse.FileType('r'),
+    parser.add_argument('input_swf', type=str,
                         help='The input SWF file')
     parser.add_argument('-o', '--output_swf',
-                        type=argparse.FileType('w'), default=None,
-                        help='The optional output SWF file')
+                        type=str, default=None,
+                        help='The optional output SWF file.')
     parser.add_argument('-sp', '--partitions_to_select',
                         type=int, nargs='+', default=None,
                         help="List of partitions to only consider in the input "
-- 
GitLab