Skip to content
Snippets Groups Projects
Commit 373b8e6f authored by Maël Madon
Browse files

typehints and take input and output param as filepaths rather than IO

parent c3e19230
Branches
No related tags found
No related merge requests found
...@@ -16,14 +16,14 @@ from math import ceil ...@@ -16,14 +16,14 @@ from math import ceil
from batmenTools.swf import SwfField from batmenTools.swf import SwfField
def generate_workload(input_swf, output_folder, def generate_workload(input_swf:str, output_folder:str,
start_time=None, start_time:int=None,
job_walltime_factor=2, job_walltime_factor:float=2,
given_walltime_only=False, given_walltime_only:bool=False,
job_grain=1, job_grain:int=1,
indent=None, indent:int=None,
quiet=False, quiet:bool=False,
job_size_function_string='1*nb_res'): job_size_function_string:str='1*nb_res'):
"""Generate a Batsim workload from a SWF trace.""" """Generate a Batsim workload from a SWF trace."""
if not quiet: if not quiet:
print(f"Input file = {input_swf}") print(f"Input file = {input_swf}")
......
...@@ -6,16 +6,15 @@ Inspired from https://gitlab.inria.fr/batsim/batsim/-/blob/master/tools/swf_to_b ...@@ -6,16 +6,15 @@ Inspired from https://gitlab.inria.fr/batsim/batsim/-/blob/master/tools/swf_to_b
""" """
import argparse import argparse
import json
import re import re
from batmenTools.swf import SwfField from batmenTools.swf import SwfField
def filter_workload(input_swf, output_swf=None, def filter_workload(input_swf:str, output_swf:str=None,
partitions_to_select=None, partitions_to_select:list=None,
keep_only=None, keep_only:str=None,
quiet=False): quiet:bool=False):
"""Makes a selection from a SWF trace, optionally outputing it as SWF.""" """Makes a selection from a SWF trace, optionally outputing it as SWF."""
element = '([-+]?\d+(?:\.\d+)?)' element = '([-+]?\d+(?:\.\d+)?)'
r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*') r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*')
...@@ -27,51 +26,56 @@ def filter_workload(input_swf, output_swf=None, ...@@ -27,51 +26,56 @@ def filter_workload(input_swf, output_swf=None,
not_line_match_format = 0 not_line_match_format = 0
users = [] users = []
# Output file
if output_swf is not None:
out_swf = open(output_swf, 'w')
# Let's loop over the lines of the input file # Let's loop over the lines of the input file
i = 0 i = 0
for line in input_swf: with open(input_swf, 'r') as swf:
i += 1 for line in swf:
if not quiet and i % 10000 == 0: i += 1
print(f"\r\033[KProcessing swf line {i}", end="") if not quiet and i % 10000 == 0:
print(f"\r\033[KProcessing swf line {i}", end="")
res = r.match(line)
res = r.match(line)
if res:
# Parsing... if res:
nb_res = int( # Parsing...
float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value))) nb_res = int(
run_time = float(res.group(SwfField.RUN_TIME.value)) float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value)))
submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value))) run_time = float(res.group(SwfField.RUN_TIME.value))
walltime = float(res.group(SwfField.REQUESTED_TIME.value)) submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value)))
user_id = str(res.group(SwfField.USER_ID.value)) walltime = float(res.group(SwfField.REQUESTED_TIME.value))
partition_id = int(res.group(SwfField.PARTITION_ID.value)) user_id = str(res.group(SwfField.USER_ID.value))
partition_id = int(res.group(SwfField.PARTITION_ID.value))
# Select jobs to keep
is_valid_job = (nb_res > 0 and walltime > # Select jobs to keep
run_time and run_time > 0 and submit_time >= 0) is_valid_job = (nb_res > 0 and walltime >
select_partition = ((partitions_to_select is None) or run_time and run_time > 0 and submit_time >= 0)
(partition_id in partitions_to_select)) select_partition = ((partitions_to_select is None) or
use_job = select_partition and ( (partition_id in partitions_to_select))
(keep_only is None) or eval(keep_only)) use_job = select_partition and (
(keep_only is None) or eval(keep_only))
# Increment counters
if not is_valid_job: # Increment counters
not_valid += 1 if not is_valid_job:
elif not use_job: not_valid += 1
not_selected["nb"] += 1 elif not use_job:
not_selected["coreh"] += run_time * nb_res not_selected["nb"] += 1
else: not_selected["coreh"] += run_time * nb_res
selected["nb"] += 1 else:
selected["coreh"] += run_time * nb_res selected["nb"] += 1
if user_id not in users: selected["coreh"] += run_time * nb_res
users.append(user_id) if user_id not in users:
users.append(user_id)
# Output in the swf
if output_swf is not None:
out_swf.write(line)
# Output in the swf else:
if output_swf is not None: not_line_match_format += 1
output_swf.write(line)
else:
not_line_match_format += 1
if not quiet: if not quiet:
print("\n-------------------\nEnd parsing.") print("\n-------------------\nEnd parsing.")
...@@ -98,11 +102,11 @@ def main(): ...@@ -98,11 +102,11 @@ def main():
description="Filter jobs from the SWF input according to some criteria " description="Filter jobs from the SWF input according to some criteria "
"and display the proportion of jobs kept, optionnally outputing the " "and display the proportion of jobs kept, optionnally outputing the "
"selection in a new SWF file.") "selection in a new SWF file.")
parser.add_argument('input_swf', type=argparse.FileType('r'), parser.add_argument('input_swf', type=str,
help='The input SWF file') help='The input SWF file')
parser.add_argument('-o', '--output_swf', parser.add_argument('-o', '--output_swf',
type=argparse.FileType('w'), default=None, type=str, default=None,
help='The optional output SWF file') help='The optional output SWF file.')
parser.add_argument('-sp', '--partitions_to_select', parser.add_argument('-sp', '--partitions_to_select',
type=int, nargs='+', default=None, type=int, nargs='+', default=None,
help="List of partitions to only consider in the input " help="List of partitions to only consider in the input "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment