diff --git a/swf_filter.py b/swf_filter.py index e03708dcdbb9bf3c0b21de63d0a4eaa716cd098b..790b606609bbe6dd097b9eef37757e3b5ca5a1fb 100755 --- a/swf_filter.py +++ b/swf_filter.py @@ -12,17 +12,10 @@ import re from swf import SwfField -def generate_workload(input_swf, output_swf=None, - partitions_to_select=None, - job_walltime_factor=2, - given_walltime_only=False, - job_grain=1, - platform_size=None, - indent=None, - translate_submit_times=False, - keep_only=None, - quiet=False, - job_size_function_string='1*nb_res'): +def filter_workload(input_swf, output_swf=None, + partitions_to_select=None, + keep_only=None, + quiet=False): """Makes a selection from a SWF trace, optionally outputing it as SWF.""" element = '([-+]?\d+(?:\.\d+)?)' r = re.compile('\s*' + (element + '\s+') * 17 + element + '\s*') @@ -45,23 +38,14 @@ def generate_workload(input_swf, output_swf=None, if res: # Parsing... - job_id = (int(float(res.group(SwfField.JOB_ID.value)))) nb_res = int( float(res.group(SwfField.REQUESTED_NUMBER_OF_PROCESSORS.value))) run_time = float(res.group(SwfField.RUN_TIME.value)) submit_time = max(0, float(res.group(SwfField.SUBMIT_TIME.value))) - walltime = max(job_walltime_factor * run_time, - float(res.group(SwfField.REQUESTED_TIME.value))) + walltime = float(res.group(SwfField.REQUESTED_TIME.value)) user_id = str(res.group(SwfField.USER_ID.value)) - partition_id = int(res.group(SwfField.PARTITION_ID.value)) - # nb_res may be changed by calling a user-given function - nb_res = eval(job_size_function_string) - - if given_walltime_only: - walltime = float(res.group(SwfField.REQUESTED_TIME.value)) - # Select jobs to keep is_valid_job = (nb_res > 0 and walltime > run_time and run_time > 0 and submit_time >= 0) @@ -70,14 +54,13 @@ def generate_workload(input_swf, output_swf=None, use_job = select_partition and ( (keep_only is None) or eval(keep_only)) + # Increment counters if not is_valid_job: not_valid += 1 - if not use_job: + elif not use_job: not_selected["nb"] += 1 not_selected["coreh"] += run_time * nb_res - else: - # Increment counters selected["nb"] += 1 selected["coreh"] += run_time * nb_res if user_id not in users: @@ -87,25 +70,22 @@ def generate_workload(input_swf, output_swf=None, if output_swf is not None: output_swf.write(line) - else: not_line_match_format += 1 if not quiet: - print('\n-------------------\nEnd parsing.') - print('Total {} jobs and {} users have been created.'.format( - selected["nb"], len(users))) + print("\n-------------------\nEnd parsing.") print( - 'Total number of core-hours: {:.0f}'.format(selected["coreh"] / 3600)) - print('{} valid jobs were not selected (keep_only) for {:.0f} core-hour'.format( - not_selected["nb"], not_selected["coreh"] / 3600)) + f"Total {selected['nb']} jobs and {len(users)} users have been created.") + print(f"Total number of core-hours: {selected['coreh'] / 3600:.0f}") + print(f"{not_selected['nb']} valid jobs were not selected (keep_only) " + f"for {not_selected['coreh'] / 3600:.0f} core-hour") print("Jobs not selected: {:.1f}% in number, {:.1f}% in core-hour" - .format(not_selected["nb"] / (not_selected["nb"]+selected["nb"]) * 100, - not_selected["coreh"] / (selected["coreh"]+not_selected["coreh"]) * 100)) - print('{} out of {} lines in the file did not match the swf format'.format( - not_line_match_format, i)) - print('{} jobs were not valid'.format(not_valid)) - + .format(not_selected["nb"] / (not_selected["nb"]+selected["nb"]) * 100, + not_selected["coreh"] / (selected["coreh"]+not_selected["coreh"]) * 100)) + print(f"{not_line_match_format} out of {i} lines in the file did not " + "match the swf format") + print(f"{not_valid} jobs were not valid") def main(): @@ -115,7 +95,9 @@ def main(): Parses the input arguments then calls generate_flat_platform. """ parser = argparse.ArgumentParser( - description='Reads a SWF (Standard Workload Format) file and outputs some stats') + description="Filter jobs from the SWF input according to some criteria " + "and display the proportion of jobs kept, optionnally outputing the " + "selection in a new SWF file.") parser.add_argument('input_swf', type=argparse.FileType('r'), help='The input SWF file') parser.add_argument('-o', '--output_swf', @@ -125,27 +107,6 @@ def main(): type=int, nargs='+', default=None, help='List of partitions to only consider in the input trace. The jobs running in the other partitions will be discarded.') - parser.add_argument('-jsf', '--job-size-function', - type=str, - default='1*nb_res', - help='The function to apply on the jobs size. ' - 'The identity is used by default.') - parser.add_argument('-jwf', '--job_walltime_factor', - type=float, default=2, - help='Jobs walltimes are computed by the formula max(givenWalltime, jobWalltimeFactor*givenRuntime)') - parser.add_argument('-gwo', '--given_walltime_only', - action="store_true", - help='If set, only the given walltime in the trace will be used') - parser.add_argument('-jg', '--job_grain', - type=int, default=1, - help='Selects the level of detail we want for jobs. This parameter is used to group jobs that have close running time') - parser.add_argument('-pf', '--platform_size', type=int, default=None, - help='If set, the number of machines to put in the output JSON files is set by this parameter instead of taking the maximum job size') - parser.add_argument('-i', '--indent', type=int, default=None, - help='If set to a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0, or negative, will only insert newlines. The default value (None) selects the most compact representation.') - parser.add_argument('-t', '--translate_submit_times', - action="store_true", - help="If set, the jobs' submit times will be translated towards 0") parser.add_argument('--keep_only', type=str, default=None, @@ -155,18 +116,11 @@ def main(): args = parser.parse_args() - generate_workload(input_swf=args.input_swf, - output_swf=args.output_swf, - partitions_to_select=args.partitions_to_select, - job_walltime_factor=args.job_walltime_factor, - given_walltime_only=args.given_walltime_only, - job_grain=args.job_grain, - platform_size=args.platform_size, - indent=args.indent, - translate_submit_times=args.translate_submit_times, - keep_only=args.keep_only, - quiet=args.quiet, - job_size_function_string=args.job_size_function) + filter_workload(input_swf=args.input_swf, + output_swf=args.output_swf, + partitions_to_select=args.partitions_to_select, + keep_only=args.keep_only, + quiet=args.quiet) if __name__ == "__main__":