diff --git a/default.nix b/default.nix
index 68776ea5039cd86eee667636d3c90dde93abba61..bcf81e6346aa08eca681702475374902c74b794c 100644
--- a/default.nix
+++ b/default.nix
@@ -26,6 +26,7 @@ in rec {
     propagatedBuildInputs = with pyPkgs; [
       xlrd
       pandas
+      patool
       click
     ];
   };
diff --git a/flake.nix b/flake.nix
index 1043d0c90eab08d8e647a79c862b7ad52b86d0de..a37f70e080dee166ce4507436c502c907f99e036 100644
--- a/flake.nix
+++ b/flake.nix
@@ -10,6 +10,7 @@ in rec {
         packages = import ./default.nix { inherit pkgs; };
 
         apps.realist-students-xls2csv = flake-utils.lib.mkApp { drv = packages.ut3_survival; exePath = "/bin/realist-students-xls2csv"; };
+        apps.prepare-moodle-assessment = flake-utils.lib.mkApp { drv = packages.ut3_survival; exePath = "/bin/prepare-moodle-assessment"; };
         defaultPackage = packages.ut3_survival;
       }
     );
diff --git a/pyproject.toml b/pyproject.toml
index e87ebf5bf6c45c403c56305e874415dd81305055..c1304e341f6f32796cc8b0a9472ad60db928b212 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,3 +29,4 @@ dependencies = [
 
 [project.scripts]
 realist-students-xls2csv = "ut3_survival.cmd.realist_students_xls_to_csv:main"
+prepare-moodle-assessment = "ut3_survival.cmd.prepare_moodle_assessment:main"
diff --git a/ut3_survival/cmd/prepare_moodle_assessment.py b/ut3_survival/cmd/prepare_moodle_assessment.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e0d0286db7dd384ddbda6b5b01790776ffa977b
--- /dev/null
+++ b/ut3_survival/cmd/prepare_moodle_assessment.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+import sys
+
+import click
+import pandas
+
+from ut3_survival import realist
+from ut3_survival import moodle
+
+
+@click.command()
+@click.option('-z', '--moodle-submissions-zip-file', required=True, help='The zip file that contains all the submissions done by the students.')
+@click.option('-p', '--moodle-participants-csv-file', required=True, help='The csv file that lists the participants of the Moodle course you evaluate.')
+@click.option('-s', '--realist-students-csv-file', required=True, help='The csv file that lists the students that you have to evaluate.')
+@click.option('-o', '--output-dir', required=True, help='The output directory where the assessment should be prepared.')
+@click.option('-x', '--extract', is_flag=True, default=False, help='If set, extract archived files into each student directory.')
+def main(moodle_submissions_zip_file, moodle_participants_csv_file, realist_students_csv_file, output_dir, extract):
+    """Prepare a per-student assessment directory from Moodle submissions."""
+    students = realist.read_parse_csv(realist_students_csv_file)
+    moodle_participants = moodle.read_parse_participants(moodle_participants_csv_file)
+    students_to_keep = moodle.join_participants_with_realist_students(moodle_participants, students)
+
+    # move the submission of each kept student into output_dir
+    moodle.prepare_assessment_repo(moodle_submissions_zip_file, output_dir, students_to_keep)
+
+    if extract:
+        moodle.extract_archives_from_repo(output_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ut3_survival/moodle.py b/ut3_survival/moodle.py
new file mode 100644
index 0000000000000000000000000000000000000000..40d8d4611bd466aa86e832f69a09ab5aca15b966
--- /dev/null
+++ b/ut3_survival/moodle.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""Turn a Moodle submissions zip into a per-student assessment repository."""
+import glob
+import os
+import re
+import shutil
+import sys
+import tempfile
+import zipfile
+
+import pandas
+import patoolib
+
+
+def read_parse_participants(filename: str) -> pandas.DataFrame:
+    """Read a Moodle participants CSV export and normalize its columns.
+
+    The four expected (French) headers are renamed to moodle_firstname,
+    moodle_lastname, id and moodle_email. Missing ids are replaced by -1 so
+    that the id column can be cast to int.
+
+    Raises RuntimeError when the headers are not exactly the expected ones.
+    """
+    df = pandas.read_csv(filename)
+
+    column_names = [str(x) for x in df.columns]
+    expected_column_names = ['Prénom', 'Nom', "Numéro d'identification", 'Adresse de courriel']
+    if column_names != expected_column_names:
+        raise RuntimeError(f"unexpected column names in moodle participant file '{filename}': got '{column_names}' while '{expected_column_names}' was expected")
+
+    df.rename(columns={
+        expected_column_names[0]: "moodle_firstname",
+        expected_column_names[1]: "moodle_lastname",
+        expected_column_names[2]: "id",
+        expected_column_names[3]: "moodle_email",
+    }, inplace=True)
+    # NaN cannot be cast to int, so missing ids are mapped to -1 first
+    df['id'] = df['id'].fillna(-1)
+    df = df.astype({"id": int})
+    return df
+
+
+def join_participants_with_realist_students(participants_df, realist_students_df, check=True):
+    """Inner-join realist students with Moodle participants.
+
+    The merge uses the columns that both frames share. When the result does
+    not have as many rows as realist_students_df, a message is printed on
+    stderr and, if check is true, ValueError is raised.
+    """
+    joined_df = realist_students_df.merge(participants_df, how='inner')
+
+    if len(realist_students_df) != len(joined_df):
+        error_msg = 'some students have been lost by the inner join of realist students to moodle participants!'
+        print(error_msg, file=sys.stderr)
+        if check:
+            raise ValueError(error_msg)
+
+    return joined_df
+
+
+def name_to_dirname(input_name: str) -> str:
+    """Return input_name stripped, lowercased, with spaces replaced by underscores."""
+    return input_name.strip().lower().replace(" ", "_")
+
+
+def prepare_assessment_repo(submissions_zip_filename: str, repo_path: str, students_to_keep_df: pandas.DataFrame, orig_dirname='.orig'):
+    """Create repo_path with one directory per student holding their submission.
+
+    The zip is extracted into a temporary directory. Entries named
+    "<lastname> <firstname>_<digits>..." are matched against the rows of
+    students_to_keep_df and moved to
+    repo_path/<lastname>-<firstname>-<id>/<orig_dirname>. Unparsable entries,
+    duplicated prefixes and students without a matching entry are reported
+    on stderr and skipped.
+
+    Raises FileExistsError (from os.makedirs) if repo_path already exists.
+    """
+    with tempfile.TemporaryDirectory() as tmp_extract_dir:
+        with zipfile.ZipFile(submissions_zip_filename, 'r') as zf:
+            zf.extractall(path=tmp_extract_dir)
+
+        # parse the name of the directories in the zip extract
+        # (sorted so that the entry kept for a duplicated prefix is deterministic)
+        regex = re.compile(r'^(.* .*)_\d+.*$')
+        prefix_to_dirname = {}
+        subdir_names = sorted(x.name for x in os.scandir(tmp_extract_dir))
+        for subdir_name in subdir_names:
+            m = regex.match(subdir_name)
+            if m is None:
+                print(f"directory '{subdir_name}' could not be parsed", file=sys.stderr)
+                continue
+            if m.group(1) in prefix_to_dirname:
+                print(f"duplication of prefix '{m.group(1)}' while parsing directories of zipfile '{submissions_zip_filename}'", file=sys.stderr)
+                continue
+            prefix_to_dirname[m.group(1)] = m.group(0)
+
+        os.makedirs(repo_path)
+        for _, student in students_to_keep_df.iterrows():
+            expected_dir_prefix = f"{student['moodle_lastname']} {student['moodle_firstname']}"
+
+            if expected_dir_prefix not in prefix_to_dirname:
+                print(f"warning: '{expected_dir_prefix}' dir not found in zip. student: {dict(student[['id', 'lastname', 'firstname', 'email']])}", file=sys.stderr)
+                continue
+
+            renamed_dir = "{}/{}-{}-{}/{}".format(
+                repo_path,
+                name_to_dirname(student['moodle_lastname']),
+                name_to_dirname(student['moodle_firstname']),
+                student['id'],
+                orig_dirname,
+            )
+
+            shutil.move(os.path.join(tmp_extract_dir, prefix_to_dirname[expected_dir_prefix]), renamed_dir)
+
+
+def extract_archives_from_repo(repo_path, orig_dirname='.orig'):
+    """Unpack the archives found in each student's original submission.
+
+    Every regular file of repo_path/*/<orig_dirname> is extracted with patool
+    into the parent (student) directory; a file patool fails to extract is
+    moved there instead.
+    """
+    for orig_dir in glob.glob(f"{repo_path}/*/{orig_dirname}"):
+        student_assessment_dir = os.path.join(orig_dir, os.pardir)
+        for file in os.scandir(orig_dir):
+            if file.is_file():
+                try:
+                    patoolib.extract_archive(file.path, outdir=student_assessment_dir, interactive=False, verbosity=-1)
+                except patoolib.util.PatoolError:
+                    # not an archive patool can handle: keep the raw file
+                    shutil.move(file.path, student_assessment_dir)