# Skip to content
# Snippets Groups Projects
# events.py 4.84 KiB
import logging
import pandas as pd

class CelcatEvents:
    """Calendar events fetched from CELCAT, loaded into a DataFrame.

    Attributes:
        df: one row per event, restricted to the columns ``start``, ``end``,
            ``allDay``, ``description``, ``eventCategory`` and ``modules``,
            plus ``timeslot_id`` (a copy of the row index).
    """

    def __init__(self, celcat_raw_response):
        """Parse a raw CELCAT response into ``self.df``.

        Args:
            celcat_raw_response: JSON input for ``pandas.read_json``. A string
                that starts with ``[`` or ``{`` is treated as JSON text; any
                other value (path, URL, file-like object) is passed through
                unchanged.
        """
        from io import StringIO

        source = celcat_raw_response
        # Passing literal JSON text to read_json is deprecated in recent
        # pandas releases; a file-like wrapper works on every version.
        if isinstance(source, str) and source.lstrip()[:1] in ('[', '{'):
            source = StringIO(source)

        events = pd.read_json(source)
        events['start'] = events['start'].astype('datetime64[ns]')
        events['end'] = events['end'].astype('datetime64[ns]')

        # Explicit copy so that adding a column below never writes into a
        # view of the full frame.
        events = events[["start", "end", "allDay", "description", "eventCategory", "modules"]].copy()
        events['timeslot_id'] = events.index
        self.df = events

class FilteredCelcatEvents:
    """CELCAT time slots that match the requested courses.

    Every event is paired with every course request (cross join), the HTML
    description of each pair is parsed, and only the pairs accepted by
    ``timeslot_matches_course`` are kept.

    Attributes:
        crossed_df: all (event, course request) pairs, with the columns added
            by ``parse_description`` and a boolean ``keep`` column.
        df: the rows of ``crossed_df`` whose ``keep`` is true.
    """

    def __init__(self, course_request, celcat_events):
        """Cross events with course requests and keep the matching pairs.

        Args:
            course_request: object whose ``df`` attribute is a DataFrame of
                course requests. Columns read by this class:
                ``course_request_id``, ``module_apogee``, ``module_readable``,
                ``begin_date``, ``end_date``, ``course_type``, ``group`` and
                ``expected_nb_slots``.
            celcat_events: object whose ``df`` attribute is a DataFrame of
                events (e.g. a ``CelcatEvents``).
        """
        self._course_request = course_request
        self.crossed_df = celcat_events.df.merge(course_request.df, how='cross')

        # parse descriptions
        parsed_desc_df = self.crossed_df.apply(FilteredCelcatEvents.parse_description, axis=1)
        self.crossed_df = pd.concat([self.crossed_df.reset_index(drop=True), parsed_desc_df], axis=1)

        self.crossed_df['keep'] = self.crossed_df.apply(FilteredCelcatEvents.timeslot_matches_course, axis=1)
        # Copy so that later edits of ``df`` never write into ``crossed_df``.
        self.df = self.crossed_df[self.crossed_df['keep'] == True].copy()  # noqa: E712

    @staticmethod
    def timeslot_matches_course(row):
        """Tell whether one (event, course request) row is a slot of that course.

        The row is rejected when any of these holds:
        - the event lasts all day;
        - the requested course type appears neither in the event category nor
          in the course type parsed from the description;
        - the module code appears neither in the event's modules nor in its
          description;
        - the requested group is not among the parsed groups;
        - the event starts before ``begin_date`` or ends after ``end_date``.

        Args:
            row: mapping (DataFrame row or dict) with the event columns, the
                course request columns and the parsed description columns.

        Returns:
            bool: True when the row passes every check.
        """
        # ``== True`` on purpose: a missing (NaN) flag must not count as all-day.
        if row['allDay'] == True:  # noqa: E712
            return False

        wanted_type = row['course_type'].lower()
        if (wanted_type not in row['eventCategory'].lower()) and (wanted_type not in row['course_type_parsed'].lower()):
            return False

        modules = row['modules']
        if modules is None or isinstance(modules, float):
            # null / NaN: the event lists no module, rely on the description
            modules = ()
        module_code = row['module_apogee']
        if (module_code not in modules) and (module_code.lower() not in row['description'].lower()):
            return False

        if row['group'].lower() not in row['groups_parsed'].lower():
            return False
        if row['start'] < row['begin_date']:
            return False
        if row['end'] > row['end_date']:
            return False

        return True

    def check_expected_nb_timeslots(self):
        """Warn about courses whose fetched slot count differs from the expected one.

        For every course request, the number of rows of ``self.df`` carrying
        its ``course_request_id`` is compared with ``expected_nb_slots``; a
        course request without any row counts as 0. When at least one course
        differs, the offending course requests and their time slots are
        logged as warnings. Nothing is returned.
        """
        # Counts are keyed by the real course_request_id so that a course
        # with no matching slot cannot shift the counts of the others.
        fetched_counts = self.df.groupby('course_request_id')['timeslot_id'].count()

        reordered_course_req_df = self._course_request.df[['course_request_id', 'module_apogee', 'module_readable', 'begin_date', 'end_date', 'course_type', 'group', 'expected_nb_slots']]
        fetched = reordered_course_req_df['course_request_id'].map(fetched_counts).fillna(0).astype(int)
        checked_df = reordered_course_req_df.assign(fetched_timeslot_count=fetched)
        fetch_problem_df = checked_df[checked_df['expected_nb_slots'] != checked_df['fetched_timeslot_count']]

        if len(fetch_problem_df) > 0:
            logging.warning('The number of time slots fetched from CELCAT does not match the expected number of time slots for some courses')
            logging.warning(f'\n{fetch_problem_df}')

            logging.warning('Details of the involved time slots:')
            problematic_courses = fetch_problem_df[['course_request_id']]
            problematic_time_slots = problematic_courses.merge(self.df, how='inner', on='course_request_id')
            problematic_time_slots = problematic_time_slots.sort_values(by=['course_request_id', 'start'])[['course_request_id', 'module_apogee', 'module_readable', 'start', 'end', 'course_type', 'group']]
            logging.warning(f'\n{problematic_time_slots}')

    @staticmethod
    def parse_description(row):
        r"""Extract room, course type and student groups from an event description.

        Expecting an HTML text with this information, separated by HTML/CRLF line breaks:
        - (The room where the course takes place): optional
        - The apogee code of the course and its readable name
        - A list of student groups that should attend this course
        - The course type

        Example: 'FSI / U3-01\r\n\r\n<br />\r\n\r\nKINX7AD1 - Parall&#233;lisme [KINX7AD1]\r\n\r\n<br />\r\n\r\nKINB7TPA41<br />KINB7TPA42\r\n\r\n<br />\r\n\r\nTD\r\n'

        Args:
            row: mapping (DataFrame row or dict) with a string ``description``.

        Returns:
            pandas.Series indexed by ``room_parsed``, ``course_type_parsed``
            and ``groups_parsed``. A value that could not be determined is the
            string ``'unset'``.
        """
        desc = row['description'].replace('\n', '').replace('\r', '')
        # str.split always yields at least one element, so ``fields`` is never empty
        fields = [x.strip() for x in desc.split('<br />')]

        room = 'unset'
        groups_joined = 'unset'
        course_type = 'unset'

        if len(fields) == 1:
            # probably not a course. examples: "CONGES\r\n" or "FERIE\r\n"
            course_type = fields[0]
        else:
            # first field should be the room, but this is not always set
            if fields[0].startswith('FSI / '):
                room = fields[0].replace('FSI / ', '')
                fields = fields[1:]

            # let us assume that the next field is the course name
            fields = fields[1:]

            # Nothing is left when the description was only "room + one
            # field"; type and groups then stay 'unset' instead of failing.
            if fields:
                # last field should be the course type
                course_type = fields[-1]
                # all remaining fields should be student groups
                groups_joined = ' '.join(fields[:-1])

        return pd.Series([room, course_type, groups_joined], index=['room_parsed', 'course_type_parsed', 'groups_parsed'])