-
Millian Poquet authored
events.py 4.84 KiB
import io
import logging

import pandas as pd
class CelcatEvents:
    """Time slots loaded from a raw CELCAT JSON response.

    After construction, ``self.df`` holds one row per event with the columns
    ``start``, ``end``, ``allDay``, ``description``, ``eventCategory``,
    ``modules`` and ``timeslot_id`` (a copy of the row index).
    """

    # Columns of the raw response that are kept, in this order.
    _KEPT_COLUMNS = ["start", "end", "allDay", "description", "eventCategory", "modules"]

    def __init__(self, celcat_raw_response):
        """Parse ``celcat_raw_response`` into ``self.df``.

        Args:
            celcat_raw_response: anything ``pandas.read_json`` accepts (path,
                URL, file-like object), or the JSON text itself.

        Raises:
            KeyError: if a non-empty response lacks one of the kept columns.
        """
        source = celcat_raw_response
        # Passing literal JSON text to read_json is deprecated in recent pandas
        # releases; wrap it in a file-like object. Strings that do not look
        # like a JSON document are left untouched so paths/URLs keep working.
        if isinstance(source, str) and source.lstrip().startswith(('[', '{')):
            source = io.StringIO(source)

        events = pd.read_json(source)
        if events.empty:
            # An empty response has no columns at all; build the expected
            # (empty) schema instead of failing on the column lookups below.
            events = pd.DataFrame(columns=CelcatEvents._KEPT_COLUMNS)

        events['start'] = events['start'].astype('datetime64[ns]')
        events['end'] = events['end'].astype('datetime64[ns]')

        # Explicit copy: adding a column to a column-subset slice would
        # otherwise be a chained assignment on a possible view.
        events = events[CelcatEvents._KEPT_COLUMNS].copy()
        events['timeslot_id'] = events.index
        self.df = events
class FilteredCelcatEvents:
    """CELCAT time slots matched against a set of course requests.

    Attributes set by the constructor:
        crossed_df: every (time slot, course request) pair, with the parsed
            description columns and a boolean ``keep`` column.
        df: the rows of ``crossed_df`` whose ``keep`` is true.
    """

    # Names of the columns produced by parse_description, in order.
    _PARSED_COLUMNS = ['room_parsed', 'course_type_parsed', 'groups_parsed']
    # Prefix that marks the first description field as a room.
    _ROOM_PREFIX = 'FSI / '

    def __init__(self, course_request, celcat_events):
        """Cross the events with the course requests and keep the matches.

        Args:
            course_request: object exposing the requested courses as ``.df``.
            celcat_events: object exposing the CELCAT time slots as ``.df``.
        """
        self._course_request = course_request
        crossed = celcat_events.df.merge(course_request.df, how='cross').reset_index(drop=True)

        # Parse descriptions. Rows are built explicitly (instead of
        # DataFrame.apply(axis=1)) so that an empty cross product still yields
        # the expected columns.
        parsed_desc_df = pd.DataFrame(
            [FilteredCelcatEvents._parse_description_text(text) for text in crossed['description']],
            columns=FilteredCelcatEvents._PARSED_COLUMNS,
            index=crossed.index,
        )
        crossed = pd.concat([crossed, parsed_desc_df], axis=1)

        crossed['keep'] = pd.Series(
            [FilteredCelcatEvents.timeslot_matches_course(row) for _, row in crossed.iterrows()],
            index=crossed.index,
            dtype=bool,
        )
        self.crossed_df = crossed
        self.df = crossed[crossed['keep']].copy()

    @staticmethod
    def _text_or_empty(value):
        """Return ``value`` if it is a string, else ``''`` (None/NaN/NA cells)."""
        return value if isinstance(value, str) else ''

    @staticmethod
    def _is_missing(value):
        """Tell whether a scalar cell holds a missing value (None, NaN or pd.NA)."""
        return value is None or value is pd.NA or (isinstance(value, float) and value != value)

    @staticmethod
    def timeslot_matches_course(row):
        """Tell whether a crossed (time slot, course request) row is a match.

        A row matches when the slot is not an all-day event, the requested
        course type appears in the event category or in the parsed course
        type, the requested module is listed in ``modules`` or appears in the
        description, the requested group appears in the parsed groups, and the
        slot lies within ``[begin_date, end_date]``. Text comparisons are
        case-insensitive substring tests; missing event fields never match.

        Args:
            row: mapping-like row with the event, request and parsed columns.

        Returns:
            bool: True if the time slot should be kept for the course request.
        """
        # '== True' (not truthiness) on purpose: a missing allDay value must
        # not be treated as an all-day event.
        if row['allDay'] == True:  # noqa: E712
            return False

        wanted_type = row['course_type'].lower()
        category = FilteredCelcatEvents._text_or_empty(row['eventCategory']).lower()
        parsed_type = FilteredCelcatEvents._text_or_empty(row['course_type_parsed']).lower()
        if wanted_type not in category and wanted_type not in parsed_type:
            return False

        wanted_module = row['module_apogee']
        modules = row['modules']
        if FilteredCelcatEvents._is_missing(modules):
            modules = ()
        description = FilteredCelcatEvents._text_or_empty(row['description']).lower()
        if wanted_module not in modules and wanted_module.lower() not in description:
            return False

        parsed_groups = FilteredCelcatEvents._text_or_empty(row['groups_parsed']).lower()
        if row['group'].lower() not in parsed_groups:
            return False

        if row['start'] < row['begin_date']:
            return False
        if row['end'] > row['end_date']:
            return False
        return True

    def check_expected_nb_timeslots(self):
        """Warn about course requests whose fetched slot count is unexpected.

        The number of kept time slots is counted per ``course_request_id`` and
        compared with ``expected_nb_slots``. Requests without any kept slot
        count as 0 fetched slots. Mismatches, and the time slots involved, are
        logged as warnings.

        Returns:
            pandas.DataFrame: the course requests whose fetched count differs
            from the expected one (empty when everything matches).
        """
        # Keep the real ids as group keys instead of assuming they are 0..n-1.
        fetched_counts = (
            self.df.groupby('course_request_id')['timeslot_id']
            .count()
            .rename('fetched_timeslot_count')
            .reset_index()
        )

        reordered_course_req_df = self._course_request.df[[
            'course_request_id', 'module_apogee', 'module_readable',
            'begin_date', 'end_date', 'course_type', 'group', 'expected_nb_slots',
        ]]
        # Left merge: a request with no kept slot has no group at all and must
        # still be checked (with a fetched count of 0).
        checked_df = reordered_course_req_df.merge(fetched_counts, how='left', on='course_request_id')
        checked_df['fetched_timeslot_count'] = checked_df['fetched_timeslot_count'].fillna(0).astype(int)

        fetch_problem_df = checked_df[checked_df['expected_nb_slots'] != checked_df['fetched_timeslot_count']]
        if len(fetch_problem_df) > 0:
            logging.warning('The number of time slots fetched from CELCAT does not match the expected number of time slots for some courses')
            logging.warning(f'\n{fetch_problem_df}')

            logging.warning('Details of the involved time slots:')
            problematic_courses = fetch_problem_df[['course_request_id']]
            problematic_time_slots = problematic_courses.merge(self.df, how='inner', on='course_request_id')
            problematic_time_slots = problematic_time_slots.sort_values(by=['course_request_id', 'start'])[
                ['course_request_id', 'module_apogee', 'module_readable', 'start', 'end', 'course_type', 'group']
            ]
            logging.warning(f'\n{problematic_time_slots}')
        return fetch_problem_df

    @staticmethod
    def _parse_description_text(description):
        """Split a description into ``(room, course_type, groups_joined)``.

        Any part that cannot be determined is returned as ``'unset'``.
        """
        room = 'unset'
        course_type = 'unset'
        groups_joined = 'unset'
        if not isinstance(description, str):
            # Missing description (None/NaN): nothing to parse.
            return room, course_type, groups_joined

        desc = description.replace('\n', '').replace('\r', '')
        # str.split always yields at least one element.
        fields = [x.strip() for x in desc.split('<br />')]

        if len(fields) == 1:
            # probably not a course. examples: "CONGES\r\n" or "FERIE\r\n"
            return room, fields[0], groups_joined

        # first field should be the room, but this is not always set
        prefix = FilteredCelcatEvents._ROOM_PREFIX
        if fields[0].startswith(prefix):
            room = fields[0].replace(prefix, '')
            fields = fields[1:]

        # let us assume that the next field is the course name
        fields = fields[1:]

        # Short descriptions (e.g. room + one more field) leave nothing here;
        # keep 'unset' instead of indexing an empty list.
        if fields:
            # last field should be the course type
            course_type = fields[-1]
            # all remaining fields should be student groups
            groups_joined = ' '.join(fields[:-1])
        return room, course_type, groups_joined

    @staticmethod
    def parse_description(row):
        """Parse the ``description`` of a row into room, course type and groups.

        Expecting an HTML text with this information, separated by HTML/CRLF line breaks:
        - (The room where the course takes place): optional
        - The apogee code of the course and its readable name
        - A list of student groups that should attend this course
        - The course type

        Example: 'FSI / U3-01\\r\\n\\r\\n<br />\\r\\n\\r\\nKINX7AD1 - Parallélisme [KINX7AD1]\\r\\n\\r\\n<br />\\r\\n\\r\\nKINB7TPA41<br />KINB7TPA42\\r\\n\\r\\n<br />\\r\\n\\r\\nTD\\r\\n'

        Args:
            row: mapping-like object with a ``description`` entry.

        Returns:
            pandas.Series indexed by ``room_parsed``, ``course_type_parsed``
            and ``groups_parsed``; undetermined values are ``'unset'``.
        """
        parsed = FilteredCelcatEvents._parse_description_text(row['description'])
        return pd.Series(list(parsed), index=FilteredCelcatEvents._PARSED_COLUMNS)