import datetime
import itertools
import re

import pandas as pd
from procset import ProcSet

# Template for a slot such as "Mon08h30" or "Mon08h". The WEEKDAYLIST
# placeholder is substituted with an alternation of truncated weekday names
# by gen_slot_parser().
SLOT_RE_TEMPLATE = r'^(?P<weekday>WEEKDAYLIST)(?P<hour>\d{2})h(?P<minute>\d{2})?$'

# A duration such as "1h30" or "2h" (minutes are optional).
DURATION_RE_STR = r'^(?P<hour>\d{1,2})h(?P<minute>\d{1,2})?$'
DURATION_RE = re.compile(DURATION_RE_STR)

# An academic year such as "2023-2024".
ACADEMIC_YEAR_RE_STR = r'^(?P<beginyear>\d{4})-(?P<endyear>\d{4})$'
ACADEMIC_YEAR_RE = re.compile(ACADEMIC_YEAR_RE_STR)


def gen_parsable_weekdays(lang, nb_char):
    '''
    Generate a list of truncated weekdays, and a string->isoweekday map to
    parse & interpret results.

    Args:
        lang: The language to use, such as 'fr' for French or 'en' for English.
        nb_char: The number of characters to use to represent each week day.

    Returns:
        list(str): The ordered list of truncated week day names.
            In iso order (Monday to Sunday).
        dict(str, int): A map from truncated week day names to their iso
            number (1 is Monday, 7 is Sunday).

    Raises:
        ValueError: If lang is not supported, or if truncating the week day
            names to nb_char characters makes two of them identical.
    '''
    lang_to_weekdays = {
        'en': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        'fr': ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche'],
    }

    if lang not in lang_to_weekdays:
        raise ValueError(f'lang={lang} is not supported. supported languages: {sorted(lang_to_weekdays)}')

    trunc_weekdays = [wd[:nb_char] for wd in lang_to_weekdays[lang]]

    # Raised explicitly rather than asserted: this validates caller input and
    # must keep working when Python runs with -O (which strips asserts).
    if len(set(trunc_weekdays)) != len(trunc_weekdays):
        raise ValueError(f"Invalid weekday format: using lang={lang} with {nb_char} characters creates non-unique truncated weekdays {trunc_weekdays}")

    # ISO weekday numbers start at 1 (Monday).
    trunc_weekdays_map = {wd: num for num, wd in enumerate(trunc_weekdays, start=1)}
    return trunc_weekdays, trunc_weekdays_map


def gen_slot_parser(lang, weekday_nb_char):
    '''
    Generate a parser (compiled regex and truncated weekday name to iso
    weekday map) for a given lang and number of characters per weekday.

    Args:
        lang: The language to use, such as 'fr' for French or 'en' for English.
        weekday_nb_char: The number of characters to use to represent each
            week day.

    Returns:
        re.Pattern: The compiled regular expression that can parse a slot.
        dict(str, int): A map from truncated week day names to their iso
            number (1 is Monday, 7 is Sunday).

    Raises:
        ValueError: Propagated from gen_parsable_weekdays() when lang or
            weekday_nb_char is invalid.
    '''
    weekdays, weekday_parse_map = gen_parsable_weekdays(lang, weekday_nb_char)

    # Names are escaped before being spliced into the pattern so they are
    # always matched literally.
    daylist = '|'.join(re.escape(wd) for wd in weekdays)
    slot_re = re.compile(SLOT_RE_TEMPLATE.replace('WEEKDAYLIST', daylist))
    return slot_re, weekday_parse_map


def slot_to_dt(slot: str, year: int, week: int, re_parser: re.Pattern, wd_iso_map: dict[str, int]) -> datetime.datetime:
    '''
    Generate a time point (datetime) from a slot, its context (year, week)
    and parsing information.

    Args:
        slot: The slot to parse: truncated week day name, two-digit hour,
            'h', then optional two-digit minutes.
        year: The ISO year of the slot.
        week: The ISO week number of the slot.
        re_parser: The compiled slot regex, as returned by gen_slot_parser().
        wd_iso_map: The truncated week day name to iso weekday map, as
            returned by gen_slot_parser().

    Returns:
        datetime.datetime: The naive datetime at which the slot starts.

    Raises:
        ValueError: If slot does not match re_parser.
    '''
    m = re_parser.match(slot)
    if m is None:
        raise ValueError(f"Slot '{slot}' could not be parsed")

    wd_iso = wd_iso_map[m['weekday']]
    hours = int(m['hour'])
    minutes = int(m['minute'] or 0)  # the minutes part is optional

    # fromisocalendar() gives midnight of the requested day.
    day_start = datetime.datetime.fromisocalendar(year, week, wd_iso)
    return day_start + datetime.timedelta(hours=hours, minutes=minutes)


def duration_to_timedelta(duration: str) -> datetime.timedelta:
    '''
    Parse a string duration to a timedelta.

    Args:
        duration: The duration to parse, such as '1h30' or '2h'.

    Returns:
        datetime.timedelta: The parsed duration.

    Raises:
        ValueError: If duration does not match DURATION_RE.
    '''
    m = DURATION_RE.match(duration)
    if m is None:
        raise ValueError(f"Duration '{duration}' could not be parsed")

    hours = int(m['hour'])
    minutes = int(m['minute'] or 0)  # the minutes part is optional
    return datetime.timedelta(hours=hours, minutes=minutes)


def year_from_academic_year_week(academic_year, week, week_cut=32):
    '''
    Determine the year to use of an (academic year, week) tuple depending on
    whether week is before or after cut.

    Args:
        academic_year: The academic year as two consecutive years, such as
            '2023-2024'.
        week: The week number.
        week_cut: The last week number that belongs to the end year.

    Returns:
        int: The end year if week <= week_cut, the begin year otherwise.

    Raises:
        ValueError: If academic_year cannot be parsed, or if its two years
            are not consecutive.
    '''
    m = ACADEMIC_YEAR_RE.match(academic_year)
    if m is None:
        raise ValueError(f"Academic year '{academic_year}' could not be parsed")

    begin_year = int(m['beginyear'])
    end_year = int(m['endyear'])
    if end_year != begin_year + 1:
        raise ValueError(f"Invalid academic year '{academic_year}': years should be consecutive")

    return end_year if week <= week_cut else begin_year


def read_weekslot_csv(filename, slot_lang, slot_nb_char):
    '''
    Read a CSV file of weekly slots and flatten it to one row per occurrence.

    The columns mod_code, display_name, student_group, slots, duration,
    academic_year and weeks are read as strings. For each input row, one
    output row is produced for every (slot, week) combination, where slots
    is a whitespace-separated list of slots and weeks is parsed with
    ProcSet.from_str().

    Args:
        filename: The CSV file to read (passed to pandas.read_csv).
        slot_lang: The language of the week day names used in slots.
        slot_nb_char: The number of characters of each week day name in slots.

    Returns:
        pandas.DataFrame: One row per occurrence, with columns mod_code,
            display_name, student_group, start_dt and end_dt. The columns
            are present even when there is no occurrence at all.

    Raises:
        ValueError: If a slot, duration or academic year cannot be parsed,
            or if slot_lang / slot_nb_char are invalid.
    '''
    col_types = {
        'mod_code': str,
        'display_name': str,
        'student_group': str,
        'slots': str,
        'duration': str,
        'academic_year': str,
        'weeks': str,
    }
    flat_columns = ['mod_code', 'display_name', 'student_group', 'start_dt', 'end_dt']

    df = pd.read_csv(filename, dtype=col_types)
    re_parser, wd_iso_map = gen_slot_parser(slot_lang, slot_nb_char)

    flat_slots = []
    for _, row in df.iterrows():
        slots = row['slots'].split()
        weeks = ProcSet.from_str(row['weeks'])
        # The duration is the same for every occurrence of the row.
        duration = duration_to_timedelta(row['duration'])

        for slot, week in itertools.product(slots, weeks):
            year = year_from_academic_year_week(row['academic_year'], week)
            dt_begin = slot_to_dt(slot, year, week, re_parser, wd_iso_map)
            flat_slots.append({
                'mod_code': row['mod_code'],
                'display_name': row['display_name'],
                'student_group': row['student_group'],
                'start_dt': dt_begin,
                'end_dt': dt_begin + duration,
            })

    # Columns are given explicitly so that an input without any occurrence
    # still yields a DataFrame with the expected schema.
    return pd.DataFrame(flat_slots, columns=flat_columns)