diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000000000000000000000000000000000000..32c85112f6ea359e3b01591ab5e94f6ffd8e95a3
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,16 @@
+# using old nixpkgs because tatsu (ics dependency) broke as of nixpkgs-22.05
+{ pkgs ? import (fetchTarball {
+    url = "https://github.com/NixOS/nixpkgs/archive/21.11.tar.gz";
+    sha256 = "sha256:162dywda2dvfj1248afxc45kcrg83appjd0nmdb541hl7rnncf02";
+  }) {}
+}:
+
+pkgs.mkShell {
+  buildInputs = with pkgs; [
+    python3Packages.ipython
+    python3Packages.requests
+    python3Packages.pandas
+    python3Packages.nltk
+    python3Packages.ics
+  ];
+}
diff --git a/src/script.py b/src/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb72e8f269a46367257eedc50990066e1add1d21
--- /dev/null
+++ b/src/script.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""Cross-check CELCAT timetable slots against expected courses and export them.
+
+Reads the expected courses from ``input-data.csv``, fetches the calendar data
+for their modules from the CELCAT endpoint, prints the courses whose number of
+fetched timeslots differs from ``expected_nb_slots`` and writes the matching
+timeslots to ``out.ics``.  The raw server response is saved to ``out.json`` and
+the timeslot/course cross product to ``/tmp/debug.csv`` for debugging.
+"""
+import io
+
+import ics
+import requests
+import pandas as pd
+import nltk  # NOTE(review): not used anywhere in this script
+
+date_columns = ['begin_date', 'end_date']
+input_dtypes = {
+    'module_apogee': 'str',
+    'module_readable': 'str',
+    'begin_date': 'str',
+    'end_date': 'str',
+    'course_type': 'str',
+    'group': 'str',
+    'expected_nb_slots': 'int64',
+}
+# Apply the declared dtypes so that codes or groups made only of digits stay
+# strings; the date columns are converted by parse_dates instead.
+input_data = pd.read_csv(
+    'input-data.csv',
+    dtype={col: dtype for col, dtype in input_dtypes.items() if col not in date_columns},
+    parse_dates=date_columns,
+)
+if input_data.empty:
+    raise SystemExit('input-data.csv lists no course: nothing to check')
+input_data['input_id'] = input_data.index
+
+input_date_range_min = input_data['begin_date'].min().strftime("%Y-%m-%d")
+# The upper bound is pushed one day past the latest end_date (presumably because
+# the server treats it as exclusive - TODO confirm).
+input_date_range_max = (input_data['end_date'].max() + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
+
+apogee_codes = input_data['module_apogee'].dropna().unique()
+
+# Key/value pairs are form-encoded by requests, which escapes both keys and
+# values ('federationIds[]' goes on the wire as 'federationIds%5B%5D').
+request_data = [
+    ('start', input_date_range_min),
+    ('end', input_date_range_max),
+    ('resType', '100'),
+    ('calView', 'agendaWeek'),
+] + [('federationIds[]', apogee_code) for apogee_code in apogee_codes]
+
+url = 'https://edt.univ-tlse3.fr/calendar2/Home/GetCalendarData'
+request_headers = {
+    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
+}
+response = requests.post(url, data=request_data, headers=request_headers, timeout=60)
+# Stop on HTTP errors instead of parsing an error page as calendar data.
+response.raise_for_status()
+
+with open('out.json', 'w', encoding='utf-8') as f:
+    f.write(response.text)
+
+celcat_columns = ["start", "end", "allDay", "description", "eventCategory", "modules"]
+# read_json gets a buffer: passing the JSON text itself is deprecated in pandas.
+celcat_data = pd.read_json(io.StringIO(response.text))
+if celcat_data.empty:
+    # No event returned: keep the expected columns so the steps below still run.
+    celcat_data = pd.DataFrame(columns=celcat_columns)
+celcat_data['start'] = celcat_data['start'].astype('datetime64[ns]')
+celcat_data['end'] = celcat_data['end'].astype('datetime64[ns]')
+celcat_data = celcat_data[celcat_columns].copy()
+celcat_data['timeslot_id'] = celcat_data.index
+
+# Pair every timeslot with every expected course, then filter the pairs.
+crossed = celcat_data.merge(input_data, how='cross')
+
+
+def _lowered(value):
+    """Return value lower-cased, or '' when it is not a string (None, NaN)."""
+    return value.lower() if isinstance(value, str) else ''
+
+
+def timeslot_matches_course(row):
+    """Return True when the timeslot in row belongs to the course in row.
+
+    row is one line of the timeslot/course cross product.  The timeslot matches
+    when it is not an all-day event, its category contains the course type, it
+    references the module code (in its modules or its description), its
+    description contains the group, and it lies between begin_date and end_date.
+    Text comparisons other than the modules lookup ignore case.
+    """
+    # == True on purpose: a missing (NaN) flag is truthy but is not all-day.
+    if row['allDay'] == True:  # noqa: E712
+        return False
+    # A timeslot without both bounds can neither be range-checked nor exported.
+    if pd.isna(row['start']) or pd.isna(row['end']):
+        return False
+
+    # Fields coming from the server may be null; treat them as empty.
+    description = _lowered(row['description'])
+    event_category = _lowered(row['eventCategory'])
+    modules = row['modules'] if isinstance(row['modules'], (str, list, tuple)) else ()
+    module_apogee = row['module_apogee']
+
+    if row['course_type'].lower() not in event_category:
+        return False
+    if module_apogee not in modules and module_apogee.lower() not in description:
+        return False
+    if row['group'].lower() not in description:
+        return False
+    if row['start'] < row['begin_date']:
+        return False
+    # NOTE(review): if end_date holds a date without a time it is parsed as
+    # midnight, so timeslots held on that day are rejected - confirm intended.
+    if row['end'] > row['end_date']:
+        return False
+
+    return True
+
+
+keep_mask = [timeslot_matches_course(row) for _, row in crossed.iterrows()]
+crossed['keep'] = pd.Series(keep_mask, index=crossed.index, dtype=bool)
+crossed.to_csv('/tmp/debug.csv', index=False)
+keep = crossed[crossed['keep']]
+
+# Number of fetched timeslots per input course.  Courses without any match get
+# 0 so that they are reported too instead of silently dropping out.
+fetched_counts = keep.groupby('input_id').size()
+checked_df = input_data[[
+    'input_id', 'module_apogee', 'module_readable', 'begin_date', 'end_date',
+    'course_type', 'group', 'expected_nb_slots',
+]].copy()
+checked_df['fetched_timeslot_count'] = fetched_counts.reindex(
+    checked_df['input_id'], fill_value=0
+).to_numpy()
+fetch_problem_df = checked_df[checked_df['expected_nb_slots'] != checked_df['fetched_timeslot_count']]
+print(fetch_problem_df)
+
+c = ics.Calendar()
+for _, row in keep.sort_values(by='start').iterrows():
+    event = ics.Event(
+        name=f'{row["module_readable"]} - {row["course_type"]} - {row["group"]}',
+        begin=row['start'].tz_localize(tz='Europe/Paris'),
+        end=row['end'].tz_localize(tz='Europe/Paris'),
+        description=row['description'],
+    )
+    c.events.add(event)
+
+with open('out.ics', 'w', encoding='utf-8') as f:
+    f.write(str(c))