#!/usr/bin/env python3
"""Build an iCalendar file from CELCAT timeslots matching the listed courses.

Workflow:

1. Read the course list from ``input-data.csv``.
2. POST one request to the CELCAT calendar endpoint covering the whole date
   range and every module code of the course list; the raw response is saved
   to ``out.json``.
3. Cross-join timeslots with courses and keep the pairs accepted by
   ``timeslot_matches_course`` (the full cross join is dumped to
   ``/tmp/debug.csv``).
4. Print the courses whose number of matched timeslots differs from
   ``expected_nb_slots``.
5. Write the matched timeslots to ``out.ics``.
"""
import io
from urllib.parse import quote

import ics
import requests
import pandas as pd
import nltk

# Expected column types of input-data.csv.
input_dtypes = {
    'module_apogee': 'str',
    'module_readable': 'str',
    'begin_date': 'str',
    'end_date': 'str',
    'course_type': 'str',
    'group': 'str',
    'expected_nb_slots': 'int64'
}
date_columns = ['begin_date', 'end_date']

# The declared dtypes are applied so that numeric-looking module codes or
# group names stay strings (the matching below calls .lower() on them).
# Date columns are left to parse_dates instead of being forced to str.
input_data = pd.read_csv(
    'input-data.csv',
    dtype={
        column: dtype
        for column, dtype in input_dtypes.items()
        if column not in date_columns
    },
    parse_dates=date_columns,
)
input_data['input_id'] = input_data.index

input_date_range_min = input_data['begin_date'].min().strftime("%Y-%m-%d")
# One day is added to the latest end date before querying.
input_date_range_max = (
    input_data['end_date'].max() + pd.Timedelta(days=1)
).strftime("%Y-%m-%d")

apogee_codes = input_data['module_apogee'].unique()
# 'federationIds%5B%5D' is the URL-encoded form of 'federationIds[]'.
# Codes are percent-escaped so unusual characters cannot corrupt the body.
request_data = [
    f'start={input_date_range_min}',
    f'end={input_date_range_max}',
    'resType=100',
    'calView=agendaWeek',
] + [
    'federationIds%5B%5D={}'.format(quote(str(apogee_code), safe=''))
    for apogee_code in apogee_codes
]

url = 'https://edt.univ-tlse3.fr/calendar2/Home/GetCalendarData'
request_headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
}
response = requests.post(
    url,
    '&'.join(request_data),
    headers=request_headers,
    timeout=60,  # never hang forever on an unresponsive server
)
# Fail here on an HTTP error instead of later while parsing an error page.
response.raise_for_status()

with open('out.json', 'w', encoding='utf-8') as f:
    f.write(response.text)

# Literal JSON strings are deprecated as read_json input; wrap in StringIO.
celcat_data = pd.read_json(io.StringIO(response.text))
if celcat_data.empty:
    raise SystemExit('No timeslot returned by the calendar service.')

celcat_data['start'] = celcat_data['start'].astype('datetime64[ns]')
celcat_data['end'] = celcat_data['end'].astype('datetime64[ns]')
# .copy() so that adding a column below does not write into a slice.
celcat_data = celcat_data[
    ["start", "end", "allDay", "description", "eventCategory", "modules"]
].copy()
celcat_data['timeslot_id'] = celcat_data.index

# Every (timeslot, course) pair; the pairs are filtered just below.
crossed = celcat_data.merge(input_data, how='cross')


def _lowered(value):
    """Return ``value`` lower-cased, or ``''`` when it is not a string."""
    return value.lower() if isinstance(value, str) else ''


def timeslot_matches_course(row):
    """Tell whether a crossed (timeslot, course) row is a genuine match.

    Args:
        row: one row of the cross join, holding the timeslot fields
            (``allDay``, ``eventCategory``, ``modules``, ``description``,
            ``start``, ``end``) and the course fields (``course_type``,
            ``module_apogee``, ``group``, ``begin_date``, ``end_date``).

    Returns:
        ``True`` when the timeslot is not an all-day event, its category
        contains the course type, it references the module code (in
        ``modules`` or in the description), its description contains the
        group, and it lies within ``[begin_date, end_date]``; ``False``
        otherwise.  Text comparisons are case-insensitive, except the
        membership test against ``modules``.  Missing timeslot text fields
        are treated as empty instead of raising.
    """
    # Explicit comparison: a missing (NaN) flag must not count as all-day.
    if row['allDay'] == True:  # noqa: E712
        return False

    description = _lowered(row['description'])

    # Only the event category is checked for the course type; checking the
    # description as well was tried and disabled by the original author.
    if row['course_type'].lower() not in _lowered(row['eventCategory']):
        return False

    modules = row['modules']
    if not isinstance(modules, (list, tuple, str)):
        modules = ()  # null/NaN from the service: no module attached
    if (row['module_apogee'] not in modules
            and row['module_apogee'].lower() not in description):
        return False

    if row['group'].lower() not in description:
        return False

    if row['start'] < row['begin_date']:
        return False
    # NOTE(review): if end_date holds a bare date it is parsed as midnight,
    # so timeslots during that last day are rejected here -- confirm that
    # this is intended.
    if row['end'] > row['end_date']:
        return False
    return True


crossed['keep'] = crossed.apply(timeslot_matches_course, axis=1)
crossed.to_csv('/tmp/debug.csv', index=False)
keep = crossed[crossed['keep']]

# Count matched timeslots per course, labelled with the real input_id of
# each group (courses without any match are absent from the groups).
check_grp = keep.groupby('input_id')
check_df = (
    check_grp['timeslot_id']
    .count()
    .rename('fetched_timeslot_count')
    .reset_index()
)

reordered_input_data = input_data[[
    'input_id', 'module_apogee', 'module_readable', 'begin_date', 'end_date',
    'course_type', 'group', 'expected_nb_slots',
]]
# Left merge + zero fill: a course with no matched timeslot must show up in
# the report with a count of 0 rather than vanish from it.
checked_df = reordered_input_data.merge(check_df, how='left', on='input_id')
checked_df['fetched_timeslot_count'] = (
    checked_df['fetched_timeslot_count'].fillna(0).astype('int64')
)
fetch_problem_df = checked_df[
    checked_df['expected_nb_slots'] != checked_df['fetched_timeslot_count']
]
print(fetch_problem_df)

c = ics.Calendar()
for _, row in keep.sort_values(by='start').iterrows():
    event = ics.Event(
        name=f'{row["module_readable"]} - {row["course_type"]} - {row["group"]}',
        # Timestamps are naive; they are localized to Europe/Paris here.
        begin=row['start'].tz_localize(tz='Europe/Paris'),
        end=row['end'].tz_localize(tz='Europe/Paris'),
        description=row['description'],
    )
    c.events.add(event)

with open('out.ics', 'w', encoding='utf-8') as f:
    f.write(str(c))