Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
lflex-celcat-survival
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
poquet
millian
lflex-celcat-survival
Commits
4b7c38de
Commit
4b7c38de
authored
2 years ago
by
Millian Poquet
Browse files
Options
Downloads
Patches
Plain Diff
POC script -> reusable functions&types
parent
3a225fd0
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
default.nix
+0
-1
0 additions, 1 deletion
default.nix
src/script.py
+149
-85
149 additions, 85 deletions
src/script.py
with
149 additions
and
86 deletions
default.nix
+
0
−
1
View file @
4b7c38de
...
@@ -10,7 +10,6 @@ pkgs.mkShell {
...
@@ -10,7 +10,6 @@ pkgs.mkShell {
python3Packages
.
ipython
python3Packages
.
ipython
python3Packages
.
requests
python3Packages
.
requests
python3Packages
.
pandas
python3Packages
.
pandas
python3Packages
.
nltk
python3Packages
.
ics
python3Packages
.
ics
];
];
}
}
This diff is collapsed.
Click to expand it.
src/script.py
+
149
−
85
View file @
4b7c38de
...
@@ -2,91 +2,155 @@
...
@@ -2,91 +2,155 @@
import
ics
import
ics
import
requests
import
requests
import
pandas
as
pd
import
pandas
as
pd
import
nltk
import
logging
input_dtypes
=
{
class
CourseRequest
:
'
module_apogee
'
:
'
str
'
,
def
__init__
(
self
,
filename
):
'
module_readable
'
:
'
str
'
,
self
.
df
=
pd
.
read_csv
(
filename
,
parse_dates
=
[
'
begin_date
'
,
'
end_date
'
])
'
begin_date
'
:
'
str
'
,
self
.
df
[
'
course_request_id
'
]
=
self
.
df
.
index
'
end_date
'
:
'
str
'
,
'
course_type
'
:
'
str
'
,
def
generate_request_body
(
self
):
'
group
'
:
'
str
'
,
date_range_min
=
min
(
self
.
df
[
'
begin_date
'
]).
strftime
(
"
%Y-%m-%d
"
)
'
expected_nb_slots
'
:
'
int64
'
date_range_max
=
(
max
(
self
.
df
[
'
end_date
'
])
+
pd
.
Timedelta
(
days
=
1
)).
strftime
(
"
%Y-%m-%d
"
)
}
apogee_codes
=
self
.
df
[
'
module_apogee
'
].
unique
()
input_data
=
pd
.
read_csv
(
'
input-data.csv
'
,
parse_dates
=
[
'
begin_date
'
,
'
end_date
'
])
input_data
[
'
input_id
'
]
=
input_data
.
index
fields
=
[
f
'
start=
{
date_range_min
}
'
,
input_date_range_min
=
min
(
input_data
[
'
begin_date
'
]).
strftime
(
"
%Y-%m-%d
"
)
f
'
end=
{
date_range_max
}
'
,
input_date_range_max
=
(
max
(
input_data
[
'
end_date
'
])
+
pd
.
Timedelta
(
days
=
1
)).
strftime
(
"
%Y-%m-%d
"
)
'
resType=100
'
,
'
calView=agendaWeek
'
,
apogee_codes
=
input_data
[
'
module_apogee
'
].
unique
()
]
+
[
'
federationIds%5B%5D={}
'
.
format
(
apogee_code
)
for
apogee_code
in
apogee_codes
]
return
'
&
'
.
join
(
fields
)
request_data
=
[
f
'
start=
{
input_date_range_min
}
'
,
def
do_request
(
self
,
url
=
'
https://edt.univ-tlse3.fr/calendar2/Home/GetCalendarData
'
):
f
'
end=
{
input_date_range_max
}
'
,
headers
=
{
"
Content-Type
"
:
"
application/x-www-form-urlencoded; charset=UTF-8
"
}
'
resType=100
'
,
request_body
=
self
.
generate_request_body
()
'
calView=agendaWeek
'
,
response
=
requests
.
post
(
url
,
request_body
,
headers
=
request_headers
)
]
+
[
'
federationIds%5B%5D={}
'
.
format
(
apogee_code
)
for
apogee_code
in
apogee_codes
]
if
not
response
.
ok
:
url
=
'
https://edt.univ-tlse3.fr/calendar2/Home/GetCalendarData
'
;
logging
.
error
(
f
'
POST HTTP request failed (status code
{
response
.
status_code
}
):
{
response
.
reason
}
'
)
request_headers
=
{
logging
.
error
(
f
'
Request response text:
\n
---
\n
{
response
.
text
}
\n
---
'
)
"
Content-Type
"
:
"
application/x-www-form-urlencoded; charset=UTF-8
"
response
.
raise_for_status
()
}
response
=
requests
.
post
(
url
,
'
&
'
.
join
(
request_data
),
headers
=
request_headers
)
return
response
.
text
with
open
(
'
out.json
'
,
'
w
'
)
as
f
:
class
CelcatEvents
:
f
.
write
(
response
.
text
)
def
__init__
(
self
,
celcat_raw_response
):
self
.
df
=
pd
.
read_json
(
celcat_raw_response
)
celcat_data
=
pd
.
read_json
(
response
.
text
)
self
.
df
[
'
start
'
]
=
self
.
df
[
'
start
'
].
astype
(
'
datetime64[ns]
'
)
celcat_data
[
'
start
'
]
=
celcat_data
[
'
start
'
].
astype
(
'
datetime64[ns]
'
)
self
.
df
[
'
end
'
]
=
self
.
df
[
'
end
'
].
astype
(
'
datetime64[ns]
'
)
celcat_data
[
'
end
'
]
=
celcat_data
[
'
end
'
].
astype
(
'
datetime64[ns]
'
)
self
.
df
=
self
.
df
[[
"
start
"
,
"
end
"
,
"
allDay
"
,
"
description
"
,
"
eventCategory
"
,
"
modules
"
]]
celcat_data
=
celcat_data
[[
"
start
"
,
"
end
"
,
"
allDay
"
,
"
description
"
,
"
eventCategory
"
,
"
modules
"
]]
self
.
df
[
'
timeslot_id
'
]
=
self
.
df
.
index
celcat_data
[
'
timeslot_id
'
]
=
celcat_data
.
index
crossed
=
celcat_data
.
merge
(
input_data
,
how
=
'
cross
'
)
def
timeslot_matches_course
(
row
):
if
row
[
'
allDay
'
]
==
True
:
return
False
if
(
row
[
'
course_type
'
].
lower
()
not
in
row
[
'
eventCategory
'
].
lower
()):
#and (row['course_type'].lower() not in row['description'].lower()):
return
False
if
(
row
[
'
module_apogee
'
]
not
in
row
[
'
modules
'
])
and
(
row
[
'
module_apogee
'
].
lower
()
not
in
row
[
'
description
'
].
lower
()):
return
False
if
row
[
'
group
'
].
lower
()
not
in
row
[
'
description
'
].
lower
():
return
False
if
row
[
'
start
'
]
<
row
[
'
begin_date
'
]:
return
False
if
row
[
'
end
'
]
>
row
[
'
end_date
'
]:
return
False
return
True
class
FilteredCelcatEvents
:
def
__init__
(
self
,
course_request
,
celcat_events
):
crossed
[
'
keep
'
]
=
crossed
.
apply
(
lambda
row
:
timeslot_matches_course
(
row
),
axis
=
1
)
self
.
_course_request
=
course_request
crossed
.
to_csv
(
'
/tmp/debug.csv
'
,
index
=
False
)
self
.
crossed_df
=
celcat_events
.
df
.
merge
(
course_request
.
df
,
how
=
'
cross
'
)
keep
=
crossed
[
crossed
[
'
keep
'
]
==
True
]
# parse descriptions
check_grp
=
keep
.
groupby
([
'
input_id
'
])
parsed_desc_df
=
self
.
crossed_df
.
apply
(
FilteredCelcatEvents
.
parse_description
,
axis
=
1
)
check_grp
[
'
timeslot_id
'
].
count
()
self
.
crossed_df
=
pd
.
concat
([
self
.
crossed_df
.
reset_index
(
drop
=
True
),
parsed_desc_df
],
axis
=
1
)
check_df
=
pd
.
DataFrame
({
self
.
crossed_df
[
'
keep
'
]
=
self
.
crossed_df
.
apply
(
lambda
row
:
FilteredCelcatEvents
.
timeslot_matches_course
(
row
),
axis
=
1
)
'
input_id
'
:
[
x
for
x
in
range
(
len
(
check_grp
))],
self
.
df
=
self
.
crossed_df
[
self
.
crossed_df
[
'
keep
'
]
==
True
]
'
fetched_timeslot_count
'
:
check_grp
[
'
timeslot_id
'
].
count
(),
}).
reset_index
(
drop
=
True
)
def
timeslot_matches_course
(
row
):
if
row
[
'
allDay
'
]
==
True
:
reordered_input_data
=
input_data
[[
'
input_id
'
,
'
module_apogee
'
,
'
module_readable
'
,
'
begin_date
'
,
'
end_date
'
,
'
course_type
'
,
'
group
'
,
'
expected_nb_slots
'
]]
return
False
checked_df
=
reordered_input_data
.
merge
(
check_df
,
how
=
'
inner
'
,
on
=
'
input_id
'
)
if
(
row
[
'
course_type
'
].
lower
()
not
in
row
[
'
eventCategory
'
].
lower
())
and
(
row
[
'
course_type
'
].
lower
()
not
in
row
[
'
course_type_parsed
'
].
lower
()):
fetch_problem_df
=
checked_df
[
checked_df
[
'
expected_nb_slots
'
]
!=
checked_df
[
'
fetched_timeslot_count
'
]]
return
False
print
(
fetch_problem_df
)
if
(
row
[
'
module_apogee
'
]
not
in
row
[
'
modules
'
])
and
(
row
[
'
module_apogee
'
].
lower
()
not
in
row
[
'
description
'
].
lower
()):
return
False
c
=
ics
.
Calendar
()
if
row
[
'
group
'
].
lower
()
not
in
row
[
'
groups_parsed
'
].
lower
():
for
_
,
row
in
keep
.
sort_values
(
by
=
'
start
'
).
iterrows
():
return
False
event
=
ics
.
Event
(
if
row
[
'
start
'
]
<
row
[
'
begin_date
'
]:
name
=
f
'
{
row
[
"
module_readable
"
]
}
-
{
row
[
"
course_type
"
]
}
-
{
row
[
"
group
"
]
}
'
,
return
False
begin
=
row
[
'
start
'
].
tz_localize
(
tz
=
'
Europe/Paris
'
),
if
row
[
'
end
'
]
>
row
[
'
end_date
'
]:
end
=
row
[
'
end
'
].
tz_localize
(
tz
=
'
Europe/Paris
'
),
return
False
description
=
row
[
'
description
'
]
)
return
True
c
.
events
.
add
(
event
)
def check_expected_nb_timeslots(self):
    '''
    Warn when the number of kept CELCAT time slots differs from what was requested.

    Reads ``self.df`` (the kept time slots, one row per slot, carrying
    ``course_request_id`` and ``timeslot_id``) and ``self._course_request.df``
    (one row per requested course, carrying ``expected_nb_slots``).
    For every course request, the number of kept time slots is compared with
    ``expected_nb_slots``; mismatching requests are reported with
    ``logging.warning``. Nothing is returned.

    Course requests that matched no time slot at all are counted as 0 fetched
    slots, so they are reported too (unless 0 slots were expected).
    '''
    # Number of kept time slots per course request, indexed by the real
    # course_request_id. Requests without any kept slot are absent from this
    # Series, which is why ids must not be rebuilt from range(len(...)).
    fetched_counts = self.df.groupby('course_request_id')['timeslot_id'].count()

    checked_df = self._course_request.df[[
        'course_request_id',
        'module_apogee',
        'module_readable',
        'begin_date',
        'end_date',
        'course_type',
        'group',
        'expected_nb_slots',
    ]].copy()

    # Look counts up by id; a missing id means nothing was fetched for it.
    checked_df['fetched_timeslot_count'] = (
        checked_df['course_request_id']
        .map(fetched_counts)
        .fillna(0)
        .astype('int64')
    )

    fetch_problem_df = checked_df[checked_df['expected_nb_slots'] != checked_df['fetched_timeslot_count']]
    if len(fetch_problem_df) > 0:
        logging.warning('The number of time slots fetched from CELCAT does not match the expected number of time slots for some courses')
        logging.warning(f'\n{fetch_problem_df}')
def parse_description(row):
    r'''
    Parse the ``description`` of a CELCAT event into room, course type and groups.

    Expecting an HTML text with this information, separated by HTML/CRLF line breaks:
    - (The room where the course takes place): optional
    - The apogee code of the course and its readable name
    - A list of student groups that should attend this course
    - The course type

    Example:
    'FSI / U3-01\r\n\r\n<br />\r\n\r\nKINX7AD1 - Parallélisme [KINX7AD1]\r\n\r\n<br />\r\n\r\nKINB7TPA41<br />KINB7TPA42\r\n\r\n<br />\r\n\r\nTD\r\n'

    ``row`` is anything indexable by ``'description'`` (a DataFrame row when
    used through ``DataFrame.apply(..., axis=1)``).

    Returns a ``pd.Series`` indexed by ``room_parsed``, ``course_type_parsed``
    and ``groups_parsed``. Any value that could not be extracted is the
    string ``'unset'``.
    '''
    room_prefix = 'FSI /'

    desc = row['description'].replace('\n', '').replace('\r', '')
    # str.split always yields at least one item, so fields is never empty.
    fields = [x.strip() for x in desc.split('<br />')]

    room = 'unset'
    groups_joined = 'unset'
    course_type = 'unset'

    if len(fields) == 1:
        # probably not a course. examples: "CONGES\r\n" or "FERIE\r\n"
        course_type = fields[0]
    else:
        # first field should be the room, but this is not always set
        if fields[0].startswith(room_prefix):
            room = fields[0][len(room_prefix):].strip()
            fields = fields[1:]

        # let us assume that the next field is the course name
        fields = fields[1:]

        # A description made of only a room and a course name leaves nothing
        # here; keep the 'unset' defaults instead of indexing an empty list.
        if fields:
            # last field should be the course type
            course_type = fields[-1]

            # all remaining fields should be student groups
            groups_joined = ' '.join(fields[:-1])

    return pd.Series(
        [room, course_type, groups_joined],
        index=['room_parsed', 'course_type_parsed', 'groups_parsed'],
    )
def course_df_to_ics(df):
    '''
    Build an ``ics.Calendar`` containing one event per row of ``df``.

    Each row is read for ``module_readable``, ``course_type``,
    ``groups_parsed``, ``start``, ``end`` and ``room_parsed``.
    ``start`` and ``end`` are localized to Europe/Paris via ``tz_localize``
    (assumes they are timezone-naive pandas timestamps - TODO confirm).
    The event location is only set when ``room_parsed`` is not ``'unset'``.

    Returns the populated calendar.
    '''
    calendar = ics.Calendar()

    for _, slot in df.iterrows():
        module = slot["module_readable"]
        kind = slot["course_type"]
        groups = slot["groups_parsed"]
        room = slot['room_parsed']

        entry = ics.Event(
            name=f'{module} - {kind} - {groups}',
            begin=slot['start'].tz_localize(tz='Europe/Paris'),
            end=slot['end'].tz_localize(tz='Europe/Paris'),
        )

        # 'unset' is the placeholder for a room that could not be parsed.
        if room != 'unset':
            entry.location = room

        calendar.events.add(entry)

    return calendar
req
=
CourseRequest
(
'
input-data.csv
'
)
celcat_raw_response
=
req
.
do_request
()
celcat_events
=
CelcatEvents
(
celcat_raw_response
)
filtered_celcat_events
=
FilteredCelcatEvents
(
req
,
celcat_events
)
filtered_celcat_events
.
check_expected_nb_timeslots
()
c
=
course_df_to_ics
(
filtered_celcat_events
.
df
)
with
open
(
'
out.ics
'
,
'
w
'
)
as
f
:
with
open
(
'
out.ics
'
,
'
w
'
)
as
f
:
f
.
write
(
str
(
c
)
)
f
.
write
lines
(
c
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment