diff options
Diffstat (limited to 'management/parsers')
-rw-r--r-- | management/parsers/ups2018.py | 28 | ||||
-rw-r--r-- | management/parsers/ups2019.py | 126 |
2 files changed, 144 insertions, 10 deletions
diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index f1da5bf..ad8322c 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Alban Gruin +# Copyright (C) 2018-2019 Alban Gruin # # celcatsanitizer is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published @@ -26,12 +26,16 @@ from django.utils import timezone import lxml.html import requests -from ...models import Course, Group, Room +from ...models import Course, Group, Module, Room from ...utils import get_current_week, get_week from .abstractparser import AbstractParser, ParserError VARNAME = "v.events.list = " +GROUP_PREFIXES = ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", + "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", + "MAT-Agreg Interne ") + def find_events_list(soup): res = [] @@ -114,7 +118,8 @@ class Parser(AbstractParser): return course = Course.objects.create( - source=self.source, begin=begin, end=end + source=self.source, begin=begin, end=end, + celcat_id=int(event["id"]) ) min_i = 0 @@ -122,11 +127,7 @@ class Parser(AbstractParser): min_i = 1 i = min_i - while i < len(data) and not data[i].startswith( - ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", - "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", - "MAT-Agreg Interne ") - ): + while i < len(data) and not data[i].startswith(GROUP_PREFIXES): i += 1 groups = data[i] @@ -136,6 +137,13 @@ class Parser(AbstractParser): # par un dictionnaire classique. 
names = OrderedDict.fromkeys(data[i - 1].split(';')) course.name = ", ".join(names.keys()) + + module_names = [t for t in event["tag"] + if len(t) > 0 and + any(n.startswith(t) for n in names.keys())] + if len(module_names) > 0: + module, _ = Module.objects.get_or_create(name=module_names[0]) + course.module = module else: course.name = "Sans nom" if i - 2 >= min_i: @@ -213,10 +221,10 @@ class Parser(AbstractParser): responses = yield from asyncio.gather(*futures) return responses - def get_source_from_months(self, async=True): + def get_source_from_months(self, asynchronous=True): events = [] - if async: + if asynchronous: loop = asyncio.get_event_loop() events = loop.run_until_complete(self.get_months_async()) else: diff --git a/management/parsers/ups2019.py b/management/parsers/ups2019.py new file mode 100644 index 0000000..c6bd7e3 --- /dev/null +++ b/management/parsers/ups2019.py @@ -0,0 +1,126 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. 
from datetime import date, datetime, timedelta
from html import unescape

from django.utils import timezone

import requests

from ...models import Course, Group, Module, Room
from ...utils import get_current_week, get_week
from .abstractparser import AbstractParser
from .ups2018 import GROUP_PREFIXES


class Parser(AbstractParser):
    """Parser for the 2019 JSON calendar feed.

    Events are fetched with a single POST request (see ``get_source``) and
    each JSON event is turned into a ``Course`` by ``get_events``.
    """

    def __get_event(self, event, year, week):
        """Build a ``Course`` from one JSON event.

        The event's ``description`` is split on ``"<br />"`` and read in
        order: leading lines that do not start with a group prefix are
        treated as room names, the following lines that do start with one
        are groups, an optional line starting with the module name becomes
        the course name, and whatever remains is stored as notes.

        Returns ``None`` for all-day events and, when both ``year`` and
        ``week`` are given, for events outside that ISO week.  The returned
        course has attributes assigned after its creation; this method does
        not call ``save()`` on it.
        """
        if event["allDay"]:
            return None

        date_format = "%Y-%m-%dT%H:%M:%S"
        begin = timezone.make_aware(
            datetime.strptime(event["start"], date_format)
        )
        end = timezone.make_aware(
            datetime.strptime(event["end"], date_format)
        )

        if year is not None and week is not None:
            event_year, event_week, _ = begin.isocalendar()
            if event_year != year or event_week != week:
                return None

        data = [unescape(st.strip())
                for st in event["description"].split("<br />")]
        groups = []
        rooms = []

        # NOTE(review): the 2018 parser stores int(event["id"]); the id is
        # passed through unchanged here -- confirm the field type accepts it.
        course = Course.objects.create(
            source=self.source, begin=begin, end=end,
            celcat_id=event["id"]
        )

        max_i = len(data)

        category = event.get("eventCategory")
        if category:
            course.type = category
            # One description line is excluded from parsing when a category
            # is set -- presumably the trailing line repeats the category.
            max_i -= 1

        module_name = event.get("module")
        if module_name:
            module, _ = Module.objects.get_or_create(name=module_name)
            course.module = module

        i = 0
        while i < max_i and not data[i].startswith(GROUP_PREFIXES):
            rooms.append(data[i])
            i += 1
        course.rooms.add(*Room.objects.filter(name__in=rooms))

        # Diagnostic output: at least one parsed room name matched no
        # existing Room row.
        if len(rooms) != course.rooms.count():
            print(rooms, course.rooms)

        while i < max_i and data[i].startswith(GROUP_PREFIXES):
            group, _ = Group.objects.get_or_create(source=self.source,
                                                   celcat_name=data[i])
            groups.append(group)
            i += 1
        course.groups.add(*groups)

        if i < max_i and course.module is not None and \
           data[i].startswith(course.module.name):
            course.name = data[i]
            i += 1

        course.notes = "\n".join(data[i:max_i]).strip()
        # Test the notes themselves: data[i] is out of range once every
        # line has been consumed, and is not a notes line when i == max_i.
        if "other" in course.notes:
            print("Warning: \"other\" in notes")

        return course

    def get_events(self, today, year=None, week=None):
        """Yield a ``Course`` for every fetched event that is kept.

        ``today`` is accepted but not used by this parser.  ``year`` and
        ``week`` optionally restrict the output to one ISO week.
        ``get_source`` must have been called first so that ``self.events``
        exists.
        """
        for event in self.events:
            course = self.__get_event(event, year, week)
            if course is not None:
                yield course

    def get_update_date(self):
        """Return ``None``: this parser does not report an update date."""
        return None

    def get_weeks(self):
        """Return, and store in ``self.weeks``, the known weeks.

        Only the current week is registered, under the key ``"1"``.
        """
        # FIXME: automatically detect the weeks from the events present
        beginning, _ = get_week(*get_current_week())
        self.weeks = {"1": beginning}

        return self.weeks

    def get_source(self):
        """Fetch the events from today up to 365 days ahead.

        The decoded JSON payload is stored in ``self.events`` and returned.
        Raises ``requests.HTTPError`` when the server answers with an error
        status.
        """
        start = date.today()
        end = start + timedelta(days=365)

        req = requests.post(self.source.url,
                            headers={"User-Agent": self.user_agent},
                            data={"calView": "month",
                                  "resType": 103,
                                  "federationIds[]": self.source.metadata,
                                  "start": start.strftime("%Y-%m-%d"),
                                  "end": end.strftime("%Y-%m-%d")})
        # Fail on HTTP errors before configuring or decoding the body.
        req.raise_for_status()
        # "uft8" is not a codec name; it only worked because requests falls
        # back to a default decode when the codec lookup fails.
        req.encoding = "utf-8"

        self.events = req.json()
        return self.events